mothur-1.48.0/.gitignore

*.logfile
*.o
*.pbxproj
*.zip
.DS_Store
.idea
build
xcuserdata
project.xcworkspace
*.xcuserdata
TARGET_BUILD_DIR
DerivedData
*.xml
nbproject/
mothur
Mothur.1
mothur.entitlements
tools
Products
mothur_resources_11.1
mothur_resources_10.14
*.stdout
uchime
*.stderr
*.pat

mothur-1.48.0/.travis.yml

language: cpp

os:
  - osx
  - linux

addons:
  apt:
    packages:
      - libreadline6
      - libreadline6-dev
      - libboost-all-dev

before_install:
  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install readline boost; fi

script: make install

mothur-1.48.0/CITATION.md

# Cite mothur

To cite mothur in a scholarly article, please use:

> Schloss PD et al. 2009. Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Applied and Environmental Microbiology 75:7537–7541.

A BibTeX entry for LaTeX users:

```TeX
@article{schloss_introducing_2009,
  author = {Schloss, Patrick D. and Westcott, Sarah L. and Ryabin, Thomas and Hall, Justine R. and Hartmann, Martin and Hollister, Emily B. and Lesniewski, Ryan A. and Oakley, Brian B. and Parks, Donovan H. and Robinson, Courtney J. and Sahl, Jason W. and Stres, Blaz and Thallinger, Gerhard G. and Van Horn, David J. and Weber, Carolyn F.},
  title = {Introducing mothur: Open-Source, Platform-Independent, Community-Supported Software for Describing and Comparing Microbial Communities},
  volume = {75},
  number = {23},
  pages = {7537--7541},
  year = {2009},
  doi = {10.1128/AEM.01541-09},
  issn = {0099-2240},
  URL = {https://aem.asm.org/content/75/23/7537},
  journal = {Applied and Environmental Microbiology}
}
```

## Cite OptiClust

If you use the OptiClust method in the `cluster` or `cluster.split` commands, please also cite the OptiClust paper:

> Westcott SL, Schloss PD. 2017. OptiClust, an Improved Method for Assigning Amplicon-Based Sequence Data to Operational Taxonomic Units. mSphere 2:e00073-17.

A BibTeX entry for LaTeX users:

```TeX
@article{westcott_opticlust_2017,
  title = {{{OptiClust}}, an {{Improved Method}} for {{Assigning Amplicon}}-{{Based Sequence Data}} to {{Operational Taxonomic Units}}},
  author = {Westcott, Sarah L. and Schloss, Patrick D.},
  year = {2017},
  volume = {2},
  number = {2},
  pages = {e00073-17},
  issn = {2379-5042},
  doi = {10.1128/mSphereDirect.00073-17},
  journal = {mSphere}
}
```

## Cite OptiFit

If you use the `cluster.fit` command, please also cite the OptiFit paper:

> Sovacool KL, Westcott SL, Mumphrey MB, Dotson GA, Schloss PD. 2022. OptiFit: an Improved Method for Fitting Amplicon Sequences to Existing OTUs. mSphere 7:e00916-21.

A BibTeX entry for LaTeX users:

```TeX
@article{sovacool_optifit_2022,
  title = {{{OptiFit}}: An {{Improved Method}} for {{Fitting Amplicon Sequences}} to {{Existing OTUs}}},
  author = {Sovacool, Kelly L. and Westcott, Sarah L. and Mumphrey, M. Brodie and Dotson, Gabrielle A.
    and Schloss, Patrick D.},
  year = {2022},
  volume = {7},
  number = {1},
  pages = {e00916-21},
  doi = {10.1128/msphere.00916-21},
  journal = {mSphere}
}
```

mothur-1.48.0/External_Libraries_INSTALL

#Install the Boost static libraries
1. Download boost source https://www.boost.org/users/download/
2. cd path/to/boost_1_xx_0
3. $ ./bootstrap.sh --help (for options)
4. $ ./bootstrap.sh
5. $ ./b2 link=static

#Install the zlib static library
1. Download zlib https://zlib.net
2. cd path/to/zlib
3. ./configure --static
4. make

#Install gsl static library
1. Download gsl source https://www.gnu.org/software/gsl/
2. cd path/to/gsl
3. ./configure --disable-shared
4. make
5. make check
6. make install

#Install HDF5 static library
1. Download HDF5 source https://www.hdfgroup.org/downloads/hdf5/source-code/
2. cd path/to/hdf5
3. ./configure --disable-shared --without-szlib --enable-cxx
4. make
5. make check
6. make install

mothur-1.48.0/INSTALL.md

# Mothur install instructions

Either download the precompiled binaries or compile from the source code. More detailed installation instructions are on [the mothur wiki](https://www.mothur.org/wiki/Installation).

## Download precompiled binaries

The easiest way to get mothur is to download the release from [GitHub](https://github.com/mothur/mothur/releases), unzip it, and you're ready to run mothur.

## Compile mothur from source

(For Unix-based operating systems.)

Download the mothur [source code](https://github.com/mothur/mothur).

Download the mothur tools [external binaries](https://github.com/mothur/mothur/releases/). Note on tool version requirements: vsearch v2.15.2, uchime.

### Compiling with Boost:

#### 1. Install dependencies.

You will need to install the following dependencies for Boost if they are not already on your machine:

* bzip2
* bzip2-devel
* libz
* zlib-devel

You can use a package manager such as yum, apt-get, homebrew, or conda.

#### 2. Download [Boost](http://www.boost.org).

#### 3. Follow their install [instructions](http://www.boost.org/doc/libs/1_58_0/more/getting_started/unix-variants.html#easy-build-and-install):

```
tar -xzvf boost_versionNumber.tar.gz
cd boost_versionNumber/
./bootstrap.sh --prefix=/desired/install/path
./b2 install
```

Alternatively, you can install boost on Linux with:

```
sudo apt-get install libboost-all-dev
sudo yum install boost-devel
```

#### 4. Compile mothur:

```
cd /path/to/mothur
make
```

If you get linking errors, it is likely because the zlib files were not found. You may need to add gzip.cpp and zlib.cpp to the source folder of mothur. They are located in boost_versionNumber/libs/iostreams/src/.

### Compiling with HDF5:

#### 1. Download and install [HDF5](https://portal.hdfgroup.org/display/support/HDF5+1.10.3).

```
tar -xzvf hdf5-1.10.3.tar.gz
cd hdf5-1.10.3
./configure --prefix=/desired/install/path --enable-cxx --enable-static --disable-shared
make check
make install
```

#### 2. Edit the mothur makefile.

```
cd /path/to/mothur
```

Open the makefile in your preferred text editor:

```
vi Makefile
```

And edit the HDF5 filepaths:

```
HDF5_LIBRARY_DIR ?= "/path/to/hdf5/lib"
HDF5_INCLUDE_DIR ?= "/path/to/hdf5/include"
```

Save and close the makefile. (vi command `:wq`)

#### 3. Compile mothur.
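The Makefile only adds the HDF5 link and include flags when the `USEHDF5` macro is set to yes, so switch it on as well, either by editing the Makefile or on the `make` command line. A minimal sketch, assuming HDF5 was installed under the `/desired/install/path` prefix from step 1 (a placeholder, not a mothur default):

```
make USEHDF5=yes HDF5_LIBRARY_DIR="/desired/install/path/lib" HDF5_INCLUDE_DIR="/desired/install/path/include"
```

For a default build without HDF5, a plain run is enough: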
``` make ``` mothur-1.48.0/LICENSE.md000066400000000000000000000763101424121717000145340ustar00rootroot00000000000000GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright © 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. 
The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. “This License” refers to version 3 of the GNU General Public License. “Copyright” also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. “The Program” refers to any copyrightable work licensed under this License. Each licensee is addressed as “you”. “Licensees” and “recipients” may be individuals or organizations. To “modify” a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a “modified version” of the earlier work or a work “based on” the earlier work. A “covered work” means either the unmodified Program or a work based on the Program. To “propagate” a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To “convey” a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays “Appropriate Legal Notices” to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The “source code” for a work means the preferred form of the work for making modifications to it. “Object code” means any non-source form of a work. A “Standard Interface” means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The “System Libraries” of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A “Major Component”, in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The “Corresponding Source” for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. 
For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. 
You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to “keep intact all notices”. c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an “aggregate” if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. 
If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A “User Product” is either (1) a “consumer product”, which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, “normally used” refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. “Installation Information” for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. 
Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. “Additional permissions” are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered “further restrictions” within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. 
Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An “entity transaction” is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A “contributor” is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's “contributor version”. 
A contributor's “essential patent claims” are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, “control” includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a “patent license” is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To “grant” such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. “Knowingly relying” means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is “discriminatory” if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. 
Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License “or any later version” applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

17. Interpretation of Sections 15 and 16.

If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee.

END OF TERMS AND CONDITIONS

mothur-1.48.0/Makefile

###################################################
#
# Makefile for mothur
#
###################################################
#
# Macros
#
# OPTIMIZE - yes will increase the speed of the executable.
# USEREADLINE - link with the readline libraries. Must have readline installed. On Windows, set to no.
# USEBOOST - link with the boost libraries. Must install boost. Allows the make.contigs command to read .gz files.
# USEHDF5 - link with the HDF5 C++ libraries. Must install HDF5. Allows the biom.info command to read Biom format 2.0.
# USEGSL - link with the GNU Scientific libraries. Must install GSL. Allows the estimator.single command to find diversity estimates.
# HDF5_LIBRARY_DIR - location of HDF5 libraries
# HDF5_INCLUDE_DIR - location of HDF5 include files
# BOOST_LIBRARY_DIR - location of boost libraries
# BOOST_INCLUDE_DIR - location of boost include files
# GSL_LIBRARY_DIR - location of GSL libraries
# GSL_INCLUDE_DIR - location of GSL include files
# MOTHUR_FILES - The MOTHUR_FILES parameter is optional, but allows you to set a default location for mothur to look for input files it can't find. This is often used for reference files you want to store in one location separate from your data.
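# Example (hypothetical paths, not project defaults): instead of editing the
# assignments below, these macros can also be overridden on the make command
# line, e.g.
#   make USEBOOST=yes BOOST_LIBRARY_DIR=/usr/local/lib BOOST_INCLUDE_DIR=/usr/local/include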
INSTALL_DIR ?= "\"Enter_your_mothur_install_path_here\""
OPTIMIZE ?= yes
USEREADLINE ?= yes
USEBOOST ?= no
USEHDF5 ?= no
USEGSL ?= no
LOGFILE_NAME ?= no
BOOST_LIBRARY_DIR ?= "\"Enter_your_boost_library_path_here\""
BOOST_INCLUDE_DIR ?= "\"Enter_your_boost_include_path_here\""
HDF5_LIBRARY_DIR ?= "\"Enter_your_HDF5_library_path_here\""
HDF5_INCLUDE_DIR ?= "\"Enter_your_HDF5_include_path_here\""
GSL_LIBRARY_DIR ?= "\"Enter_your_GSL_library_path_here\""
GSL_INCLUDE_DIR ?= "\"Enter_your_GSL_include_path_here\""
MOTHUR_FILES="\"Enter_your_default_path_here\""
MOTHUR_TOOLS="\"Enter_your_mothur_tools_path_here\""
VERSION = "\"1.48.0\""
RELEASE_DATE = "\"5/20/22\""

# Set a static logfile name
ifeq ($(strip $(LOGFILE_NAME)),yes)
    LOGFILE_NAME="\"silent\""
endif

ifeq ($(strip $(OPTIMIZE)),yes)
    CXXFLAGS += -O3
endif

CXXFLAGS += -std=c++11 -pthread -DVERSION=${VERSION} -DRELEASE_DATE=${RELEASE_DATE}
LDFLAGS += -std=c++11 -pthread

ifeq ($(strip $(MOTHUR_FILES)),"\"Enter_your_default_path_here\"")
else
    CXXFLAGS += -DMOTHUR_FILES=${MOTHUR_FILES}
endif

ifeq ($(strip $(MOTHUR_TOOLS)),"\"Enter_your_mothur_tools_path_here\"")
else
    CXXFLAGS += -DMOTHUR_TOOLS=${MOTHUR_TOOLS}
endif

# if you do not want to use the readline library, set this to no.
# make sure you have the library installed
ifeq ($(strip $(USEREADLINE)),yes)
    CXXFLAGS += -DUSE_READLINE
    LIBS += -lreadline
endif

#User specified boost library
ifeq ($(strip $(USEBOOST)),yes)
    LDFLAGS += -L ${BOOST_LIBRARY_DIR}
    LIBS += -lboost_iostreams -lboost_system -lboost_filesystem -lz
    CXXFLAGS += -DUSE_BOOST -I ${BOOST_INCLUDE_DIR}
endif

#User specified HDF5 library
ifeq ($(strip $(USEHDF5)),yes)
    LDFLAGS += -L ${HDF5_LIBRARY_DIR}
    LIBS += -lhdf5 -lhdf5_cpp
    CXXFLAGS += -DUSE_HDF5 -I ${HDF5_INCLUDE_DIR}
endif

#User specified GSL library
ifeq ($(strip $(USEGSL)),yes)
    LDFLAGS += -L ${GSL_LIBRARY_DIR}
    LIBS += -lgsl -lgslcblas -lm
    CXXFLAGS += -DUSE_GSL -I ${GSL_INCLUDE_DIR}
endif

#
# INCLUDE directories for mothur
#
# VPATH=source/calculators:source/chimera:source/classifier:source/clearcut:source/commands:source/communitytype:source/datastructures:source/engines:source/metastats:source/read:source/svm:source/

skipUchime := source/uchime_src/
subdirs := $(sort $(dir $(filter-out $(skipUchime), $(wildcard source/*/))))
subDirIncludes = $(patsubst %, -I %, $(subdirs))
subDirLinking = $(patsubst %, -L%, $(subdirs))
CXXFLAGS += -I. $(subDirIncludes)
LDFLAGS += $(subDirLinking)

#
# Get the list of all .cpp files, rename to .o files
#
OBJECTS=$(patsubst %.cpp,%.o,$(wildcard $(addsuffix *.cpp,$(subdirs))))
OBJECTS+=$(patsubst %.c,%.o,$(wildcard $(addsuffix *.c,$(subdirs))))
OBJECTS+=$(patsubst %.cpp,%.o,$(wildcard *.cpp))
OBJECTS+=$(patsubst %.c,%.o,$(wildcard *.c))

mothur : $(OBJECTS) uchime
	$(CXX) $(LDFLAGS) $(TARGET_ARCH) -o $@ $(OBJECTS) $(LIBS)

uchime :
	cd source/uchime_src && export CXX=$(CXX) && make clean && make && mv uchime ../../ && cd ..
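# Example (hypothetical path): the install target below moves the freshly built
# binary into INSTALL_DIR, so a one-step build and install could look like
#   make install INSTALL_DIR=/usr/local/bin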
install : mothur
ifeq ($(strip $(INSTALL_DIR)),"\"Enter_your_mothur_install_path_here\"")
else
	mkdir -p ${INSTALL_DIR}
	mv mothur ${INSTALL_DIR}/mothur
endif

%.o : %.c %.h
	$(COMPILE.c) $(OUTPUT_OPTION) $<
%.o : %.cpp %.h
	$(COMPILE.cpp) $(OUTPUT_OPTION) $<
%.o : %.cpp %.hpp
	$(COMPILE.cpp) $(OUTPUT_OPTION) $<

clean :
	@rm -f $(OBJECTS)

mothur-1.48.0/Makefile-Windows

###################################################
#
# Makefile for mothur
#
###################################################
#
# Macros
#
# 64BIT_VERSION - set to no if you are using a 32bit arch.
# OPTIMIZE - yes will increase the speed of the executable.
# USEREADLINE - link with the readline libraries. Must have readline installed. On Windows, set to no.
# USEBOOST - link with the boost libraries. Must install boost. Allows the make.contigs command to read .gz files.
# BOOST_LIBRARY_DIR - location of boost libraries
# BOOST_INCLUDE_DIR - location of boost include files
# MOTHUR_FILES - default location for mothur to look for input files at runtime. Most often used for reference files.

64BIT_VERSION ?= yes
USEREADLINE ?= no
USEBOOST ?= no

RELEASE_DATE = "\"09/01/2017\""
VERSION = "\"1.40.0\""

# Optimize to level 3:
CXXFLAGS += -O3

#CPP_11
CXXFLAGS += -std=c++11

ifeq ($(strip $(64BIT_VERSION)),yes)
    # if you are using cygwin to build for Windows, use the following cross-compiler settings
    CXX = x86_64-w64-mingw32-g++
    CC = x86_64-w64-mingw32-g++
    TARGET_ARCH += -m64 -static
    CXXFLAGS += -DBIT_VERSION
endif

CXXFLAGS += -DRELEASE_DATE=${RELEASE_DATE} -DVERSION=${VERSION}

# INCLUDE directories for mothur
VPATH=source/calculators:source/chimera:source/classifier:source/clearcut:source/commands:source/communitytype:source/datastructures:source/metastats:source/randomforest:source/read:source/svm
skipUchime := source/uchime_src/
subdirs := $(sort $(dir $(filter-out $(skipUchime), $(wildcard source/*/))))
subDirIncludes = $(patsubst %, -I %, $(subdirs))
subDirLinking = $(patsubst %, -L%, $(subdirs))
CXXFLAGS += -I. $(subDirIncludes)
LDFLAGS += $(subDirLinking)

#
# Get the list of all .cpp files, rename to .o files
#
OBJECTS=$(patsubst %.cpp,%.o,$(wildcard $(addsuffix *.cpp,$(subdirs))))
OBJECTS+=$(patsubst %.c,%.o,$(wildcard $(addsuffix *.c,$(subdirs))))
OBJECTS+=$(patsubst %.cpp,%.o,$(wildcard *.cpp))
OBJECTS+=$(patsubst %.c,%.o,$(wildcard *.c))

mothur : $(OBJECTS) uchime
	$(CXX) $(LDFLAGS) $(TARGET_ARCH) -o $@ $(OBJECTS) $(LIBS)
	strip mothur

uchime:
	cd source/uchime_src && ./mk && mv uchime ../../ && cd ..

install : mothur

%.o : %.c %.h
	$(COMPILE.c) $(OUTPUT_OPTION) $<
%.o : %.cpp %.h
	$(COMPILE.cpp) $(OUTPUT_OPTION) $<
%.o : %.cpp %.hpp
	$(COMPILE.cpp) $(OUTPUT_OPTION) $<

clean :
	@rm -f $(OBJECTS)
	@rm -f uchime

mothur-1.48.0/Makefile_cluster

USEREADLINE ?= yes
USEBOOST ?= yes

VERSION = "\"1.47.0\""
RELEASE_DATE = "\"1/21/22\""
MOTHUR_TOOLS="\"/nfs/turbo/schloss-lab/bin/mothur_src/tools/\""

# Optimize to level 3:
CXXFLAGS += -O3 -std=c++11 -pthread -mtune=generic -DVERSION=${VERSION} -DRELEASE_DATE=${RELEASE_DATE} -DMOTHUR_TOOLS=${MOTHUR_TOOLS}
LDFLAGS += -std=c++11 -pthread

# if you do not want to use the readline library, set this to no.
# make sure you have the library installed
ifeq ($(strip $(USEREADLINE)),yes)
    CXXFLAGS += -DUSE_READLINE
    LIBS += -lreadline
endif

#The boost libraries allow you to read gz files.
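# Example (hypothetical paths, not values shipped with this file): this cluster
# makefile does not assign BOOST_LIBRARY_DIR or BOOST_INCLUDE_DIR itself, so they
# are expected to come from the environment or the make command line, e.g.
#   make -f Makefile_cluster BOOST_LIBRARY_DIR=/path/to/boost/lib BOOST_INCLUDE_DIR=/path/to/boost/include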
ifeq ($(strip $(USEBOOST)),yes) #statically link so the boost install is not required on users machine LDFLAGS += -L ${BOOST_LIBRARY_DIR} LIBS += -lboost_iostreams -lboost_system -lboost_filesystem -lz CXXFLAGS += -DUSE_BOOST -I ${BOOST_INCLUDE_DIR} endif # # INCLUDE directories for mothur # # VPATH=source/calculators:source/chimera:source/classifier:source/clearcut:source/commands:source/communitytype:source/datastructures:source/metastats:source/read:source/svm:source/engines skipUchime := source/uchime_src/ subdirs := $(sort $(dir $(filter-out $(skipUchime), source/, $(wildcard source/*/)))) subDirIncludes = $(patsubst %, -I %, $(subdirs)) subDirLinking = $(patsubst %, -L%, $(subdirs)) CXXFLAGS += -I. $(subDirIncludes) LDFLAGS += $(subDirLinking) # # Get the list of all .cpp files, rename to .o files # OBJECTS=$(patsubst %.cpp,%.o,$(wildcard $(addsuffix *.cpp,$(subdirs)))) OBJECTS+=$(patsubst %.c,%.o,$(wildcard $(addsuffix *.c,$(subdirs)))) OBJECTS+=$(patsubst %.cpp,%.o,$(wildcard *.cpp)) OBJECTS+=$(patsubst %.c,%.o,$(wildcard *.c)) mothur : $(OBJECTS) $(CXX) $(LDFLAGS) $(TARGET_ARCH) -o $@ $(OBJECTS) $(LIBS) strip mothur install : mothur %.o : %.c %.h $(COMPILE.c) $(OUTPUT_OPTION) $< %.o : %.cpp %.h $(COMPILE.cpp) $(OUTPUT_OPTION) $< %.o : %.cpp %.hpp $(COMPILE.cpp) $(OUTPUT_OPTION) $< clean : @rm -f $(OBJECTS) mothur-1.48.0/Mothur.xcodeproj/000077500000000000000000000000001424121717000163735ustar00rootroot00000000000000mothur-1.48.0/Mothur.xcodeproj/project.pbxproj000066400000000000000000016137771424121717000214750ustar00rootroot00000000000000// !$*UTF8*$! { archiveVersion = 1; classes = { }; objectVersion = 46; objects = { /* Begin PBXBuildFile section */ 219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DDF1552C4BD004209F9 /* newcommandtemplate.cpp */; }; 219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */; }; 4803D5AD211CA67F001C63B5 /* testsharedrabundvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4803D5AB211CA67F001C63B5 /* testsharedrabundvector.cpp */; }; 4803D5B0211CD839001C63B5 /* testsharedrabundfloatvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4803D5AE211CD839001C63B5 /* testsharedrabundfloatvector.cpp */; }; 4803D5B3211DDA5A001C63B5 /* testsharedrabundvectors.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4803D5B1211DDA5A001C63B5 /* testsharedrabundvectors.cpp */; }; 4803D5B621231D9D001C63B5 /* testsharedrabundfloatvectors.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4803D5B421231D9D001C63B5 /* testsharedrabundfloatvectors.cpp */; }; 48098ED6219DE7A500031FA4 /* testsubsample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48098ED4219DE7A500031FA4 /* testsubsample.cpp */; }; 4809EC95227B3A5B00B4D0E5 /* metrolognormal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E7E0A12278A21B00B74910 /* metrolognormal.cpp */; }; 4809EC98227B405700B4D0E5 /* metrologstudent.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4809EC96227B405700B4D0E5 /* metrologstudent.cpp */; }; 4809EC99227B405700B4D0E5 /* metrologstudent.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4809EC96227B405700B4D0E5 /* metrologstudent.cpp */; }; 4809EC9D227C9B3100B4D0E5 /* metrosichel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4809EC9B227C9B3100B4D0E5 /* metrosichel.cpp */; }; 4809EC9E227C9B3100B4D0E5 /* metrosichel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4809EC9B227C9B3100B4D0E5 /* 
metrosichel.cpp */; }; 4809ECA12280898E00B4D0E5 /* igrarefaction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4809EC9F2280898E00B4D0E5 /* igrarefaction.cpp */; }; 4809ECA22280898E00B4D0E5 /* igrarefaction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4809EC9F2280898E00B4D0E5 /* igrarefaction.cpp */; }; 4809ECA522831A5E00B4D0E5 /* lnabundance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4809ECA322831A5E00B4D0E5 /* lnabundance.cpp */; }; 4809ECA622831A5E00B4D0E5 /* lnabundance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4809ECA322831A5E00B4D0E5 /* lnabundance.cpp */; }; 480D1E2A1EA681D100BF9C77 /* testclustercalcs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 480D1E281EA681D100BF9C77 /* testclustercalcs.cpp */; }; 480D1E311EA92D5500BF9C77 /* fakeoptimatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 480D1E2F1EA92D5500BF9C77 /* fakeoptimatrix.cpp */; }; 480E8DB11CAB12ED00A0D137 /* testfastqread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 480E8DAF1CAB12ED00A0D137 /* testfastqread.cpp */; }; 480E8DB21CAB1F5E00A0D137 /* vsearchfileparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */; }; 4810D5B7218208CC00C668E8 /* testcounttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4810D5B5218208CC00C668E8 /* testcounttable.cpp */; }; 4815BEB12289E13500677EE2 /* lnrarefaction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEAF2289E13500677EE2 /* lnrarefaction.cpp */; }; 4815BEB4228B371E00677EE2 /* lnshift.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEB2228B371E00677EE2 /* lnshift.cpp */; }; 4815BEB5228B371E00677EE2 /* lnshift.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEB2228B371E00677EE2 /* lnshift.cpp */; }; 4815BEB8228DD18400677EE2 /* lsabundance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEB6228DD18400677EE2 /* lsabundance.cpp */; }; 4815BEB9228DD18400677EE2 /* lsabundance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEB6228DD18400677EE2 /* lsabundance.cpp */; }; 4815BEBC2293189600677EE2 /* lsrarefaction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEBA2293189600677EE2 /* lsrarefaction.cpp */; }; 4815BEBD2293189600677EE2 /* lsrarefaction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEBA2293189600677EE2 /* lsrarefaction.cpp */; }; 4815BEBE2295A02800677EE2 /* diversityutils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E7E0A42278AD4800B74910 /* diversityutils.cpp */; }; 4815BEC12295CE6800677EE2 /* siabundance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEBF2295CE6800677EE2 /* siabundance.cpp */; }; 4815BEC22295CE6800677EE2 /* siabundance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEBF2295CE6800677EE2 /* siabundance.cpp */; }; 4815BEC52296F19500677EE2 /* sirarefaction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEC32296F19500677EE2 /* sirarefaction.cpp */; }; 4815BEC62296F19500677EE2 /* sirarefaction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEC32296F19500677EE2 /* sirarefaction.cpp */; }; 4815BEC922970FA700677EE2 /* sishift.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEC722970FA700677EE2 /* sishift.cpp */; }; 4815BECA22970FA700677EE2 /* sishift.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4815BEC722970FA700677EE2 /* sishift.cpp */; }; 481623E21B56A2DB004C60B7 /* pcrseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481623E11B56A2DB004C60B7 /* pcrseqscommand.cpp */; }; 481E40DB244DFF5A0059C925 /* onegapignore.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = 481E40DA244DFF5A0059C925 /* onegapignore.cpp */; }; 481E40DD244F52460059C925 /* ignoregaps.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481E40DC244F52460059C925 /* ignoregaps.cpp */; }; 481E40DF244F619D0059C925 /* eachgapignore.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481E40DE244F619D0059C925 /* eachgapignore.cpp */; }; 481E40E1244F62980059C925 /* calculator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481E40E0244F62980059C925 /* calculator.cpp */; }; 481E40E3244F6A050059C925 /* eachgapdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481E40E2244F6A050059C925 /* eachgapdist.cpp */; }; 481FB51C1AC0A63E0076CFF3 /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481FB51B1AC0A63E0076CFF3 /* main.cpp */; }; 481FB5261AC0ADA00076CFF3 /* sequence.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7DB12D37EC400DA6239 /* sequence.cpp */; }; 481FB5271AC0ADBA0076CFF3 /* mothurout.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75D12D37EC400DA6239 /* mothurout.cpp */; }; 481FB52A1AC19F8B0076CFF3 /* setseedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481FB5281AC19F8B0076CFF3 /* setseedcommand.cpp */; }; 481FB52B1AC1B09F0076CFF3 /* setseedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481FB5281AC19F8B0076CFF3 /* setseedcommand.cpp */; }; 481FB52C1AC1B0A70076CFF3 /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6AF12D37EC400DA6239 /* commandfactory.cpp */; }; 481FB52E1AC1B0CB0076CFF3 /* testsetseedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481FB52D1AC1B0CB0076CFF3 /* testsetseedcommand.cpp */; }; 481FB5301AC1B5C80076CFF3 /* calcsparcc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7189173D40E4002163C2 /* calcsparcc.cpp */; }; 481FB5311AC1B5CD0076CFF3 /* clearcut.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69412D37EC400DA6239 /* clearcut.cpp */; }; 481FB5321AC1B5D00076CFF3 /* cmdargs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A412D37EC400DA6239 /* cmdargs.cpp */; }; 481FB5331AC1B5D30076CFF3 /* distclearcut.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6CF12D37EC400DA6239 /* distclearcut.cpp */; }; 481FB5341AC1B5D60076CFF3 /* dmat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6D312D37EC400DA6239 /* dmat.cpp */; }; 481FB5351AC1B5D90076CFF3 /* fasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6DC12D37EC400DA6239 /* fasta.cpp */; }; 481FB5361AC1B5DC0076CFF3 /* getopt_long.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6FC12D37EC400DA6239 /* getopt_long.cpp */; }; 481FB5371AC1B5E00076CFF3 /* cluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69812D37EC400DA6239 /* cluster.cpp */; }; 481FB5381AC1B5E30076CFF3 /* clusterclassic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69A12D37EC400DA6239 /* clusterclassic.cpp */; }; 481FB5391AC1B5E90076CFF3 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; }; 481FB53A1AC1B5EC0076CFF3 /* bergerparker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65E12D37EC300DA6239 /* bergerparker.cpp */; }; 481FB53B1AC1B5EF0076CFF3 /* boneh.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66612D37EC400DA6239 /* boneh.cpp */; }; 481FB53C1AC1B5F10076CFF3 /* bootstrap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66812D37EC400DA6239 /* bootstrap.cpp */; }; 481FB53D1AC1B5F80076CFF3 /* bstick.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66C12D37EC400DA6239 /* bstick.cpp */; }; 
481FB53F1AC1B6000076CFF3 /* canberra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67012D37EC400DA6239 /* canberra.cpp */; }; 481FB5401AC1B6030076CFF3 /* chao1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67612D37EC400DA6239 /* chao1.cpp */; }; 481FB5411AC1B6070076CFF3 /* coverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6BB12D37EC400DA6239 /* coverage.cpp */; }; 481FB5421AC1B60D0076CFF3 /* efron.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6D712D37EC400DA6239 /* efron.cpp */; }; 481FB5431AC1B6110076CFF3 /* geom.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6F012D37EC400DA6239 /* geom.cpp */; }; 481FB5441AC1B6140076CFF3 /* goodscoverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70E12D37EC400DA6239 /* goodscoverage.cpp */; }; 481FB5451AC1B6170076CFF3 /* gower.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71212D37EC400DA6239 /* gower.cpp */; }; 481FB5461AC1B6190076CFF3 /* hamming.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71612D37EC400DA6239 /* hamming.cpp */; }; 481FB5471AC1B61C0076CFF3 /* heip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72412D37EC400DA6239 /* heip.cpp */; }; 481FB5481AC1B61F0076CFF3 /* hellinger.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72612D37EC400DA6239 /* hellinger.cpp */; }; 481FB5491AC1B6220076CFF3 /* invsimpson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72F12D37EC400DA6239 /* invsimpson.cpp */; }; 481FB54A1AC1B6270076CFF3 /* jackknife.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73112D37EC400DA6239 /* jackknife.cpp */; }; 481FB54B1AC1B62A0076CFF3 /* logsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74112D37EC400DA6239 /* logsd.cpp */; }; 481FB54C1AC1B62D0076CFF3 /* manhattan.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74712D37EC400DA6239 /* manhattan.cpp */; }; 481FB54D1AC1B6300076CFF3 /* memchi2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74B12D37EC400DA6239 /* memchi2.cpp */; }; 481FB54E1AC1B6340076CFF3 /* memchord.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74D12D37EC400DA6239 /* memchord.cpp */; }; 481FB54F1AC1B63A0076CFF3 /* memeuclidean.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74F12D37EC400DA6239 /* memeuclidean.cpp */; }; 481FB5501AC1B63D0076CFF3 /* mempearson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75112D37EC400DA6239 /* mempearson.cpp */; }; 481FB5511AC1B6410076CFF3 /* npshannon.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76D12D37EC400DA6239 /* npshannon.cpp */; }; 481FB5521AC1B6450076CFF3 /* odum.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77112D37EC400DA6239 /* odum.cpp */; }; 481FB5531AC1B6490076CFF3 /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78312D37EC400DA6239 /* parsimony.cpp */; }; 481FB5541AC1B64C0076CFF3 /* prng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79912D37EC400DA6239 /* prng.cpp */; }; 481FB5551AC1B64F0076CFF3 /* qstat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79D12D37EC400DA6239 /* qstat.cpp */; }; 481FB5561AC1B6520076CFF3 /* shannon.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7E512D37EC400DA6239 /* shannon.cpp */; }; 481FB5571AC1B6550076CFF3 /* shannoneven.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7E712D37EC400DA6239 /* shannoneven.cpp */; }; 481FB5581AC1B6590076CFF3 /* shannonrange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */; }; 481FB5591AC1B65D0076CFF3 /* 
sharedjabund.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F412D37EC400DA6239 /* sharedjabund.cpp */; }; 481FB55A1AC1B6600076CFF3 /* sharedace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7E912D37EC400DA6239 /* sharedace.cpp */; }; 481FB55B1AC1B6630076CFF3 /* sharedanderbergs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */; }; 481FB55C1AC1B6660076CFF3 /* sharedbraycurtis.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7EE12D37EC400DA6239 /* sharedbraycurtis.cpp */; }; 481FB55D1AC1B6690076CFF3 /* sharedchao1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F012D37EC400DA6239 /* sharedchao1.cpp */; }; 481FB55E1AC1B66D0076CFF3 /* sharedjackknife.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F612D37EC400DA6239 /* sharedjackknife.cpp */; }; 481FB55F1AC1B6750076CFF3 /* sharedjclass.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F812D37EC400DA6239 /* sharedjclass.cpp */; }; 481FB5601AC1B6790076CFF3 /* sharedjest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7FA12D37EC400DA6239 /* sharedjest.cpp */; }; 481FB5611AC1B69B0076CFF3 /* sharedjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7222D721856277C0055A993 /* sharedjsd.cpp */; }; 481FB5621AC1B69E0076CFF3 /* sharedkstest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7FC12D37EC400DA6239 /* sharedkstest.cpp */; }; 481FB5631AC1B6A10076CFF3 /* sharedkulczynski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7FE12D37EC400DA6239 /* sharedkulczynski.cpp */; }; 481FB5641AC1B6A40076CFF3 /* sharedkulczynskicody.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80012D37EC400DA6239 /* sharedkulczynskicody.cpp */; }; 481FB5651AC1B6A70076CFF3 /* sharedlennon.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80212D37EC400DA6239 /* sharedlennon.cpp */; }; 481FB5661AC1B6AA0076CFF3 /* sharedmarczewski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80612D37EC400DA6239 /* sharedmarczewski.cpp */; }; 481FB5671AC1B6AD0076CFF3 /* sharedmorisitahorn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80812D37EC400DA6239 /* sharedmorisitahorn.cpp */; }; 481FB5681AC1B6B20076CFF3 /* sharedochiai.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80B12D37EC400DA6239 /* sharedochiai.cpp */; }; 481FB5691AC1B6B50076CFF3 /* sharedrjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705AC119BE32C50075E977 /* sharedrjsd.cpp */; }; 481FB56A1AC1B6B80076CFF3 /* sharedsobs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81512D37EC400DA6239 /* sharedsobs.cpp */; }; 481FB56B1AC1B6BB0076CFF3 /* sharedsobscollectsummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81712D37EC400DA6239 /* sharedsobscollectsummary.cpp */; }; 481FB56C1AC1B6BE0076CFF3 /* sharedsorabund.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81912D37EC400DA6239 /* sharedsorabund.cpp */; }; 481FB56D1AC1B6C10076CFF3 /* sharedsorclass.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81B12D37EC400DA6239 /* sharedsorclass.cpp */; }; 481FB56E1AC1B6C30076CFF3 /* sharedsorest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81D12D37EC400DA6239 /* sharedsorest.cpp */; }; 481FB56F1AC1B6C70076CFF3 /* sharedthetan.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81F12D37EC400DA6239 /* sharedthetan.cpp */; }; 481FB5701AC1B6CA0076CFF3 /* sharedthetayc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82112D37EC400DA6239 /* sharedthetayc.cpp */; }; 481FB5711AC1B6D40076CFF3 /* shen.cpp in Sources */ = 
{isa = PBXBuildFile; fileRef = A7E9B82512D37EC400DA6239 /* shen.cpp */; }; 481FB5721AC1B6D40076CFF3 /* simpson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82912D37EC400DA6239 /* simpson.cpp */; }; 481FB5731AC1B6EA0076CFF3 /* simpsoneven.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82B12D37EC400DA6239 /* simpsoneven.cpp */; }; 481FB5741AC1B6EA0076CFF3 /* smithwilson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83212D37EC400DA6239 /* smithwilson.cpp */; }; 481FB5751AC1B6EA0076CFF3 /* soergel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83512D37EC400DA6239 /* soergel.cpp */; }; 481FB5761AC1B6EA0076CFF3 /* solow.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83712D37EC400DA6239 /* solow.cpp */; }; 481FB5771AC1B6EA0076CFF3 /* spearman.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83B12D37EC400DA6239 /* spearman.cpp */; }; 481FB5781AC1B6EA0076CFF3 /* speciesprofile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83D12D37EC400DA6239 /* speciesprofile.cpp */; }; 481FB5791AC1B6EA0076CFF3 /* structchi2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84512D37EC400DA6239 /* structchi2.cpp */; }; 481FB57A1AC1B6EA0076CFF3 /* structchord.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84712D37EC400DA6239 /* structchord.cpp */; }; 481FB57B1AC1B6EA0076CFF3 /* structeuclidean.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84912D37EC400DA6239 /* structeuclidean.cpp */; }; 481FB57C1AC1B6EA0076CFF3 /* structkulczynski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84B12D37EC400DA6239 /* structkulczynski.cpp */; }; 481FB57D1AC1B6EA0076CFF3 /* structpearson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84D12D37EC400DA6239 /* structpearson.cpp */; }; 481FB57E1AC1B6EA0076CFF3 /* unweighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87012D37EC400DA6239 /* unweighted.cpp */; }; 481FB57F1AC1B6EA0076CFF3 /* uvest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87212D37EC400DA6239 /* uvest.cpp */; }; 481FB5801AC1B6EA0076CFF3 /* weighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87C12D37EC400DA6239 /* weighted.cpp */; }; 481FB5811AC1B6EA0076CFF3 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; }; 481FB5821AC1B6FF0076CFF3 /* bellerophon.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65C12D37EC300DA6239 /* bellerophon.cpp */; }; 481FB5831AC1B6FF0076CFF3 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67412D37EC400DA6239 /* ccode.cpp */; }; 481FB5841AC1B6FF0076CFF3 /* mothurchimera.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67812D37EC400DA6239 /* mothurchimera.cpp */; }; 481FB5851AC1B6FF0076CFF3 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68012D37EC400DA6239 /* chimeracheckrdp.cpp */; }; 481FB5861AC1B6FF0076CFF3 /* chimerarealigner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68412D37EC400DA6239 /* chimerarealigner.cpp */; }; 481FB5871AC1B6FF0076CFF3 /* decalc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6C112D37EC400DA6239 /* decalc.cpp */; }; 481FB5881AC1B6FF0076CFF3 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68812D37EC400DA6239 /* chimeraslayer.cpp */; }; 481FB5891AC1B6FF0076CFF3 /* maligner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74512D37EC400DA6239 /* maligner.cpp */; }; 481FB58A1AC1B6FF0076CFF3 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
A7BF221214587886000AD524 /* myPerseus.cpp */; }; 481FB58B1AC1B6FF0076CFF3 /* pintail.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79312D37EC400DA6239 /* pintail.cpp */; }; 481FB58C1AC1B6FF0076CFF3 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82E12D37EC400DA6239 /* slayer.cpp */; }; 481FB58D1AC1B7060076CFF3 /* collect.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A612D37EC400DA6239 /* collect.cpp */; }; 481FB58E1AC1B7060076CFF3 /* completelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F98E4C1A9CFD670005E81B /* completelinkage.cpp */; }; 481FB58F1AC1B71B0076CFF3 /* newcommandtemplate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DDF1552C4BD004209F9 /* newcommandtemplate.cpp */; }; 481FB5901AC1B71B0076CFF3 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; }; 481FB5911AC1B71B0076CFF3 /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; }; 481FB5921AC1B71B0076CFF3 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; }; 481FB5931AC1B71B0076CFF3 /* binsequencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66012D37EC300DA6239 /* binsequencecommand.cpp */; }; 481FB5951AC1B71B0076CFF3 /* chimerabellerophoncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67A12D37EC400DA6239 /* chimerabellerophoncommand.cpp */; }; 481FB5961AC1B71B0076CFF3 /* chimeraccodecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67C12D37EC400DA6239 /* chimeraccodecommand.cpp */; }; 481FB5971AC1B71B0076CFF3 /* chimeracheckcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67E12D37EC400DA6239 /* chimeracheckcommand.cpp */; }; 481FB5981AC1B71B0076CFF3 /* chimerapintailcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68212D37EC400DA6239 /* chimerapintailcommand.cpp */; }; 481FB5991AC1B71B0076CFF3 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; }; 481FB59A1AC1B71B0076CFF3 /* chimeraslayercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68A12D37EC400DA6239 /* chimeraslayercommand.cpp */; }; 481FB59B1AC1B71B0076CFF3 /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; }; 481FB59C1AC1B71B0076CFF3 /* chopseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68C12D37EC400DA6239 /* chopseqscommand.cpp */; }; 481FB59D1AC1B71B0076CFF3 /* classifyotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69012D37EC400DA6239 /* classifyotucommand.cpp */; }; 481FB59E1AC1B71B0076CFF3 /* classifyseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */; }; 481FB5A01AC1B71B0076CFF3 /* classifysvmsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7B2181FE17AD777B00286E6A /* classifysvmsharedcommand.cpp */; }; 481FB5A11AC1B71B0076CFF3 /* classifytreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */; }; 481FB5A21AC1B71B0076CFF3 /* clearcutcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */; }; 481FB5A41AC1B7300076CFF3 /* clustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69C12D37EC400DA6239 /* 
clustercommand.cpp */; }; 481FB5A51AC1B7300076CFF3 /* clusterdoturcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69E12D37EC400DA6239 /* clusterdoturcommand.cpp */; }; 481FB5A61AC1B7300076CFF3 /* clusterfragmentscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A012D37EC400DA6239 /* clusterfragmentscommand.cpp */; }; 481FB5A71AC1B7300076CFF3 /* clustersplitcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A212D37EC400DA6239 /* clustersplitcommand.cpp */; }; 481FB5A81AC1B7300076CFF3 /* collectcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A812D37EC400DA6239 /* collectcommand.cpp */; }; 481FB5A91AC1B7300076CFF3 /* collectsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6AC12D37EC400DA6239 /* collectsharedcommand.cpp */; }; 481FB5AA1AC1B7300076CFF3 /* consensusseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */; }; 481FB5AB1AC1B7300076CFF3 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; }; 481FB5AC1AC1B7300076CFF3 /* corraxescommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */; }; 481FB5AD1AC1B7300076CFF3 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; }; 481FB5AE1AC1B7300076CFF3 /* countseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7730EFE13967241007433A3 /* countseqscommand.cpp */; }; 481FB5AF1AC1B7300076CFF3 /* createdatabasecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */; }; 481FB5B01AC1B7300076CFF3 /* uniqueseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6C312D37EC400DA6239 /* uniqueseqscommand.cpp */; }; 481FB5B11AC1B7300076CFF3 /* degapseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6C512D37EC400DA6239 /* degapseqscommand.cpp */; }; 481FB5B21AC1B7300076CFF3 /* deuniqueseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6C712D37EC400DA6239 /* deuniqueseqscommand.cpp */; }; 481FB5B31AC1B7300076CFF3 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; }; 481FB5B41AC1B7300076CFF3 /* distancecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6CB12D37EC400DA6239 /* distancecommand.cpp */; }; 481FB5B51AC1B7300076CFF3 /* filterseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E312D37EC400DA6239 /* filterseqscommand.cpp */; }; 481FB5B61AC1B74F0076CFF3 /* filtersharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */; }; 481FB5B81AC1B74F0076CFF3 /* getcoremicrobiomecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */; }; 481FB5B91AC1B74F0076CFF3 /* getcurrentcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FE7C3F1330EA1000F7B327 /* getcurrentcommand.cpp */; }; 481FB5BA1AC1B74F0076CFF3 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; }; 481FB5BB1AC1B74F0076CFF3 /* getgroupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6F212D37EC400DA6239 /* getgroupcommand.cpp */; }; 481FB5BC1AC1B74F0076CFF3 /* getgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef 
= A7E9B6F412D37EC400DA6239 /* getgroupscommand.cpp */; }; 481FB5BD1AC1B74F0076CFF3 /* getlabelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */; }; 481FB5BE1AC1B74F0076CFF3 /* getmetacommunitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */; }; 481FB5BF1AC1B74F0076CFF3 /* getmimarkspackagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705ABB19BE32C50075E977 /* getmimarkspackagecommand.cpp */; }; 481FB5C01AC1B74F0076CFF3 /* getlineagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6F812D37EC400DA6239 /* getlineagecommand.cpp */; }; 481FB5C11AC1B74F0076CFF3 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6FA12D37EC400DA6239 /* getlistcountcommand.cpp */; }; 481FB5C21AC1B74F0076CFF3 /* getoturepcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6FE12D37EC400DA6239 /* getoturepcommand.cpp */; }; 481FB5C41AC1B74F0076CFF3 /* getotuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056E5156A93D000924A2D /* getotuscommand.cpp */; }; 481FB5C51AC1B74F0076CFF3 /* getrabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70212D37EC400DA6239 /* getrabundcommand.cpp */; }; 481FB5C61AC1B74F0076CFF3 /* getrelabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70412D37EC400DA6239 /* getrelabundcommand.cpp */; }; 481FB5C71AC1B74F0076CFF3 /* getsabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70612D37EC400DA6239 /* getsabundcommand.cpp */; }; 481FB5C81AC1B74F0076CFF3 /* getseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70812D37EC400DA6239 /* getseqscommand.cpp */; }; 481FB5C91AC1B74F0076CFF3 /* getsharedotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70A12D37EC400DA6239 /* getsharedotucommand.cpp */; }; 481FB5CB1AC1B74F0076CFF3 /* heatmapcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71E12D37EC400DA6239 /* heatmapcommand.cpp */; }; 481FB5CC1AC1B74F0076CFF3 /* heatmapsimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72212D37EC400DA6239 /* heatmapsimcommand.cpp */; }; 481FB5CD1AC1B74F0076CFF3 /* helpcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72812D37EC400DA6239 /* helpcommand.cpp */; }; 481FB5CE1AC1B75C0076CFF3 /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; }; 481FB5CF1AC1B75C0076CFF3 /* indicatorcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72B12D37EC400DA6239 /* indicatorcommand.cpp */; }; 481FB5D01AC1B75C0076CFF3 /* kruskalwalliscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */; }; 481FB5D11AC1B75C0076CFF3 /* lefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */; }; 481FB5D21AC1B75C0076CFF3 /* libshuffcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */; }; 481FB5D31AC1B75C0076CFF3 /* listotuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotuscommand.cpp */; }; 481FB5D41AC1B75C0076CFF3 /* listseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */; }; 481FB5D61AC1B75C0076CFF3 /* mantelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FA10011302E096003860FE /* mantelcommand.cpp 
*/; }; 481FB5D71AC1B75C0076CFF3 /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; }; 481FB5D81AC1B75C0076CFF3 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; }; 481FB5D91AC1B75C0076CFF3 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; }; 481FB5DA1AC1B75C0076CFF3 /* makegroupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74312D37EC400DA6239 /* makegroupcommand.cpp */; }; 481FB5DB1AC1B75C0076CFF3 /* makelefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741744A175CD9B1007DF49B /* makelefsecommand.cpp */; }; 481FB5DC1AC1B75C0076CFF3 /* makelookupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */; }; 481FB5DD1AC1B77E0076CFF3 /* distsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74912D37EC400DA6239 /* distsharedcommand.cpp */; }; 481FB5DE1AC1B77E0076CFF3 /* mergesfffilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705ABF19BE32C50075E977 /* mergesfffilecommand.cpp */; }; 481FB5DF1AC1B77E0076CFF3 /* mergefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75312D37EC400DA6239 /* mergefilecommand.cpp */; }; 481FB5E01AC1B77E0076CFF3 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; }; 481FB5E11AC1B77E0076CFF3 /* mergetaxsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */; }; 481FB5E21AC1B77E0076CFF3 /* metastatscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75712D37EC400DA6239 /* metastatscommand.cpp */; }; 481FB5E31AC1B77E0076CFF3 /* mgclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75912D37EC400DA6239 /* mgclustercommand.cpp */; }; 481FB5E41AC1B77E0076CFF3 /* mimarksattributescommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 487C5A851AB88B93002AF48A /* mimarksattributescommand.cpp */; }; 481FB5E51AC1B77E0076CFF3 /* nocommands.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76912D37EC400DA6239 /* nocommands.cpp */; }; 481FB5E61AC1B77E0076CFF3 /* normalizesharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76B12D37EC400DA6239 /* normalizesharedcommand.cpp */; }; 481FB5E71AC1B77E0076CFF3 /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; }; 481FB5E81AC1B77E0076CFF3 /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; }; 481FB5E91AC1B77E0076CFF3 /* otuhierarchycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77912D37EC400DA6239 /* otuhierarchycommand.cpp */; }; 481FB5EA1AC1B77E0076CFF3 /* pairwiseseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77D12D37EC400DA6239 /* pairwiseseqscommand.cpp */; }; 481FB5EB1AC1B77E0076CFF3 /* fastaqinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77F12D37EC400DA6239 /* fastaqinfocommand.cpp */; }; 481FB5ED1AC1B77E0076CFF3 /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78512D37EC400DA6239 /* parsimonycommand.cpp */; }; 481FB5EE1AC1B77E0076CFF3 /* pcacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC486612D795D60055BC5C /* pcacommand.cpp */; }; 
481FB5EF1AC1B77E0076CFF3 /* pcoacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */; }; 481FB5F11AC1B77E0076CFF3 /* phylodiversitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */; }; 481FB5F21AC1B77E0076CFF3 /* phylotypecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79112D37EC400DA6239 /* phylotypecommand.cpp */; }; 481FB5F41AC1B77E0076CFF3 /* preclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */; }; 481FB5F51AC1B77E0076CFF3 /* primerdesigncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */; }; 481FB5F61AC1B77E0076CFF3 /* quitcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A112D37EC400DA6239 /* quitcommand.cpp */; }; 481FB5F71AC1B77E0076CFF3 /* rarefactcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7AB12D37EC400DA6239 /* rarefactcommand.cpp */; }; 481FB5F81AC1B77E0076CFF3 /* rarefactsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7AE12D37EC400DA6239 /* rarefactsharedcommand.cpp */; }; 481FB5F91AC1B77E0076CFF3 /* removedistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0231416B8244B006BA09E /* removedistscommand.cpp */; }; 481FB5FA1AC1B77E0076CFF3 /* removegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C312D37EC400DA6239 /* removegroupscommand.cpp */; }; 481FB5FB1AC1B77E0076CFF3 /* removelineagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C512D37EC400DA6239 /* removelineagecommand.cpp */; }; 481FB5FD1AC1B7970076CFF3 /* removeotuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotuscommand.cpp */; }; 481FB5FE1AC1B7970076CFF3 /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; }; 481FB5FF1AC1B7970076CFF3 /* removeseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C912D37EC400DA6239 /* removeseqscommand.cpp */; }; 481FB6001AC1B7970076CFF3 /* renameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */; }; 481FB6011AC1B7970076CFF3 /* reversecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7CD12D37EC400DA6239 /* reversecommand.cpp */; }; 481FB6021AC1B7970076CFF3 /* screenseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D112D37EC400DA6239 /* screenseqscommand.cpp */; }; 481FB6031AC1B7970076CFF3 /* aligncheckcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D312D37EC400DA6239 /* aligncheckcommand.cpp */; }; 481FB6041AC1B7970076CFF3 /* sensspeccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D512D37EC400DA6239 /* sensspeccommand.cpp */; }; 481FB6051AC1B7970076CFF3 /* seqerrorcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D712D37EC400DA6239 /* seqerrorcommand.cpp */; }; 481FB6061AC1B7970076CFF3 /* seqsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D912D37EC400DA6239 /* seqsummarycommand.cpp */; }; 481FB6071AC1B7970076CFF3 /* setcurrentcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FE7E6C13311EA400F7B327 /* setcurrentcommand.cpp */; }; 481FB6081AC1B7970076CFF3 /* setdircommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7DF12D37EC400DA6239 /* setdircommand.cpp */; }; 481FB6091AC1B7970076CFF3 /* 
setlogfilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7E112D37EC400DA6239 /* setlogfilecommand.cpp */; }; 481FB60A1AC1B7970076CFF3 /* sffinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7E312D37EC400DA6239 /* sffinfocommand.cpp */; }; 481FB60B1AC1B7AC0076CFF3 /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; }; 481FB60C1AC1B7AC0076CFF3 /* makesharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F212D37EC400DA6239 /* makesharedcommand.cpp */; }; 481FB60D1AC1B7AC0076CFF3 /* shhhercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82712D37EC400DA6239 /* shhhercommand.cpp */; }; 481FB60E1AC1B7AC0076CFF3 /* shhhseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774101314695AF60098E6AC /* shhhseqscommand.cpp */; }; 481FB60F1AC1B7AC0076CFF3 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; }; 481FB6101AC1B7AC0076CFF3 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; }; 481FB6111AC1B7AC0076CFF3 /* splitabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */; }; 481FB6121AC1B7AC0076CFF3 /* splitgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84112D37EC400DA6239 /* splitgroupscommand.cpp */; }; 481FB6131AC1B7AC0076CFF3 /* sracommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A747EC70181EA0F900345732 /* sracommand.cpp */; }; 481FB6141AC1B7AC0076CFF3 /* subsamplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84F12D37EC400DA6239 /* subsamplecommand.cpp */; }; 481FB6151AC1B7AC0076CFF3 /* summarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85712D37EC400DA6239 /* summarycommand.cpp */; }; 481FB6161AC1B7AC0076CFF3 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; }; 481FB6171AC1B7AC0076CFF3 /* summarysharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85912D37EC400DA6239 /* summarysharedcommand.cpp */; }; 481FB6181AC1B7AC0076CFF3 /* summarytaxcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FFB557142CA02C004884F2 /* summarytaxcommand.cpp */; }; 481FB6191AC1B7AC0076CFF3 /* systemcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85B12D37EC400DA6239 /* systemcommand.cpp */; }; 481FB61A1AC1B7AC0076CFF3 /* treesharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86212D37EC400DA6239 /* treesharedcommand.cpp */; }; 481FB61B1AC1B7AC0076CFF3 /* trimflowscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86812D37EC400DA6239 /* trimflowscommand.cpp */; }; 481FB61C1AC1B7AC0076CFF3 /* trimseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86A12D37EC400DA6239 /* trimseqscommand.cpp */; }; 481FB61D1AC1B7AC0076CFF3 /* unifracunweightedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86C12D37EC400DA6239 /* unifracunweightedcommand.cpp */; }; 481FB61E1AC1B7AC0076CFF3 /* unifracweightedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86E12D37EC400DA6239 /* unifracweightedcommand.cpp */; }; 481FB61F1AC1B7AC0076CFF3 /* venncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87A12D37EC400DA6239 /* venncommand.cpp */; }; 481FB6201AC1B7B30076CFF3 /* commandoptionparser.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */; }; 481FB6211AC1B7BA0076CFF3 /* communitytype.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7132EB2184E792700AAA402 /* communitytype.cpp */; }; 481FB6221AC1B7BA0076CFF3 /* kmeans.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D395C3184FA3A200A350D7 /* kmeans.cpp */; }; 481FB6231AC1B7BA0076CFF3 /* pam.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B093BF18579F0400843CD1 /* pam.cpp */; }; 481FB6241AC1B7BA0076CFF3 /* qFinderDMM.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */; }; 481FB6251AC1B7EA0076CFF3 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; }; 481FB6261AC1B7EA0076CFF3 /* alignmentcell.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65512D37EC300DA6239 /* alignmentcell.cpp */; }; 481FB6271AC1B7EA0076CFF3 /* alignmentdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65712D37EC300DA6239 /* alignmentdb.cpp */; }; 481FB62A1AC1B7EA0076CFF3 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; }; 481FB62C1AC1B7EA0076CFF3 /* designmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77916E6176F7F7600EEFE18 /* designmap.cpp */; }; 481FB62D1AC1B7EA0076CFF3 /* distancedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */; }; 481FB62E1AC1B7EA0076CFF3 /* fastamap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */; }; 481FB62F1AC1B7EA0076CFF3 /* fastqread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C51DEF1A76B888004ECDF1 /* fastqread.cpp */; }; 481FB6301AC1B7EA0076CFF3 /* flowdata.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E712D37EC400DA6239 /* flowdata.cpp */; }; 481FB6311AC1B7EA0076CFF3 /* fullmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6EE12D37EC400DA6239 /* fullmatrix.cpp */; }; 481FB6321AC1B7EA0076CFF3 /* groupmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71412D37EC400DA6239 /* groupmap.cpp */; }; 481FB6331AC1B7EA0076CFF3 /* kmer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73312D37EC400DA6239 /* kmer.cpp */; }; 481FB6341AC1B7EA0076CFF3 /* kmeralign.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C51DF11A793EFE004ECDF1 /* kmeralign.cpp */; }; 481FB6351AC1B7EA0076CFF3 /* kmerdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73512D37EC400DA6239 /* kmerdb.cpp */; }; 481FB6361AC1B7EA0076CFF3 /* listvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73F12D37EC400DA6239 /* listvector.cpp */; }; 481FB6371AC1B7EA0076CFF3 /* nameassignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75F12D37EC400DA6239 /* nameassignment.cpp */; }; 481FB6381AC1B7EA0076CFF3 /* oligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705ABD19BE32C50075E977 /* oligos.cpp */; }; 481FB6391AC1B7EA0076CFF3 /* ordervector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77712D37EC400DA6239 /* ordervector.cpp */; }; 481FB63A1AC1B7EA0076CFF3 /* qualityscores.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79F12D37EC400DA6239 /* qualityscores.cpp */; }; 481FB63B1AC1B7EA0076CFF3 /* rabundvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A312D37EC400DA6239 /* rabundvector.cpp */; }; 481FB63E1AC1B7EA0076CFF3 /* sabundvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7CF12D37EC400DA6239 /* sabundvector.cpp */; }; 
481FB63F1AC1B7EA0076CFF3 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; }; 481FB6401AC1B7EA0076CFF3 /* sequencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7DD12D37EC400DA6239 /* sequencedb.cpp */; }; 481FB6411AC1B7EA0076CFF3 /* sequenceparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */; }; 481FB6421AC1B7EA0076CFF3 /* sharedlistvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80412D37EC400DA6239 /* sharedlistvector.cpp */; }; 481FB6431AC1B7EA0076CFF3 /* sharedordervector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80D12D37EC400DA6239 /* sharedordervector.cpp */; }; 481FB6471AC1B7EA0076CFF3 /* sparsematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83912D37EC400DA6239 /* sparsematrix.cpp */; }; 481FB6481AC1B7EA0076CFF3 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; }; 481FB6491AC1B7F40076CFF3 /* suffixdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85112D37EC400DA6239 /* suffixdb.cpp */; }; 481FB64A1AC1B7F40076CFF3 /* suffixnodes.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */; }; 481FB64B1AC1B7F40076CFF3 /* suffixtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85512D37EC400DA6239 /* suffixtree.cpp */; }; 481FB64C1AC1B7F40076CFF3 /* tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85F12D37EC400DA6239 /* tree.cpp */; }; 481FB64D1AC1B7F40076CFF3 /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86412D37EC400DA6239 /* treemap.cpp */; }; 481FB64E1AC1B7F40076CFF3 /* treenode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86612D37EC400DA6239 /* treenode.cpp */; }; 481FB64F1AC1B8100076CFF3 /* consensus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B512D37EC400DA6239 /* consensus.cpp */; }; 481FB6501AC1B8100076CFF3 /* dlibshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6D112D37EC400DA6239 /* dlibshuff.cpp */; }; 481FB6521AC1B8100076CFF3 /* fileoutput.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E012D37EC400DA6239 /* fileoutput.cpp */; }; 481FB6531AC1B8100076CFF3 /* gotohoverlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71012D37EC400DA6239 /* gotohoverlap.cpp */; }; 481FB6551AC1B8100076CFF3 /* heatmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71C12D37EC400DA6239 /* heatmap.cpp */; }; 481FB6561AC1B8100076CFF3 /* heatmapsim.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72012D37EC400DA6239 /* heatmapsim.cpp */; }; 481FB6571AC1B8100076CFF3 /* inputdata.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72D12D37EC400DA6239 /* inputdata.cpp */; }; 481FB6581AC1B8100076CFF3 /* libshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73912D37EC400DA6239 /* libshuff.cpp */; }; 481FB6591AC1B8100076CFF3 /* linearalgebra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC480D12D788F20055BC5C /* linearalgebra.cpp */; }; 481FB65A1AC1B8100076CFF3 /* wilcox.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D9378917B146B5001E90B0 /* wilcox.cpp */; }; 481FB65B1AC1B82C0076CFF3 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; }; 481FB65C1AC1B82C0076CFF3 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; }; 
481FB65F1AC1B8450076CFF3 /* myseqdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774104614696F320098E6AC /* myseqdist.cpp */; }; 481FB6601AC1B8450076CFF3 /* nast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76112D37EC400DA6239 /* nast.cpp */; }; 481FB6611AC1B8450076CFF3 /* alignreport.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76312D37EC400DA6239 /* alignreport.cpp */; }; 481FB6621AC1B8450076CFF3 /* noalign.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76712D37EC400DA6239 /* noalign.cpp */; }; 481FB6631AC1B8450076CFF3 /* needlemanoverlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76512D37EC400DA6239 /* needlemanoverlap.cpp */; }; 481FB6641AC1B8450076CFF3 /* optionparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77512D37EC400DA6239 /* optionparser.cpp */; }; 481FB6651AC1B8450076CFF3 /* overlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77B12D37EC400DA6239 /* overlap.cpp */; }; 481FB6701AC1B8820076CFF3 /* raredisplay.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A712D37EC400DA6239 /* raredisplay.cpp */; }; 481FB6711AC1B8820076CFF3 /* rarefact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A912D37EC400DA6239 /* rarefact.cpp */; }; 481FB6721AC1B8820076CFF3 /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; }; 481FB6731AC1B8820076CFF3 /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; }; 481FB6761AC1B88F0076CFF3 /* readblast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B012D37EC400DA6239 /* readblast.cpp */; }; 481FB6771AC1B88F0076CFF3 /* readcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B212D37EC400DA6239 /* readcluster.cpp */; }; 481FB6781AC1B88F0076CFF3 /* readcolumn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B412D37EC400DA6239 /* readcolumn.cpp */; }; 481FB6791AC1B88F0076CFF3 /* readphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7BD12D37EC400DA6239 /* readphylip.cpp */; }; 481FB67A1AC1B88F0076CFF3 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7BF12D37EC400DA6239 /* readtree.cpp */; }; 481FB67B1AC1B88F0076CFF3 /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; }; 481FB67C1AC1B88F0076CFF3 /* splitmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84312D37EC400DA6239 /* splitmatrix.cpp */; }; 481FB67D1AC1B88F0076CFF3 /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; }; 481FB67F1AC1B8960076CFF3 /* singlelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82D12D37EC400DA6239 /* singlelinkage.cpp */; }; 481FB6801AC1B8960076CFF3 /* slibshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83012D37EC400DA6239 /* slibshuff.cpp */; }; 481FB6811AC1B8960076CFF3 /* subsample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7876A25152A017C00A0AE86 /* subsample.cpp */; }; 481FB6821AC1B8AF0076CFF3 /* svm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7B21820117AD77BD00286E6A /* svm.cpp */; }; 481FB6831AC1B8B80076CFF3 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; }; 481FB6841AC1B8B80076CFF3 /* trimoligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FF19F1140FFDA500AD216D /* trimoligos.cpp */; }; 481FB6851AC1B8B80076CFF3 /* validcalculator.cpp in Sources */ 
= {isa = PBXBuildFile; fileRef = A7E9B87412D37EC400DA6239 /* validcalculator.cpp */; }; 481FB6861AC1B8B80076CFF3 /* validparameter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87612D37EC400DA6239 /* validparameter.cpp */; }; 481FB6871AC1B8B80076CFF3 /* venn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87812D37EC400DA6239 /* venn.cpp */; }; 481FB6881AC1B8B80076CFF3 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; }; 481FB6891AC1BA760076CFF3 /* phylosummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */; }; 481FB68A1AC1BA9E0076CFF3 /* alignnode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB66161C570F009860A1 /* alignnode.cpp */; }; 481FB68B1AC1BA9E0076CFF3 /* aligntree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB68161C570F009860A1 /* aligntree.cpp */; }; 481FB68C1AC1BA9E0076CFF3 /* bayesian.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65A12D37EC300DA6239 /* bayesian.cpp */; }; 481FB68D1AC1BA9E0076CFF3 /* classify.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68E12D37EC400DA6239 /* classify.cpp */; }; 481FB68E1AC1BA9E0076CFF3 /* kmernode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6D161C572A009860A1 /* kmernode.cpp */; }; 481FB68F1AC1BA9E0076CFF3 /* kmertree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6F161C572A009860A1 /* kmertree.cpp */; }; 481FB6901AC1BA9E0076CFF3 /* knn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73712D37EC400DA6239 /* knn.cpp */; }; 481FB6911AC1BAA60076CFF3 /* phylotree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78F12D37EC400DA6239 /* phylotree.cpp */; }; 481FB6921AC1BAA60076CFF3 /* taxonomyequalizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85D12D37EC400DA6239 /* taxonomyequalizer.cpp */; }; 481FB6931AC1BAA60076CFF3 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; }; 4827A4DC1CB3ED2200345170 /* fastqdataset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4827A4DA1CB3ED2100345170 /* fastqdataset.cpp */; }; 4829D9671B8387D0002EEED4 /* testbiominfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4829D9651B8387D0002EEED4 /* testbiominfocommand.cpp */; }; 482AC3B92562B57600C9AF4A /* picrust.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 482AC3B72562B57600C9AF4A /* picrust.cpp */; }; 482AC3BA2562B57600C9AF4A /* picrust.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 482AC3B72562B57600C9AF4A /* picrust.cpp */; }; 483A9BAE225BBE55006102DF /* metroig.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 483A9BAC225BBE55006102DF /* metroig.cpp */; }; 483A9BAF225BBE55006102DF /* metroig.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 483A9BAC225BBE55006102DF /* metroig.cpp */; }; 483C952E188F0CAD0035E7B7 /* (null) in Sources */ = {isa = PBXBuildFile; }; 484976DF22552E0B00F3A291 /* erarefaction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 484976DD22552E0B00F3A291 /* erarefaction.cpp */; }; 484976E022552E0B00F3A291 /* erarefaction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 484976DD22552E0B00F3A291 /* erarefaction.cpp */; }; 484976E32255412400F3A291 /* igabundance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 484976E12255412400F3A291 /* igabundance.cpp */; }; 484976E42255412400F3A291 /* igabundance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 484976E12255412400F3A291 /* igabundance.cpp */; }; 484976E72256799100F3A291 /* 
diversityestimatorcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 484976E52256799100F3A291 /* diversityestimatorcommand.cpp */; }; 484976E82256799100F3A291 /* diversityestimatorcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 484976E52256799100F3A291 /* diversityestimatorcommand.cpp */; }; 48576EA11D05DBC600BBC9C0 /* averagelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2114A7671C654D7400D3D8D9 /* averagelinkage.cpp */; }; 48576EA21D05DBCD00BBC9C0 /* vsearchfileparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */; }; 48576EA51D05E8F600BBC9C0 /* testoptimatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48576EA31D05E8F600BBC9C0 /* testoptimatrix.cpp */; }; 48576EA81D05F59300BBC9C0 /* distpdataset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48576EA61D05F59300BBC9C0 /* distpdataset.cpp */; }; 485B0E081F264F2E00CA5F57 /* sharedrabundvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 485B0E061F264F2E00CA5F57 /* sharedrabundvector.cpp */; }; 485B0E0E1F27C40500CA5F57 /* sharedrabundfloatvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 485B0E0C1F27C40500CA5F57 /* sharedrabundfloatvector.cpp */; }; 48705AC419BE32C50075E977 /* getmimarkspackagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705ABB19BE32C50075E977 /* getmimarkspackagecommand.cpp */; }; 48705AC519BE32C50075E977 /* oligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705ABD19BE32C50075E977 /* oligos.cpp */; }; 48705AC619BE32C50075E977 /* mergesfffilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705ABF19BE32C50075E977 /* mergesfffilecommand.cpp */; }; 48705AC719BE32C50075E977 /* sharedrjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705AC119BE32C50075E977 /* sharedrjsd.cpp */; }; 487C5A871AB88B93002AF48A /* mimarksattributescommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 487C5A851AB88B93002AF48A /* mimarksattributescommand.cpp */; }; 487D09EC1CB2CEFE007039BF /* averagelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2114A7671C654D7400D3D8D9 /* averagelinkage.cpp */; }; 488563D123CD00C4007B5659 /* taxonomy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488563CF23CD00C4007B5659 /* taxonomy.cpp */; }; 488563D223CD00C4007B5659 /* taxonomy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488563CF23CD00C4007B5659 /* taxonomy.cpp */; }; 488841611CC515A000C5E972 /* (null) in Sources */ = {isa = PBXBuildFile; }; 488841621CC515A000C5E972 /* (null) in Sources */ = {isa = PBXBuildFile; }; 488841651CC6C34900C5E972 /* renamefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488841631CC6C34900C5E972 /* renamefilecommand.cpp */; }; 488841661CC6C35500C5E972 /* renamefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488841631CC6C34900C5E972 /* renamefilecommand.cpp */; }; 4889EA221E8962D50054E0BB /* summary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4889EA201E8962D50054E0BB /* summary.cpp */; }; 488C1DEA242D102B00BDCCB4 /* optidb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488C1DE8242D102B00BDCCB4 /* optidb.cpp */; }; 488C1DEB242D102B00BDCCB4 /* optidb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488C1DE8242D102B00BDCCB4 /* optidb.cpp */; }; 48910D431D5243E500F60EDB /* mergecountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D411D5243E500F60EDB /* mergecountcommand.cpp */; }; 48910D441D5243E500F60EDB /* mergecountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D411D5243E500F60EDB /* 
mergecountcommand.cpp */; }; 48910D461D58CAD700F60EDB /* opticluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D451D58CAD700F60EDB /* opticluster.cpp */; }; 48910D4B1D58CBA300F60EDB /* optimatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D491D58CBA300F60EDB /* optimatrix.cpp */; }; 48910D511D58E26C00F60EDB /* testopticluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D4D1D58E26C00F60EDB /* testopticluster.cpp */; }; 48910D521D58E26C00F60EDB /* distcdataset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D501D58E26C00F60EDB /* distcdataset.cpp */; }; 489387F62107A60C00284329 /* testoptirefmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 489387F42107A60C00284329 /* testoptirefmatrix.cpp */; }; 489387F9210F633E00284329 /* testOligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 489387F7210F633E00284329 /* testOligos.cpp */; }; 489387FA2110C79200284329 /* testtrimoligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4846AD881D3810DD00DE9913 /* testtrimoligos.cpp */; }; 4893DE2918EEF28100C615DF /* (null) in Sources */ = {isa = PBXBuildFile; }; 48998B69242E785100DBD0A9 /* onegapdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48998B68242E785100DBD0A9 /* onegapdist.cpp */; }; 48998B6A242E785100DBD0A9 /* onegapdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48998B68242E785100DBD0A9 /* onegapdist.cpp */; }; 489AF68F2106188E0028155E /* sensspeccalc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B01D2A2016470F006BE140 /* sensspeccalc.cpp */; }; 489AF690210618A80028155E /* optiblastmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48FB99CA20A4AD7D00FF9F6E /* optiblastmatrix.cpp */; }; 489AF691210619140028155E /* sharedrabundvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 485B0E061F264F2E00CA5F57 /* sharedrabundvector.cpp */; }; 489AF692210619170028155E /* sharedrabundfloatvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 485B0E0C1F27C40500CA5F57 /* sharedrabundfloatvector.cpp */; }; 489AF6932106192E0028155E /* clusterfitcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B01D2720163594006BE140 /* clusterfitcommand.cpp */; }; 489AF694210619410028155E /* optirefmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48FB99C3209B69FA00FF9F6E /* optirefmatrix.cpp */; }; 489AF6952106194A0028155E /* optifitcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48FB99CD20A4F3FB00FF9F6E /* optifitcluster.cpp */; }; 489AF6962106195E0028155E /* optidata.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48FB99C720A48EF700FF9F6E /* optidata.cpp */; }; 48A055302490066C00D0F97F /* sffread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A0552E2490066C00D0F97F /* sffread.cpp */; }; 48A055332491577800D0F97F /* sffheader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A055312491577800D0F97F /* sffheader.cpp */; }; 48A0B8EC2547282600726384 /* biom.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A0B8EA2547282600726384 /* biom.cpp */; }; 48A0B8F125472C4500726384 /* biomhdf5.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A0B8EF25472C4500726384 /* biomhdf5.cpp */; }; 48A0B8F625472C6500726384 /* biomsimple.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A0B8F425472C6500726384 /* biomsimple.cpp */; }; 48A11C6E1CDA40F0003481D8 /* testrenamefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A11C6C1CDA40F0003481D8 /* testrenamefilecommand.cpp */; }; 48A85BAD18E1AF2000199B6F /* (null) in Sources */ = {isa = PBXBuildFile; }; 48B01D2920163594006BE140 /* 
clusterfitcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B01D2720163594006BE140 /* clusterfitcommand.cpp */; }; 48B01D2C2016470F006BE140 /* sensspeccalc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B01D2A2016470F006BE140 /* sensspeccalc.cpp */; }; 48B44EEE1FB5006500789C45 /* currentfile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B44EED1FB5006500789C45 /* currentfile.cpp */; }; 48B44EEF1FB5006500789C45 /* currentfile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B44EED1FB5006500789C45 /* currentfile.cpp */; }; 48B44EF21FB9EF8200789C45 /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B44EF01FB9EF8200789C45 /* utils.cpp */; }; 48B44EF31FB9EF8200789C45 /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B44EF01FB9EF8200789C45 /* utils.cpp */; }; 48B662031BBB1B6600997EE4 /* testrenameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B662011BBB1B6600997EE4 /* testrenameseqscommand.cpp */; }; 48BD4EB821F7724C008EA73D /* filefile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BD4EB621F7724C008EA73D /* filefile.cpp */; }; 48BD4EB921F77258008EA73D /* filefile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BD4EB621F7724C008EA73D /* filefile.cpp */; }; 48BDDA711EC9D31400F0F6C0 /* sharedrabundvectors.hpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BDDA6F1EC9D31400F0F6C0 /* sharedrabundvectors.hpp */; }; 48BDDA721EC9D31400F0F6C0 /* sharedrabundvectors.hpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BDDA6F1EC9D31400F0F6C0 /* sharedrabundvectors.hpp */; }; 48BDDA751ECA067000F0F6C0 /* sharedrabundfloatvectors.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BDDA731ECA067000F0F6C0 /* sharedrabundfloatvectors.cpp */; }; 48BDDA761ECA067000F0F6C0 /* sharedrabundfloatvectors.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BDDA731ECA067000F0F6C0 /* sharedrabundfloatvectors.cpp */; }; 48BDDA791ECA3B8E00F0F6C0 /* rabundfloatvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BDDA771ECA3B8E00F0F6C0 /* rabundfloatvector.cpp */; }; 48BDDA7A1ECA3B8E00F0F6C0 /* rabundfloatvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BDDA771ECA3B8E00F0F6C0 /* rabundfloatvector.cpp */; }; 48C1DDC61D25C1BC00B5BA9D /* (null) in Sources */ = {isa = PBXBuildFile; }; 48C1DDC71D25C1BC00B5BA9D /* (null) in Sources */ = {isa = PBXBuildFile; }; 48C51DF01A76B888004ECDF1 /* fastqread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C51DEF1A76B888004ECDF1 /* fastqread.cpp */; }; 48C51DF31A793EFE004ECDF1 /* kmeralign.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C51DF11A793EFE004ECDF1 /* kmeralign.cpp */; }; 48C728651B66A77800D40830 /* testsequence.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C728641B66A77800D40830 /* testsequence.cpp */; }; 48C728671B66AB8800D40830 /* pcrseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 481623E11B56A2DB004C60B7 /* pcrseqscommand.cpp */; }; 48C7286A1B69598400D40830 /* testmergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C728681B69598400D40830 /* testmergegroupscommand.cpp */; }; 48C728721B6AB3B900D40830 /* testremovegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C7286F1B6AB3B900D40830 /* testremovegroupscommand.cpp */; }; 48C728751B6AB4CD00D40830 /* testgetgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C728731B6AB4CD00D40830 /* testgetgroupscommand.cpp */; }; 48C728791B728D6B00D40830 /* biominfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C728771B728D6B00D40830 /* 
biominfocommand.cpp */; }; 48C7287A1B728D6B00D40830 /* biominfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C728771B728D6B00D40830 /* biominfocommand.cpp */; }; 48CF76F021BEBDD300B2FB5C /* mergeotuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48CF76EE21BEBDD300B2FB5C /* mergeotuscommand.cpp */; }; 48CF76F121BEBDE000B2FB5C /* mergeotuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48CF76EE21BEBDD300B2FB5C /* mergeotuscommand.cpp */; }; 48D36FC924C1EAB0001A0FDC /* (null) in Sources */ = {isa = PBXBuildFile; }; 48D6E9681CA42389008DF76B /* testvsearchfileparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48D6E9661CA42389008DF76B /* testvsearchfileparser.cpp */; }; 48D6E96B1CA4262A008DF76B /* dataset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48D6E9691CA4262A008DF76B /* dataset.cpp */; }; 48DB37B31B3B27E000C372A4 /* makefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48DB37B11B3B27E000C372A4 /* makefilecommand.cpp */; }; 48DB37B41B3B27E000C372A4 /* makefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48DB37B11B3B27E000C372A4 /* makefilecommand.cpp */; }; 48E0230324BF488D00BFEA41 /* report.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E0230124BF488D00BFEA41 /* report.cpp */; }; 48E418561D08893A004C36AB /* (null) in Sources */ = {isa = PBXBuildFile; }; 48E543EB1E8F15A500FF6AB8 /* summary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4889EA201E8962D50054E0BB /* summary.cpp */; }; 48E543EC1E8F15B800FF6AB8 /* opticluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D451D58CAD700F60EDB /* opticluster.cpp */; }; 48E543ED1E8F15C800FF6AB8 /* optimatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48910D491D58CBA300F60EDB /* optimatrix.cpp */; }; 48E543EE1E92B91100FF6AB8 /* chimeravsearchcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48EDB76A1D1320DD00F76E93 /* chimeravsearchcommand.cpp */; }; 48E544411E9C292900FF6AB8 /* mcc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5443F1E9C292900FF6AB8 /* mcc.cpp */; }; 48E544421E9C292900FF6AB8 /* mcc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5443F1E9C292900FF6AB8 /* mcc.cpp */; }; 48E544451E9C2B1000FF6AB8 /* sensitivity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544431E9C2B1000FF6AB8 /* sensitivity.cpp */; }; 48E544461E9C2B1000FF6AB8 /* sensitivity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544431E9C2B1000FF6AB8 /* sensitivity.cpp */; }; 48E544491E9C2BE100FF6AB8 /* specificity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544471E9C2BE100FF6AB8 /* specificity.cpp */; }; 48E5444A1E9C2BE100FF6AB8 /* specificity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544471E9C2BE100FF6AB8 /* specificity.cpp */; }; 48E5444D1E9C2C8F00FF6AB8 /* tptn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5444B1E9C2C8F00FF6AB8 /* tptn.cpp */; }; 48E5444E1E9C2C8F00FF6AB8 /* tptn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5444B1E9C2C8F00FF6AB8 /* tptn.cpp */; }; 48E544511E9C2CFD00FF6AB8 /* tp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5444F1E9C2CFD00FF6AB8 /* tp.cpp */; }; 48E544521E9C2CFD00FF6AB8 /* tp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5444F1E9C2CFD00FF6AB8 /* tp.cpp */; }; 48E544551E9C2DF500FF6AB8 /* tn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544531E9C2DF500FF6AB8 /* tn.cpp */; }; 48E544561E9C2DF500FF6AB8 /* tn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544531E9C2DF500FF6AB8 /* tn.cpp */; }; 48E544591E9C2E6500FF6AB8 /* fp.cpp in Sources 
*/ = {isa = PBXBuildFile; fileRef = 48E544571E9C2E6500FF6AB8 /* fp.cpp */; }; 48E5445A1E9C2E6500FF6AB8 /* fp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544571E9C2E6500FF6AB8 /* fp.cpp */; }; 48E5445D1E9C2F0F00FF6AB8 /* fn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5445B1E9C2F0F00FF6AB8 /* fn.cpp */; }; 48E5445E1E9C2F0F00FF6AB8 /* fn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5445B1E9C2F0F00FF6AB8 /* fn.cpp */; }; 48E544611E9C2FB800FF6AB8 /* fpfn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5445F1E9C2FB800FF6AB8 /* fpfn.cpp */; }; 48E544621E9C2FB800FF6AB8 /* fpfn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5445F1E9C2FB800FF6AB8 /* fpfn.cpp */; }; 48E5446C1E9D3A8C00FF6AB8 /* f1score.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5446A1E9D3A8C00FF6AB8 /* f1score.cpp */; }; 48E5446D1E9D3A8C00FF6AB8 /* f1score.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5446A1E9D3A8C00FF6AB8 /* f1score.cpp */; }; 48E544701E9D3B2D00FF6AB8 /* accuracy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5446E1E9D3B2D00FF6AB8 /* accuracy.cpp */; }; 48E544711E9D3B2D00FF6AB8 /* accuracy.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5446E1E9D3B2D00FF6AB8 /* accuracy.cpp */; }; 48E544741E9D3C1200FF6AB8 /* ppv.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544721E9D3C1200FF6AB8 /* ppv.cpp */; }; 48E544751E9D3C1200FF6AB8 /* ppv.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544721E9D3C1200FF6AB8 /* ppv.cpp */; }; 48E544781E9D3CE400FF6AB8 /* npv.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544761E9D3CE400FF6AB8 /* npv.cpp */; }; 48E544791E9D3CE400FF6AB8 /* npv.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E544761E9D3CE400FF6AB8 /* npv.cpp */; }; 48E5447C1E9D3F0400FF6AB8 /* fdr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5447A1E9D3F0400FF6AB8 /* fdr.cpp */; }; 48E5447D1E9D3F0400FF6AB8 /* fdr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E5447A1E9D3F0400FF6AB8 /* fdr.cpp */; }; 48E7E0A32278A21B00B74910 /* metrolognormal.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E7E0A12278A21B00B74910 /* metrolognormal.cpp */; }; 48E7E0A62278AD4800B74910 /* diversityutils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E7E0A42278AD4800B74910 /* diversityutils.cpp */; }; 48E981CF189C38FB0042BE9D /* (null) in Sources */ = {isa = PBXBuildFile; }; 48ED1E79235E1ACA003E66F7 /* scriptengine.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48ED1E77235E1ACA003E66F7 /* scriptengine.cpp */; }; 48ED1E7A235E1ACA003E66F7 /* scriptengine.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48ED1E77235E1ACA003E66F7 /* scriptengine.cpp */; }; 48ED1E7D235E1BB4003E66F7 /* interactengine.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48ED1E7B235E1BB4003E66F7 /* interactengine.cpp */; }; 48ED1E7E235E1BB4003E66F7 /* interactengine.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48ED1E7B235E1BB4003E66F7 /* interactengine.cpp */; }; 48ED1E81235E1D59003E66F7 /* batchengine.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48ED1E7F235E1D59003E66F7 /* batchengine.cpp */; }; 48ED1E82235E1D59003E66F7 /* batchengine.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48ED1E7F235E1D59003E66F7 /* batchengine.cpp */; }; 48ED1E8523689DE8003E66F7 /* srainfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48ED1E8323689DE8003E66F7 /* srainfocommand.cpp */; }; 48ED1E8623689DE8003E66F7 /* srainfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48ED1E8323689DE8003E66F7 /* srainfocommand.cpp */; }; 
48EDB76C1D1320DD00F76E93 /* chimeravsearchcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48EDB76A1D1320DD00F76E93 /* chimeravsearchcommand.cpp */; }; 48F06CCD1D74BEC4004A45DD /* testphylotree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F06CCB1D74BEC4004A45DD /* testphylotree.cpp */; }; 48F1C16623D606050034DAAF /* makeclrcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F1C16423D606050034DAAF /* makeclrcommand.cpp */; }; 48F1C16723D606050034DAAF /* makeclrcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F1C16423D606050034DAAF /* makeclrcommand.cpp */; }; 48F1C16A23D78D7B0034DAAF /* sharedclrvectors.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F1C16823D78D7B0034DAAF /* sharedclrvectors.cpp */; }; 48F1C16B23D78D7B0034DAAF /* sharedclrvectors.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F1C16823D78D7B0034DAAF /* sharedclrvectors.cpp */; }; 48F1C16E23D78F8D0034DAAF /* sharedclrvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F1C16C23D78F8D0034DAAF /* sharedclrvector.cpp */; }; 48F1C16F23D78F8D0034DAAF /* sharedclrvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F1C16C23D78F8D0034DAAF /* sharedclrvector.cpp */; }; 48F98E4D1A9CFD670005E81B /* completelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F98E4C1A9CFD670005E81B /* completelinkage.cpp */; }; 48FB99C5209B69FA00FF9F6E /* optirefmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48FB99C3209B69FA00FF9F6E /* optirefmatrix.cpp */; }; 48FB99C920A48EF700FF9F6E /* optidata.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48FB99C720A48EF700FF9F6E /* optidata.cpp */; }; 48FB99CC20A4AD7D00FF9F6E /* optiblastmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48FB99CA20A4AD7D00FF9F6E /* optiblastmatrix.cpp */; }; 48FB99CF20A4F3FB00FF9F6E /* optifitcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48FB99CD20A4F3FB00FF9F6E /* optifitcluster.cpp */; }; 7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; }; 835FE03D19F00640005AA754 /* classifysvmsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7B2181FE17AD777B00286E6A /* classifysvmsharedcommand.cpp */; }; 835FE03E19F00A4D005AA754 /* svm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7B21820117AD77BD00286E6A /* svm.cpp */; }; A70056E6156A93D000924A2D /* getotuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056E5156A93D000924A2D /* getotuscommand.cpp */; }; A70056EB156AB6E500924A2D /* removeotuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotuscommand.cpp */; }; A70332B712D3A13400761E33 /* Makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* Makefile */; }; A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; }; A7132EB3184E792700AAA402 /* communitytype.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7132EB2184E792700AAA402 /* communitytype.cpp */; }; A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; }; A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; }; A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */; }; 
A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; }; A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; }; A721AB6A161C570F009860A1 /* alignnode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB66161C570F009860A1 /* alignnode.cpp */; }; A721AB6B161C570F009860A1 /* aligntree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB68161C570F009860A1 /* aligntree.cpp */; }; A721AB71161C572A009860A1 /* kmernode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6D161C572A009860A1 /* kmernode.cpp */; }; A721AB72161C572A009860A1 /* kmertree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6F161C572A009860A1 /* kmertree.cpp */; }; A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB73161C573B009860A1 /* taxonomynode.cpp */; }; A7222D731856277C0055A993 /* sharedjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7222D721856277C0055A993 /* sharedjsd.cpp */; }; A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A724D2B6153C8628000A826F /* makebiomcommand.cpp */; }; A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; }; A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; }; A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741744A175CD9B1007DF49B /* makelefsecommand.cpp */; }; A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; }; A747EC71181EA0F900345732 /* sracommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A747EC70181EA0F900345732 /* sracommand.cpp */; }; A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */; }; A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */; }; A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; }; A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; }; A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; }; A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */; }; A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */; }; A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; }; A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7730EFE13967241007433A3 /* countseqscommand.cpp */; }; A774101414695AF60098E6AC /* shhhseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774101314695AF60098E6AC /* 
shhhseqscommand.cpp */; }; A774104814696F320098E6AC /* myseqdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774104614696F320098E6AC /* myseqdist.cpp */; }; A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; }; A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77916E6176F7F7600EEFE18 /* designmap.cpp */; }; A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; }; A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; }; A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7189173D40E4002163C2 /* calcsparcc.cpp */; }; A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */; }; A7876A26152A017C00A0AE86 /* subsample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7876A25152A017C00A0AE86 /* subsample.cpp */; }; A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; }; A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; }; A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */; }; A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; }; A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */; }; A7A0671A1562946F0095C8C5 /* listotuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotuscommand.cpp */; }; A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; }; A7A09B1018773C0E00FAA081 /* shannonrange.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */; }; A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; }; A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; }; A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; }; A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0231416B8244B006BA09E /* removedistscommand.cpp */; }; A7B093C018579F0400843CD1 /* pam.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B093BF18579F0400843CD1 /* pam.cpp */; }; A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; }; A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; }; A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; }; A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; }; A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; }; A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */; }; A7D395C4184FA3A200A350D7 /* kmeans.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D395C3184FA3A200A350D7 /* kmeans.cpp */; }; A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; }; A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D9378917B146B5001E90B0 /* wilcox.cpp */; }; A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; }; A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */; }; A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; }; A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; }; A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; }; A7E9B88412D37EC400DA6239 /* alignmentcell.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65512D37EC300DA6239 /* alignmentcell.cpp */; }; A7E9B88512D37EC400DA6239 /* alignmentdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65712D37EC300DA6239 /* alignmentdb.cpp */; }; A7E9B88712D37EC400DA6239 /* bayesian.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65A12D37EC300DA6239 /* bayesian.cpp */; }; A7E9B88812D37EC400DA6239 /* bellerophon.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65C12D37EC300DA6239 /* bellerophon.cpp */; }; A7E9B88912D37EC400DA6239 /* bergerparker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65E12D37EC300DA6239 /* bergerparker.cpp */; }; A7E9B88A12D37EC400DA6239 /* binsequencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66012D37EC300DA6239 /* binsequencecommand.cpp */; }; A7E9B88D12D37EC400DA6239 /* boneh.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66612D37EC400DA6239 /* boneh.cpp */; }; A7E9B88E12D37EC400DA6239 /* bootstrap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66812D37EC400DA6239 /* bootstrap.cpp */; }; A7E9B89012D37EC400DA6239 /* bstick.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66C12D37EC400DA6239 /* bstick.cpp */; }; A7E9B89212D37EC400DA6239 /* canberra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67012D37EC400DA6239 /* canberra.cpp */; }; A7E9B89412D37EC400DA6239 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67412D37EC400DA6239 /* ccode.cpp */; }; A7E9B89512D37EC400DA6239 /* chao1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67612D37EC400DA6239 /* chao1.cpp */; }; A7E9B89612D37EC400DA6239 /* mothurchimera.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67812D37EC400DA6239 /* mothurchimera.cpp */; }; A7E9B89712D37EC400DA6239 /* chimerabellerophoncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
A7E9B67A12D37EC400DA6239 /* chimerabellerophoncommand.cpp */; }; A7E9B89812D37EC400DA6239 /* chimeraccodecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67C12D37EC400DA6239 /* chimeraccodecommand.cpp */; }; A7E9B89912D37EC400DA6239 /* chimeracheckcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67E12D37EC400DA6239 /* chimeracheckcommand.cpp */; }; A7E9B89A12D37EC400DA6239 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68012D37EC400DA6239 /* chimeracheckrdp.cpp */; }; A7E9B89B12D37EC400DA6239 /* chimerapintailcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68212D37EC400DA6239 /* chimerapintailcommand.cpp */; }; A7E9B89C12D37EC400DA6239 /* chimerarealigner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68412D37EC400DA6239 /* chimerarealigner.cpp */; }; A7E9B89E12D37EC400DA6239 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68812D37EC400DA6239 /* chimeraslayer.cpp */; }; A7E9B89F12D37EC400DA6239 /* chimeraslayercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68A12D37EC400DA6239 /* chimeraslayercommand.cpp */; }; A7E9B8A012D37EC400DA6239 /* chopseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68C12D37EC400DA6239 /* chopseqscommand.cpp */; }; A7E9B8A112D37EC400DA6239 /* classify.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B68E12D37EC400DA6239 /* classify.cpp */; }; A7E9B8A212D37EC400DA6239 /* classifyotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69012D37EC400DA6239 /* classifyotucommand.cpp */; }; A7E9B8A312D37EC400DA6239 /* classifyseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */; }; A7E9B8A412D37EC400DA6239 /* clearcut.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69412D37EC400DA6239 /* clearcut.cpp */; }; A7E9B8A512D37EC400DA6239 /* clearcutcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */; }; A7E9B8A612D37EC400DA6239 /* cluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69812D37EC400DA6239 /* cluster.cpp */; }; A7E9B8A712D37EC400DA6239 /* clusterclassic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69A12D37EC400DA6239 /* clusterclassic.cpp */; }; A7E9B8A812D37EC400DA6239 /* clustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69C12D37EC400DA6239 /* clustercommand.cpp */; }; A7E9B8A912D37EC400DA6239 /* clusterdoturcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69E12D37EC400DA6239 /* clusterdoturcommand.cpp */; }; A7E9B8AA12D37EC400DA6239 /* clusterfragmentscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A012D37EC400DA6239 /* clusterfragmentscommand.cpp */; }; A7E9B8AB12D37EC400DA6239 /* clustersplitcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A212D37EC400DA6239 /* clustersplitcommand.cpp */; }; A7E9B8AC12D37EC400DA6239 /* cmdargs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A412D37EC400DA6239 /* cmdargs.cpp */; }; A7E9B8AD12D37EC400DA6239 /* collect.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A612D37EC400DA6239 /* collect.cpp */; }; A7E9B8AE12D37EC400DA6239 /* collectcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A812D37EC400DA6239 /* collectcommand.cpp */; }; A7E9B8AF12D37EC400DA6239 /* collectsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6AC12D37EC400DA6239 /* collectsharedcommand.cpp */; }; A7E9B8B012D37EC400DA6239 /* 
commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6AF12D37EC400DA6239 /* commandfactory.cpp */; }; A7E9B8B112D37EC400DA6239 /* commandoptionparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */; }; A7E9B8B312D37EC400DA6239 /* consensus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B512D37EC400DA6239 /* consensus.cpp */; }; A7E9B8B412D37EC400DA6239 /* consensusseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */; }; A7E9B8B512D37EC400DA6239 /* corraxescommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */; }; A7E9B8B612D37EC400DA6239 /* coverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6BB12D37EC400DA6239 /* coverage.cpp */; }; A7E9B8B812D37EC400DA6239 /* decalc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6C112D37EC400DA6239 /* decalc.cpp */; }; A7E9B8B912D37EC400DA6239 /* uniqueseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6C312D37EC400DA6239 /* uniqueseqscommand.cpp */; }; A7E9B8BA12D37EC400DA6239 /* degapseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6C512D37EC400DA6239 /* degapseqscommand.cpp */; }; A7E9B8BB12D37EC400DA6239 /* deuniqueseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6C712D37EC400DA6239 /* deuniqueseqscommand.cpp */; }; A7E9B8BC12D37EC400DA6239 /* distancecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6CB12D37EC400DA6239 /* distancecommand.cpp */; }; A7E9B8BD12D37EC400DA6239 /* distancedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */; }; A7E9B8BE12D37EC400DA6239 /* distclearcut.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6CF12D37EC400DA6239 /* distclearcut.cpp */; }; A7E9B8BF12D37EC400DA6239 /* dlibshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6D112D37EC400DA6239 /* dlibshuff.cpp */; }; A7E9B8C012D37EC400DA6239 /* dmat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6D312D37EC400DA6239 /* dmat.cpp */; }; A7E9B8C112D37EC400DA6239 /* efron.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6D712D37EC400DA6239 /* efron.cpp */; }; A7E9B8C312D37EC400DA6239 /* fasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6DC12D37EC400DA6239 /* fasta.cpp */; }; A7E9B8C412D37EC400DA6239 /* fastamap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */; }; A7E9B8C512D37EC400DA6239 /* fileoutput.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E012D37EC400DA6239 /* fileoutput.cpp */; }; A7E9B8C612D37EC400DA6239 /* filterseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E312D37EC400DA6239 /* filterseqscommand.cpp */; }; A7E9B8C812D37EC400DA6239 /* flowdata.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E712D37EC400DA6239 /* flowdata.cpp */; }; A7E9B8CB12D37EC400DA6239 /* fullmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6EE12D37EC400DA6239 /* fullmatrix.cpp */; }; A7E9B8CC12D37EC400DA6239 /* geom.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6F012D37EC400DA6239 /* geom.cpp */; }; A7E9B8CD12D37EC400DA6239 /* getgroupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6F212D37EC400DA6239 /* getgroupcommand.cpp */; }; A7E9B8CE12D37EC400DA6239 /* getgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6F412D37EC400DA6239 /* getgroupscommand.cpp */; }; 
A7E9B8CF12D37EC400DA6239 /* getlabelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */; }; A7E9B8D012D37EC400DA6239 /* getlineagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6F812D37EC400DA6239 /* getlineagecommand.cpp */; }; A7E9B8D112D37EC400DA6239 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6FA12D37EC400DA6239 /* getlistcountcommand.cpp */; }; A7E9B8D212D37EC400DA6239 /* getopt_long.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6FC12D37EC400DA6239 /* getopt_long.cpp */; }; A7E9B8D312D37EC400DA6239 /* getoturepcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6FE12D37EC400DA6239 /* getoturepcommand.cpp */; }; A7E9B8D512D37EC400DA6239 /* getrabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70212D37EC400DA6239 /* getrabundcommand.cpp */; }; A7E9B8D612D37EC400DA6239 /* getrelabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70412D37EC400DA6239 /* getrelabundcommand.cpp */; }; A7E9B8D712D37EC400DA6239 /* getsabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70612D37EC400DA6239 /* getsabundcommand.cpp */; }; A7E9B8D812D37EC400DA6239 /* getseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70812D37EC400DA6239 /* getseqscommand.cpp */; }; A7E9B8D912D37EC400DA6239 /* getsharedotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70A12D37EC400DA6239 /* getsharedotucommand.cpp */; }; A7E9B8DB12D37EC400DA6239 /* goodscoverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70E12D37EC400DA6239 /* goodscoverage.cpp */; }; A7E9B8DC12D37EC400DA6239 /* gotohoverlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71012D37EC400DA6239 /* gotohoverlap.cpp */; }; A7E9B8DD12D37EC400DA6239 /* gower.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71212D37EC400DA6239 /* gower.cpp */; }; A7E9B8DE12D37EC400DA6239 /* groupmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71412D37EC400DA6239 /* groupmap.cpp */; }; A7E9B8DF12D37EC400DA6239 /* hamming.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71612D37EC400DA6239 /* hamming.cpp */; }; A7E9B8E212D37EC400DA6239 /* heatmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71C12D37EC400DA6239 /* heatmap.cpp */; }; A7E9B8E312D37EC400DA6239 /* heatmapcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71E12D37EC400DA6239 /* heatmapcommand.cpp */; }; A7E9B8E412D37EC400DA6239 /* heatmapsim.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72012D37EC400DA6239 /* heatmapsim.cpp */; }; A7E9B8E512D37EC400DA6239 /* heatmapsimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72212D37EC400DA6239 /* heatmapsimcommand.cpp */; }; A7E9B8E612D37EC400DA6239 /* heip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72412D37EC400DA6239 /* heip.cpp */; }; A7E9B8E712D37EC400DA6239 /* hellinger.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72612D37EC400DA6239 /* hellinger.cpp */; }; A7E9B8E812D37EC400DA6239 /* helpcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72812D37EC400DA6239 /* helpcommand.cpp */; }; A7E9B8E912D37EC400DA6239 /* indicatorcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72B12D37EC400DA6239 /* indicatorcommand.cpp */; }; A7E9B8EA12D37EC400DA6239 /* inputdata.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72D12D37EC400DA6239 /* inputdata.cpp */; }; A7E9B8EB12D37EC400DA6239 /* invsimpson.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = A7E9B72F12D37EC400DA6239 /* invsimpson.cpp */; }; A7E9B8EC12D37EC400DA6239 /* jackknife.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73112D37EC400DA6239 /* jackknife.cpp */; }; A7E9B8ED12D37EC400DA6239 /* kmer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73312D37EC400DA6239 /* kmer.cpp */; }; A7E9B8EE12D37EC400DA6239 /* kmerdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73512D37EC400DA6239 /* kmerdb.cpp */; }; A7E9B8EF12D37EC400DA6239 /* knn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73712D37EC400DA6239 /* knn.cpp */; }; A7E9B8F012D37EC400DA6239 /* libshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73912D37EC400DA6239 /* libshuff.cpp */; }; A7E9B8F112D37EC400DA6239 /* libshuffcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */; }; A7E9B8F212D37EC400DA6239 /* listseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */; }; A7E9B8F312D37EC400DA6239 /* listvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B73F12D37EC400DA6239 /* listvector.cpp */; }; A7E9B8F412D37EC400DA6239 /* logsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74112D37EC400DA6239 /* logsd.cpp */; }; A7E9B8F512D37EC400DA6239 /* makegroupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74312D37EC400DA6239 /* makegroupcommand.cpp */; }; A7E9B8F612D37EC400DA6239 /* maligner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74512D37EC400DA6239 /* maligner.cpp */; }; A7E9B8F712D37EC400DA6239 /* manhattan.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74712D37EC400DA6239 /* manhattan.cpp */; }; A7E9B8F812D37EC400DA6239 /* distsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74912D37EC400DA6239 /* distsharedcommand.cpp */; }; A7E9B8F912D37EC400DA6239 /* memchi2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74B12D37EC400DA6239 /* memchi2.cpp */; }; A7E9B8FA12D37EC400DA6239 /* memchord.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74D12D37EC400DA6239 /* memchord.cpp */; }; A7E9B8FB12D37EC400DA6239 /* memeuclidean.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B74F12D37EC400DA6239 /* memeuclidean.cpp */; }; A7E9B8FC12D37EC400DA6239 /* mempearson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75112D37EC400DA6239 /* mempearson.cpp */; }; A7E9B8FD12D37EC400DA6239 /* mergefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75312D37EC400DA6239 /* mergefilecommand.cpp */; }; A7E9B8FF12D37EC400DA6239 /* metastatscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75712D37EC400DA6239 /* metastatscommand.cpp */; }; A7E9B90012D37EC400DA6239 /* mgclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75912D37EC400DA6239 /* mgclustercommand.cpp */; }; A7E9B90112D37EC400DA6239 /* mothur.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75B12D37EC400DA6239 /* mothur.cpp */; }; A7E9B90212D37EC400DA6239 /* mothurout.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75D12D37EC400DA6239 /* mothurout.cpp */; }; A7E9B90312D37EC400DA6239 /* nameassignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B75F12D37EC400DA6239 /* nameassignment.cpp */; }; A7E9B90412D37EC400DA6239 /* nast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76112D37EC400DA6239 /* nast.cpp */; }; A7E9B90512D37EC400DA6239 /* alignreport.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
A7E9B76312D37EC400DA6239 /* alignreport.cpp */; }; A7E9B90612D37EC400DA6239 /* needlemanoverlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76512D37EC400DA6239 /* needlemanoverlap.cpp */; }; A7E9B90712D37EC400DA6239 /* noalign.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76712D37EC400DA6239 /* noalign.cpp */; }; A7E9B90812D37EC400DA6239 /* nocommands.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76912D37EC400DA6239 /* nocommands.cpp */; }; A7E9B90912D37EC400DA6239 /* normalizesharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76B12D37EC400DA6239 /* normalizesharedcommand.cpp */; }; A7E9B90A12D37EC400DA6239 /* npshannon.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B76D12D37EC400DA6239 /* npshannon.cpp */; }; A7E9B90B12D37EC400DA6239 /* odum.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77112D37EC400DA6239 /* odum.cpp */; }; A7E9B90C12D37EC400DA6239 /* optionparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77512D37EC400DA6239 /* optionparser.cpp */; }; A7E9B90D12D37EC400DA6239 /* ordervector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77712D37EC400DA6239 /* ordervector.cpp */; }; A7E9B90E12D37EC400DA6239 /* otuhierarchycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77912D37EC400DA6239 /* otuhierarchycommand.cpp */; }; A7E9B90F12D37EC400DA6239 /* overlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77B12D37EC400DA6239 /* overlap.cpp */; }; A7E9B91012D37EC400DA6239 /* pairwiseseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77D12D37EC400DA6239 /* pairwiseseqscommand.cpp */; }; A7E9B91112D37EC400DA6239 /* fastaqinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77F12D37EC400DA6239 /* fastaqinfocommand.cpp */; }; A7E9B91312D37EC400DA6239 /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78312D37EC400DA6239 /* parsimony.cpp */; }; A7E9B91412D37EC400DA6239 /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78512D37EC400DA6239 /* parsimonycommand.cpp */; }; A7E9B91512D37EC400DA6239 /* pcoacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */; }; A7E9B91712D37EC400DA6239 /* phylodiversitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */; }; A7E9B91812D37EC400DA6239 /* phylosummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */; }; A7E9B91912D37EC400DA6239 /* phylotree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78F12D37EC400DA6239 /* phylotree.cpp */; }; A7E9B91A12D37EC400DA6239 /* phylotypecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79112D37EC400DA6239 /* phylotypecommand.cpp */; }; A7E9B91B12D37EC400DA6239 /* pintail.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79312D37EC400DA6239 /* pintail.cpp */; }; A7E9B91D12D37EC400DA6239 /* preclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */; }; A7E9B91E12D37EC400DA6239 /* prng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79912D37EC400DA6239 /* prng.cpp */; }; A7E9B92012D37EC400DA6239 /* qstat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79D12D37EC400DA6239 /* qstat.cpp */; }; A7E9B92112D37EC400DA6239 /* qualityscores.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79F12D37EC400DA6239 /* qualityscores.cpp */; }; A7E9B92212D37EC400DA6239 /* 
quitcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A112D37EC400DA6239 /* quitcommand.cpp */; }; A7E9B92312D37EC400DA6239 /* rabundvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A312D37EC400DA6239 /* rabundvector.cpp */; }; A7E9B92512D37EC400DA6239 /* raredisplay.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A712D37EC400DA6239 /* raredisplay.cpp */; }; A7E9B92612D37EC400DA6239 /* rarefact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A912D37EC400DA6239 /* rarefact.cpp */; }; A7E9B92712D37EC400DA6239 /* rarefactcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7AB12D37EC400DA6239 /* rarefactcommand.cpp */; }; A7E9B92812D37EC400DA6239 /* rarefactsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7AE12D37EC400DA6239 /* rarefactsharedcommand.cpp */; }; A7E9B92912D37EC400DA6239 /* readblast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B012D37EC400DA6239 /* readblast.cpp */; }; A7E9B92A12D37EC400DA6239 /* readcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B212D37EC400DA6239 /* readcluster.cpp */; }; A7E9B92B12D37EC400DA6239 /* readcolumn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7B412D37EC400DA6239 /* readcolumn.cpp */; }; A7E9B92F12D37EC400DA6239 /* readphylip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7BD12D37EC400DA6239 /* readphylip.cpp */; }; A7E9B93012D37EC400DA6239 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7BF12D37EC400DA6239 /* readtree.cpp */; }; A7E9B93212D37EC400DA6239 /* removegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C312D37EC400DA6239 /* removegroupscommand.cpp */; }; A7E9B93312D37EC400DA6239 /* removelineagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C512D37EC400DA6239 /* removelineagecommand.cpp */; }; A7E9B93512D37EC400DA6239 /* removeseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7C912D37EC400DA6239 /* removeseqscommand.cpp */; }; A7E9B93712D37EC400DA6239 /* reversecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7CD12D37EC400DA6239 /* reversecommand.cpp */; }; A7E9B93812D37EC400DA6239 /* sabundvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7CF12D37EC400DA6239 /* sabundvector.cpp */; }; A7E9B93912D37EC400DA6239 /* screenseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D112D37EC400DA6239 /* screenseqscommand.cpp */; }; A7E9B93A12D37EC400DA6239 /* aligncheckcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D312D37EC400DA6239 /* aligncheckcommand.cpp */; }; A7E9B93B12D37EC400DA6239 /* sensspeccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D512D37EC400DA6239 /* sensspeccommand.cpp */; }; A7E9B93C12D37EC400DA6239 /* seqerrorcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D712D37EC400DA6239 /* seqerrorcommand.cpp */; }; A7E9B93D12D37EC400DA6239 /* seqsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7D912D37EC400DA6239 /* seqsummarycommand.cpp */; }; A7E9B93E12D37EC400DA6239 /* sequence.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7DB12D37EC400DA6239 /* sequence.cpp */; }; A7E9B93F12D37EC400DA6239 /* sequencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7DD12D37EC400DA6239 /* sequencedb.cpp */; }; A7E9B94012D37EC400DA6239 /* setdircommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7DF12D37EC400DA6239 /* setdircommand.cpp */; }; A7E9B94112D37EC400DA6239 /* setlogfilecommand.cpp in Sources 
*/ = {isa = PBXBuildFile; fileRef = A7E9B7E112D37EC400DA6239 /* setlogfilecommand.cpp */; }; A7E9B94212D37EC400DA6239 /* sffinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7E312D37EC400DA6239 /* sffinfocommand.cpp */; }; A7E9B94312D37EC400DA6239 /* shannon.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7E512D37EC400DA6239 /* shannon.cpp */; }; A7E9B94412D37EC400DA6239 /* shannoneven.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7E712D37EC400DA6239 /* shannoneven.cpp */; }; A7E9B94512D37EC400DA6239 /* sharedace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7E912D37EC400DA6239 /* sharedace.cpp */; }; A7E9B94612D37EC400DA6239 /* sharedanderbergs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */; }; A7E9B94712D37EC400DA6239 /* sharedbraycurtis.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7EE12D37EC400DA6239 /* sharedbraycurtis.cpp */; }; A7E9B94812D37EC400DA6239 /* sharedchao1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F012D37EC400DA6239 /* sharedchao1.cpp */; }; A7E9B94912D37EC400DA6239 /* makesharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F212D37EC400DA6239 /* makesharedcommand.cpp */; }; A7E9B94A12D37EC400DA6239 /* sharedjabund.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F412D37EC400DA6239 /* sharedjabund.cpp */; }; A7E9B94B12D37EC400DA6239 /* sharedjackknife.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F612D37EC400DA6239 /* sharedjackknife.cpp */; }; A7E9B94C12D37EC400DA6239 /* sharedjclass.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7F812D37EC400DA6239 /* sharedjclass.cpp */; }; A7E9B94D12D37EC400DA6239 /* sharedjest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7FA12D37EC400DA6239 /* sharedjest.cpp */; }; A7E9B94E12D37EC400DA6239 /* sharedkstest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7FC12D37EC400DA6239 /* sharedkstest.cpp */; }; A7E9B94F12D37EC400DA6239 /* sharedkulczynski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7FE12D37EC400DA6239 /* sharedkulczynski.cpp */; }; A7E9B95012D37EC400DA6239 /* sharedkulczynskicody.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80012D37EC400DA6239 /* sharedkulczynskicody.cpp */; }; A7E9B95112D37EC400DA6239 /* sharedlennon.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80212D37EC400DA6239 /* sharedlennon.cpp */; }; A7E9B95212D37EC400DA6239 /* sharedlistvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80412D37EC400DA6239 /* sharedlistvector.cpp */; }; A7E9B95312D37EC400DA6239 /* sharedmarczewski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80612D37EC400DA6239 /* sharedmarczewski.cpp */; }; A7E9B95412D37EC400DA6239 /* sharedmorisitahorn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80812D37EC400DA6239 /* sharedmorisitahorn.cpp */; }; A7E9B95512D37EC400DA6239 /* sharedochiai.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80B12D37EC400DA6239 /* sharedochiai.cpp */; }; A7E9B95612D37EC400DA6239 /* sharedordervector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B80D12D37EC400DA6239 /* sharedordervector.cpp */; }; A7E9B95A12D37EC400DA6239 /* sharedsobs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81512D37EC400DA6239 /* sharedsobs.cpp */; }; A7E9B95B12D37EC400DA6239 /* sharedsobscollectsummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81712D37EC400DA6239 /* sharedsobscollectsummary.cpp */; }; A7E9B95C12D37EC400DA6239 /* sharedsorabund.cpp in 
Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81912D37EC400DA6239 /* sharedsorabund.cpp */; }; A7E9B95D12D37EC400DA6239 /* sharedsorclass.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81B12D37EC400DA6239 /* sharedsorclass.cpp */; }; A7E9B95E12D37EC400DA6239 /* sharedsorest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81D12D37EC400DA6239 /* sharedsorest.cpp */; }; A7E9B95F12D37EC400DA6239 /* sharedthetan.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B81F12D37EC400DA6239 /* sharedthetan.cpp */; }; A7E9B96012D37EC400DA6239 /* sharedthetayc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82112D37EC400DA6239 /* sharedthetayc.cpp */; }; A7E9B96212D37EC400DA6239 /* shen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82512D37EC400DA6239 /* shen.cpp */; }; A7E9B96312D37EC400DA6239 /* shhhercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82712D37EC400DA6239 /* shhhercommand.cpp */; }; A7E9B96412D37EC400DA6239 /* simpson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82912D37EC400DA6239 /* simpson.cpp */; }; A7E9B96512D37EC400DA6239 /* simpsoneven.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82B12D37EC400DA6239 /* simpsoneven.cpp */; }; A7E9B96612D37EC400DA6239 /* singlelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82D12D37EC400DA6239 /* singlelinkage.cpp */; }; A7E9B96712D37EC400DA6239 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B82E12D37EC400DA6239 /* slayer.cpp */; }; A7E9B96812D37EC400DA6239 /* slibshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83012D37EC400DA6239 /* slibshuff.cpp */; }; A7E9B96912D37EC400DA6239 /* smithwilson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83212D37EC400DA6239 /* smithwilson.cpp */; }; A7E9B96A12D37EC400DA6239 /* soergel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83512D37EC400DA6239 /* soergel.cpp */; }; A7E9B96B12D37EC400DA6239 /* solow.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83712D37EC400DA6239 /* solow.cpp */; }; A7E9B96C12D37EC400DA6239 /* sparsematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83912D37EC400DA6239 /* sparsematrix.cpp */; }; A7E9B96D12D37EC400DA6239 /* spearman.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83B12D37EC400DA6239 /* spearman.cpp */; }; A7E9B96E12D37EC400DA6239 /* speciesprofile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83D12D37EC400DA6239 /* speciesprofile.cpp */; }; A7E9B96F12D37EC400DA6239 /* splitabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */; }; A7E9B97012D37EC400DA6239 /* splitgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84112D37EC400DA6239 /* splitgroupscommand.cpp */; }; A7E9B97112D37EC400DA6239 /* splitmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84312D37EC400DA6239 /* splitmatrix.cpp */; }; A7E9B97212D37EC400DA6239 /* structchi2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84512D37EC400DA6239 /* structchi2.cpp */; }; A7E9B97312D37EC400DA6239 /* structchord.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84712D37EC400DA6239 /* structchord.cpp */; }; A7E9B97412D37EC400DA6239 /* structeuclidean.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84912D37EC400DA6239 /* structeuclidean.cpp */; }; A7E9B97512D37EC400DA6239 /* structkulczynski.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84B12D37EC400DA6239 /* structkulczynski.cpp */; }; A7E9B97612D37EC400DA6239 /* 
structpearson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84D12D37EC400DA6239 /* structpearson.cpp */; }; A7E9B97712D37EC400DA6239 /* subsamplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B84F12D37EC400DA6239 /* subsamplecommand.cpp */; }; A7E9B97812D37EC400DA6239 /* suffixdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85112D37EC400DA6239 /* suffixdb.cpp */; }; A7E9B97912D37EC400DA6239 /* suffixnodes.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */; }; A7E9B97A12D37EC400DA6239 /* suffixtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85512D37EC400DA6239 /* suffixtree.cpp */; }; A7E9B97B12D37EC400DA6239 /* summarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85712D37EC400DA6239 /* summarycommand.cpp */; }; A7E9B97C12D37EC400DA6239 /* summarysharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85912D37EC400DA6239 /* summarysharedcommand.cpp */; }; A7E9B97D12D37EC400DA6239 /* systemcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85B12D37EC400DA6239 /* systemcommand.cpp */; }; A7E9B97E12D37EC400DA6239 /* taxonomyequalizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85D12D37EC400DA6239 /* taxonomyequalizer.cpp */; }; A7E9B97F12D37EC400DA6239 /* tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B85F12D37EC400DA6239 /* tree.cpp */; }; A7E9B98012D37EC400DA6239 /* treesharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86212D37EC400DA6239 /* treesharedcommand.cpp */; }; A7E9B98112D37EC400DA6239 /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86412D37EC400DA6239 /* treemap.cpp */; }; A7E9B98212D37EC400DA6239 /* treenode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86612D37EC400DA6239 /* treenode.cpp */; }; A7E9B98312D37EC400DA6239 /* trimflowscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86812D37EC400DA6239 /* trimflowscommand.cpp */; }; A7E9B98412D37EC400DA6239 /* trimseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86A12D37EC400DA6239 /* trimseqscommand.cpp */; }; A7E9B98512D37EC400DA6239 /* unifracunweightedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86C12D37EC400DA6239 /* unifracunweightedcommand.cpp */; }; A7E9B98612D37EC400DA6239 /* unifracweightedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B86E12D37EC400DA6239 /* unifracweightedcommand.cpp */; }; A7E9B98712D37EC400DA6239 /* unweighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87012D37EC400DA6239 /* unweighted.cpp */; }; A7E9B98812D37EC400DA6239 /* uvest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87212D37EC400DA6239 /* uvest.cpp */; }; A7E9B98912D37EC400DA6239 /* validcalculator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87412D37EC400DA6239 /* validcalculator.cpp */; }; A7E9B98A12D37EC400DA6239 /* validparameter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87612D37EC400DA6239 /* validparameter.cpp */; }; A7E9B98B12D37EC400DA6239 /* venn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87812D37EC400DA6239 /* venn.cpp */; }; A7E9B98C12D37EC400DA6239 /* venncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87A12D37EC400DA6239 /* venncommand.cpp */; }; A7E9B98D12D37EC400DA6239 /* weighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87C12D37EC400DA6239 /* weighted.cpp */; }; A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; }; A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; }; A7EEB0F514F29BFE00344B83 /* classifytreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */; }; A7F9F5CF141A5E500032F693 /* sequenceparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */; }; A7FA10021302E097003860FE /* mantelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FA10011302E096003860FE /* mantelcommand.cpp */; }; A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC480D12D788F20055BC5C /* linearalgebra.cpp */; }; A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC486612D795D60055BC5C /* pcacommand.cpp */; }; A7FE7C401330EA1000F7B327 /* getcurrentcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FE7C3F1330EA1000F7B327 /* getcurrentcommand.cpp */; }; A7FE7E6D13311EA400F7B327 /* setcurrentcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FE7E6C13311EA400F7B327 /* setcurrentcommand.cpp */; }; A7FF19F2140FFDA500AD216D /* trimoligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FF19F1140FFDA500AD216D /* trimoligos.cpp */; }; A7FFB558142CA02C004884F2 /* summarytaxcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FFB557142CA02C004884F2 /* summarytaxcommand.cpp */; }; F4103AD325A4DB7F001ED741 /* sharedrabundvectors.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BDDA701EC9D31400F0F6C0 /* sharedrabundvectors.cpp */; }; F4103AD625A4DB80001ED741 /* sharedrabundvectors.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48BDDA701EC9D31400F0F6C0 /* sharedrabundvectors.cpp */; }; F41A1B91261257DE00144985 /* kmerdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F41A1B8F261257DE00144985 /* kmerdist.cpp */; }; F44268EE27BD52D50000C15D /* alignmusclecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F44268EC27BD52D50000C15D /* alignmusclecommand.cpp */; }; F44268EF27BD52D50000C15D /* alignmusclecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F44268EC27BD52D50000C15D /* alignmusclecommand.cpp */; }; F45A2E3D25A78B4D00994F76 /* contigsreport.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F45A2E3C25A78B4D00994F76 /* contigsreport.cpp */; }; F4A866B7265BE7720010479A /* protein.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F4A866B5265BE7720010479A /* protein.cpp */; }; F4A866B8265BE7720010479A /* protein.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F4A866B5265BE7720010479A /* protein.cpp */; }; F4A866BF265BE7EC0010479A /* aminoacid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F4A866BD265BE7EC0010479A /* aminoacid.cpp */; }; F4A866C0265BE7EC0010479A /* aminoacid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F4A866BD265BE7EC0010479A /* aminoacid.cpp */; }; F4A866D1266912830010479A /* proteindb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F4A866CF266912830010479A /* proteindb.cpp */; }; F4A866D2266912830010479A /* proteindb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F4A866CF266912830010479A /* proteindb.cpp */; }; F4A86713268F5CCE0010479A /* kimura.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F4A86711268F5CCE0010479A /* kimura.cpp */; }; F4B4B0DC27396EF7003B2133 /* translateseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F4B4B0DA27396EF7003B2133 /* translateseqscommand.cpp */; }; 
F4B4B0DD27396EF7003B2133 /* translateseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F4B4B0DA27396EF7003B2133 /* translateseqscommand.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXBuildRule section */ 481FB6A11AC1BE060076CFF3 /* PBXBuildRule */ = { isa = PBXBuildRule; compilerSpec = com.apple.compilers.proxy.script; fileType = sourcecode.fortran; inputFiles = ( ); isEditable = 1; outputFiles = ( "$(TARGET_BUILD_DIR)/$(INPUT_FILE_BASE).o", ); script = ""; }; A7D162CB149F96CA000523E8 /* PBXBuildRule */ = { isa = PBXBuildRule; compilerSpec = com.apple.compilers.proxy.script; fileType = sourcecode.fortran; inputFiles = ( ); isEditable = 1; outputFiles = ( "$(TARGET_BUILD_DIR)/$(INPUT_FILE_BASE).o", ); script = "# Type a script or drag a script file from your workspace to insert its path.\n"; }; /* End PBXBuildRule section */ /* Begin PBXCopyFilesBuildPhase section */ 481FB5171AC0A63E0076CFF3 /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; dstPath = /usr/share/man/man1/; dstSubfolderSpec = 0; files = ( ); runOnlyForDeploymentPostprocessing = 1; }; 8DD76FAF0486AB0100D96B5E /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 12; dstPath = ""; dstSubfolderSpec = 16; files = ( ); runOnlyForDeploymentPostprocessing = 0; }; /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ 2114A7671C654D7400D3D8D9 /* averagelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = averagelinkage.cpp; path = source/averagelinkage.cpp; sourceTree = SOURCE_ROOT; }; 219C1DDF1552C4BD004209F9 /* newcommandtemplate.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = newcommandtemplate.cpp; path = source/commands/newcommandtemplate.cpp; sourceTree = SOURCE_ROOT; }; 219C1DE11552C508004209F9 /* newcommandtemplate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = newcommandtemplate.h; path = source/commands/newcommandtemplate.h; sourceTree = SOURCE_ROOT; }; 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getcoremicrobiomecommand.cpp; path = source/commands/getcoremicrobiomecommand.cpp; sourceTree = SOURCE_ROOT; }; 219C1DE51559BCF2004209F9 /* getcoremicrobiomecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getcoremicrobiomecommand.h; path = source/commands/getcoremicrobiomecommand.h; sourceTree = SOURCE_ROOT; }; 4803D5AB211CA67F001C63B5 /* testsharedrabundvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testsharedrabundvector.cpp; path = TestMothur/testcontainers/testsharedrabundvector.cpp; sourceTree = SOURCE_ROOT; }; 4803D5AC211CA67F001C63B5 /* testsharedrabundvector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = testsharedrabundvector.hpp; path = TestMothur/testcontainers/testsharedrabundvector.hpp; sourceTree = SOURCE_ROOT; }; 4803D5AE211CD839001C63B5 /* testsharedrabundfloatvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testsharedrabundfloatvector.cpp; path = TestMothur/testcontainers/testsharedrabundfloatvector.cpp; sourceTree = SOURCE_ROOT; }; 4803D5AF211CD839001C63B5 /* testsharedrabundfloatvector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.cpp.h; name = testsharedrabundfloatvector.hpp; path = TestMothur/testcontainers/testsharedrabundfloatvector.hpp; sourceTree = SOURCE_ROOT; }; 4803D5B1211DDA5A001C63B5 /* testsharedrabundvectors.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testsharedrabundvectors.cpp; path = TestMothur/testcontainers/testsharedrabundvectors.cpp; sourceTree = SOURCE_ROOT; }; 4803D5B2211DDA5A001C63B5 /* testsharedrabundvectors.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = testsharedrabundvectors.hpp; path = TestMothur/testcontainers/testsharedrabundvectors.hpp; sourceTree = SOURCE_ROOT; }; 4803D5B421231D9D001C63B5 /* testsharedrabundfloatvectors.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testsharedrabundfloatvectors.cpp; path = TestMothur/testcontainers/testsharedrabundfloatvectors.cpp; sourceTree = SOURCE_ROOT; }; 4803D5B521231D9D001C63B5 /* testsharedrabundfloatvectors.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = testsharedrabundfloatvectors.hpp; path = TestMothur/testcontainers/testsharedrabundfloatvectors.hpp; sourceTree = SOURCE_ROOT; }; 48098ED4219DE7A500031FA4 /* testsubsample.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = testsubsample.cpp; sourceTree = ""; }; 48098ED5219DE7A500031FA4 /* testsubsample.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = testsubsample.hpp; sourceTree = ""; }; 4809EC94227B2CB500B4D0E5 /* metrolognormal.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = metrolognormal.hpp; path = source/calculators/metrolognormal.hpp; sourceTree = SOURCE_ROOT; }; 4809EC96227B405700B4D0E5 /* metrologstudent.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = metrologstudent.cpp; path = source/calculators/metrologstudent.cpp; sourceTree = SOURCE_ROOT; }; 4809EC9A227B5D2500B4D0E5 /* metrologstudent.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = metrologstudent.hpp; path = source/calculators/metrologstudent.hpp; sourceTree = SOURCE_ROOT; }; 4809EC9B227C9B3100B4D0E5 /* metrosichel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = metrosichel.cpp; path = source/calculators/metrosichel.cpp; sourceTree = SOURCE_ROOT; }; 4809EC9C227C9B3100B4D0E5 /* metrosichel.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = metrosichel.hpp; path = source/calculators/metrosichel.hpp; sourceTree = SOURCE_ROOT; }; 4809EC9F2280898E00B4D0E5 /* igrarefaction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = igrarefaction.cpp; path = source/calculators/igrarefaction.cpp; sourceTree = SOURCE_ROOT; }; 4809ECA02280898E00B4D0E5 /* igrarefaction.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = igrarefaction.hpp; path = source/calculators/igrarefaction.hpp; sourceTree = SOURCE_ROOT; }; 4809ECA322831A5E00B4D0E5 /* lnabundance.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = lnabundance.cpp; path = source/calculators/lnabundance.cpp; sourceTree = SOURCE_ROOT; }; 4809ECA422831A5E00B4D0E5 /* lnabundance.hpp */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = lnabundance.hpp; path = source/calculators/lnabundance.hpp; sourceTree = SOURCE_ROOT; }; 480D1E281EA681D100BF9C77 /* testclustercalcs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testclustercalcs.cpp; path = TestMothur/testclustercalcs.cpp; sourceTree = SOURCE_ROOT; }; 480D1E291EA681D100BF9C77 /* testclustercalcs.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = testclustercalcs.hpp; path = TestMothur/testclustercalcs.hpp; sourceTree = SOURCE_ROOT; }; 480D1E2D1EA685C500BF9C77 /* fakemcc.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = fakemcc.hpp; path = TestMothur/fakes/fakemcc.hpp; sourceTree = SOURCE_ROOT; }; 480D1E2F1EA92D5500BF9C77 /* fakeoptimatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fakeoptimatrix.cpp; path = TestMothur/fakes/fakeoptimatrix.cpp; sourceTree = SOURCE_ROOT; }; 480D1E301EA92D5500BF9C77 /* fakeoptimatrix.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = fakeoptimatrix.hpp; path = fakes/fakeoptimatrix.hpp; sourceTree = ""; }; 480E8DAF1CAB12ED00A0D137 /* testfastqread.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testfastqread.cpp; path = TestMothur/testcontainers/testfastqread.cpp; sourceTree = SOURCE_ROOT; }; 480E8DB01CAB12ED00A0D137 /* testfastqread.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testfastqread.h; path = TestMothur/testcontainers/testfastqread.h; sourceTree = SOURCE_ROOT; }; 4810D5B5218208CC00C668E8 /* testcounttable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testcounttable.cpp; path = testcontainers/testcounttable.cpp; sourceTree = ""; }; 4810D5B6218208CC00C668E8 /* testcounttable.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = testcounttable.hpp; path = testcontainers/testcounttable.hpp; sourceTree = ""; }; 4815BEAF2289E13500677EE2 /* lnrarefaction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = lnrarefaction.cpp; path = source/calculators/lnrarefaction.cpp; sourceTree = SOURCE_ROOT; }; 4815BEB02289E13500677EE2 /* lnrarefaction.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = lnrarefaction.hpp; path = source/calculators/lnrarefaction.hpp; sourceTree = SOURCE_ROOT; }; 4815BEB2228B371E00677EE2 /* lnshift.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = lnshift.cpp; path = source/calculators/lnshift.cpp; sourceTree = SOURCE_ROOT; }; 4815BEB3228B371E00677EE2 /* lnshift.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = lnshift.hpp; path = source/calculators/lnshift.hpp; sourceTree = SOURCE_ROOT; }; 4815BEB6228DD18400677EE2 /* lsabundance.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = lsabundance.cpp; path = source/calculators/lsabundance.cpp; sourceTree = SOURCE_ROOT; }; 4815BEB7228DD18400677EE2 /* lsabundance.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = lsabundance.hpp; path = source/calculators/lsabundance.hpp; sourceTree = 
SOURCE_ROOT; }; 4815BEBA2293189600677EE2 /* lsrarefaction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = lsrarefaction.cpp; path = source/calculators/lsrarefaction.cpp; sourceTree = SOURCE_ROOT; }; 4815BEBB2293189600677EE2 /* lsrarefaction.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = lsrarefaction.hpp; path = source/calculators/lsrarefaction.hpp; sourceTree = SOURCE_ROOT; }; 4815BEBF2295CE6800677EE2 /* siabundance.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = siabundance.cpp; path = source/calculators/siabundance.cpp; sourceTree = SOURCE_ROOT; }; 4815BEC02295CE6800677EE2 /* siabundance.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = siabundance.hpp; path = source/calculators/siabundance.hpp; sourceTree = SOURCE_ROOT; }; 4815BEC32296F19500677EE2 /* sirarefaction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sirarefaction.cpp; path = source/calculators/sirarefaction.cpp; sourceTree = SOURCE_ROOT; }; 4815BEC42296F19500677EE2 /* sirarefaction.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sirarefaction.hpp; path = source/calculators/sirarefaction.hpp; sourceTree = SOURCE_ROOT; }; 4815BEC722970FA700677EE2 /* sishift.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sishift.cpp; path = source/calculators/sishift.cpp; sourceTree = SOURCE_ROOT; }; 4815BEC822970FA700677EE2 /* sishift.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sishift.hpp; path = source/calculators/sishift.hpp; sourceTree = SOURCE_ROOT; }; 4815BECB229717E100677EE2 /* diversitycalc.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = diversitycalc.h; path = source/calculators/diversitycalc.h; sourceTree = SOURCE_ROOT; }; 481623E11B56A2DB004C60B7 /* pcrseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pcrseqscommand.cpp; path = source/commands/pcrseqscommand.cpp; sourceTree = SOURCE_ROOT; }; 481623E31B58267D004C60B7 /* INSTALL.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = INSTALL.md; sourceTree = SOURCE_ROOT; }; 481E40DA244DFF5A0059C925 /* onegapignore.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = onegapignore.cpp; path = source/calculators/onegapignore.cpp; sourceTree = SOURCE_ROOT; }; 481E40DC244F52460059C925 /* ignoregaps.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = ignoregaps.cpp; path = source/calculators/ignoregaps.cpp; sourceTree = SOURCE_ROOT; }; 481E40DE244F619D0059C925 /* eachgapignore.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = eachgapignore.cpp; path = source/calculators/eachgapignore.cpp; sourceTree = SOURCE_ROOT; }; 481E40E0244F62980059C925 /* calculator.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = calculator.cpp; path = source/calculators/calculator.cpp; sourceTree = SOURCE_ROOT; }; 481E40E2244F6A050059C925 /* eachgapdist.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = eachgapdist.cpp; path = source/calculators/eachgapdist.cpp; sourceTree = SOURCE_ROOT; }; 481FB5191AC0A63E0076CFF3 /* TestMothur */ = 
{isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = TestMothur; sourceTree = BUILT_PRODUCTS_DIR; }; 481FB51B1AC0A63E0076CFF3 /* main.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = TestMothur/main.cpp; sourceTree = SOURCE_ROOT; }; 481FB5281AC19F8B0076CFF3 /* setseedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = setseedcommand.cpp; path = source/commands/setseedcommand.cpp; sourceTree = SOURCE_ROOT; }; 481FB5291AC19F8B0076CFF3 /* setseedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = setseedcommand.h; path = source/commands/setseedcommand.h; sourceTree = SOURCE_ROOT; }; 481FB52D1AC1B0CB0076CFF3 /* testsetseedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testsetseedcommand.cpp; path = TestMothur/testcommands/testsetseedcommand.cpp; sourceTree = SOURCE_ROOT; }; 4827A4DA1CB3ED2100345170 /* fastqdataset.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fastqdataset.cpp; path = TestMothur/fastqdataset.cpp; sourceTree = SOURCE_ROOT; }; 4827A4DB1CB3ED2100345170 /* fastqdataset.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fastqdataset.h; path = TestMothur/fastqdataset.h; sourceTree = SOURCE_ROOT; }; 4829D9651B8387D0002EEED4 /* testbiominfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testbiominfocommand.cpp; path = TestMothur/testbiominfocommand.cpp; sourceTree = SOURCE_ROOT; }; 4829D9661B8387D0002EEED4 /* testbiominfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testbiominfocommand.h; path = TestMothur/testbiominfocommand.h; sourceTree = SOURCE_ROOT; }; 482AC3B72562B57600C9AF4A /* picrust.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = picrust.cpp; path = source/datastructures/picrust.cpp; sourceTree = SOURCE_ROOT; }; 482AC3B82562B57600C9AF4A /* picrust.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = picrust.hpp; path = source/datastructures/picrust.hpp; sourceTree = SOURCE_ROOT; }; 4837E5D622DE1BC400D3234B /* TestBatches */ = {isa = PBXFileReference; lastKnownFileType = folder; path = TestBatches; sourceTree = SOURCE_ROOT; }; 483A9BAC225BBE55006102DF /* metroig.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = metroig.cpp; path = source/calculators/metroig.cpp; sourceTree = SOURCE_ROOT; }; 483A9BAD225BBE55006102DF /* metroig.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = metroig.hpp; path = source/calculators/metroig.hpp; sourceTree = SOURCE_ROOT; }; 4846AD881D3810DD00DE9913 /* testtrimoligos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testtrimoligos.cpp; path = TestMothur/testtrimoligos.cpp; sourceTree = SOURCE_ROOT; }; 4846AD891D3810DD00DE9913 /* testtrimoligos.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = testtrimoligos.hpp; path = TestMothur/testtrimoligos.hpp; sourceTree = SOURCE_ROOT; }; 484976DD22552E0B00F3A291 /* erarefaction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = erarefaction.cpp; path = 
source/calculators/erarefaction.cpp; sourceTree = SOURCE_ROOT; }; 484976DE22552E0B00F3A291 /* erarefaction.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = erarefaction.hpp; path = source/calculators/erarefaction.hpp; sourceTree = SOURCE_ROOT; }; 484976E12255412400F3A291 /* igabundance.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = igabundance.cpp; path = source/calculators/igabundance.cpp; sourceTree = SOURCE_ROOT; }; 484976E22255412400F3A291 /* igabundance.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = igabundance.hpp; path = source/calculators/igabundance.hpp; sourceTree = SOURCE_ROOT; }; 484976E52256799100F3A291 /* diversityestimatorcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = diversityestimatorcommand.cpp; path = source/commands/diversityestimatorcommand.cpp; sourceTree = SOURCE_ROOT; }; 484F21691BA1C5F8001C1B5F /* makefile-internal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "makefile-internal"; sourceTree = SOURCE_ROOT; }; 48576EA31D05E8F600BBC9C0 /* testoptimatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testoptimatrix.cpp; path = TestMothur/testcontainers/testoptimatrix.cpp; sourceTree = SOURCE_ROOT; }; 48576EA41D05E8F600BBC9C0 /* testoptimatrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testoptimatrix.h; path = TestMothur/testcontainers/testoptimatrix.h; sourceTree = SOURCE_ROOT; }; 48576EA61D05F59300BBC9C0 /* distpdataset.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = distpdataset.cpp; path = TestMothur/distpdataset.cpp; sourceTree = SOURCE_ROOT; }; 48576EA71D05F59300BBC9C0 /* distpdataset.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = distpdataset.h; path = TestMothur/distpdataset.h; sourceTree = SOURCE_ROOT; }; 485B0E061F264F2E00CA5F57 /* sharedrabundvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedrabundvector.cpp; path = source/datastructures/sharedrabundvector.cpp; sourceTree = SOURCE_ROOT; }; 485B0E071F264F2E00CA5F57 /* sharedrabundvector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sharedrabundvector.hpp; path = source/datastructures/sharedrabundvector.hpp; sourceTree = SOURCE_ROOT; }; 485B0E0C1F27C40500CA5F57 /* sharedrabundfloatvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedrabundfloatvector.cpp; path = source/datastructures/sharedrabundfloatvector.cpp; sourceTree = SOURCE_ROOT; }; 485B0E0D1F27C40500CA5F57 /* sharedrabundfloatvector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sharedrabundfloatvector.hpp; path = source/datastructures/sharedrabundfloatvector.hpp; sourceTree = SOURCE_ROOT; }; 486741981FD9ACCE00B07480 /* sharedwriter.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sharedwriter.hpp; path = source/sharedwriter.hpp; sourceTree = SOURCE_ROOT; }; 4867419A1FD9B3FE00B07480 /* writer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = writer.h; path = source/writer.h; sourceTree = SOURCE_ROOT; }; 
48705ABB19BE32C50075E977 /* getmimarkspackagecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getmimarkspackagecommand.cpp; path = source/commands/getmimarkspackagecommand.cpp; sourceTree = SOURCE_ROOT; }; 48705ABC19BE32C50075E977 /* getmimarkspackagecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getmimarkspackagecommand.h; path = source/commands/getmimarkspackagecommand.h; sourceTree = SOURCE_ROOT; }; 48705ABD19BE32C50075E977 /* oligos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = oligos.cpp; path = source/datastructures/oligos.cpp; sourceTree = SOURCE_ROOT; }; 48705ABE19BE32C50075E977 /* oligos.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = oligos.h; path = source/datastructures/oligos.h; sourceTree = SOURCE_ROOT; }; 48705ABF19BE32C50075E977 /* mergesfffilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mergesfffilecommand.cpp; path = source/commands/mergesfffilecommand.cpp; sourceTree = SOURCE_ROOT; }; 48705AC019BE32C50075E977 /* mergesfffilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mergesfffilecommand.h; path = source/commands/mergesfffilecommand.h; sourceTree = SOURCE_ROOT; }; 48705AC119BE32C50075E977 /* sharedrjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedrjsd.cpp; path = source/calculators/sharedrjsd.cpp; sourceTree = SOURCE_ROOT; }; 48705AC219BE32C50075E977 /* sharedrjsd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedrjsd.h; path = source/calculators/sharedrjsd.h; sourceTree = SOURCE_ROOT; }; 4875F69922DCC723006A7D8C /* Ubuntu_20_Build.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = Ubuntu_20_Build.txt; sourceTree = SOURCE_ROOT; }; 48789AEF2061776100A7D848 /* utf8.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = utf8.h; path = source/utf8.h; sourceTree = SOURCE_ROOT; }; 48789AF02061776100A7D848 /* checked.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = checked.h; path = source/checked.h; sourceTree = SOURCE_ROOT; }; 48789AF12061776100A7D848 /* core.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = core.h; path = source/core.h; sourceTree = SOURCE_ROOT; }; 48789AF22061776100A7D848 /* unchecked.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = unchecked.h; path = source/unchecked.h; sourceTree = SOURCE_ROOT; }; 487C5A851AB88B93002AF48A /* mimarksattributescommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mimarksattributescommand.cpp; path = source/commands/mimarksattributescommand.cpp; sourceTree = SOURCE_ROOT; }; 487C5A861AB88B93002AF48A /* mimarksattributescommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mimarksattributescommand.h; path = source/commands/mimarksattributescommand.h; sourceTree = SOURCE_ROOT; }; 488563CF23CD00C4007B5659 /* taxonomy.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = taxonomy.cpp; path = source/datastructures/taxonomy.cpp; sourceTree = SOURCE_ROOT; }; 
488563D023CD00C4007B5659 /* taxonomy.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = taxonomy.hpp; path = source/datastructures/taxonomy.hpp; sourceTree = SOURCE_ROOT; }; 48883FFB20C6D6C000CAF112 /* compare.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = compare.h; path = source/datastructures/compare.h; sourceTree = SOURCE_ROOT; }; 488841631CC6C34900C5E972 /* renamefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = renamefilecommand.cpp; path = source/commands/renamefilecommand.cpp; sourceTree = SOURCE_ROOT; }; 488841641CC6C34900C5E972 /* renamefilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = renamefilecommand.h; path = source/commands/renamefilecommand.h; sourceTree = SOURCE_ROOT; }; 4889EA201E8962D50054E0BB /* summary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = summary.cpp; path = source/summary.cpp; sourceTree = SOURCE_ROOT; }; 4889EA211E8962D50054E0BB /* summary.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = summary.hpp; path = source/summary.hpp; sourceTree = SOURCE_ROOT; }; 488C1DE8242D102B00BDCCB4 /* optidb.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = optidb.cpp; path = source/datastructures/optidb.cpp; sourceTree = SOURCE_ROOT; }; 488C1DE9242D102B00BDCCB4 /* optidb.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = optidb.hpp; path = source/datastructures/optidb.hpp; sourceTree = SOURCE_ROOT; }; 48910D411D5243E500F60EDB /* mergecountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mergecountcommand.cpp; path = source/commands/mergecountcommand.cpp; sourceTree = SOURCE_ROOT; }; 48910D421D5243E500F60EDB /* mergecountcommand.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = mergecountcommand.hpp; path = source/commands/mergecountcommand.hpp; sourceTree = SOURCE_ROOT; }; 48910D451D58CAD700F60EDB /* opticluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = opticluster.cpp; path = source/opticluster.cpp; sourceTree = SOURCE_ROOT; }; 48910D491D58CBA300F60EDB /* optimatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = optimatrix.cpp; path = source/datastructures/optimatrix.cpp; sourceTree = SOURCE_ROOT; }; 48910D4A1D58CBA300F60EDB /* optimatrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = optimatrix.h; path = source/datastructures/optimatrix.h; sourceTree = SOURCE_ROOT; }; 48910D4C1D58CBFC00F60EDB /* opticluster.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = opticluster.h; path = source/opticluster.h; sourceTree = SOURCE_ROOT; }; 48910D4D1D58E26C00F60EDB /* testopticluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testopticluster.cpp; path = TestMothur/testopticluster.cpp; sourceTree = SOURCE_ROOT; }; 48910D4E1D58E26C00F60EDB /* testopticluster.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testopticluster.h; path = TestMothur/testopticluster.h; sourceTree = SOURCE_ROOT; }; 48910D4F1D58E26C00F60EDB /* distcdataset.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = distcdataset.h; path = TestMothur/distcdataset.h; sourceTree = SOURCE_ROOT; }; 48910D501D58E26C00F60EDB /* distcdataset.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = distcdataset.cpp; path = TestMothur/distcdataset.cpp; sourceTree = SOURCE_ROOT; }; 489387F42107A60C00284329 /* testoptirefmatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testoptirefmatrix.cpp; path = TestMothur/testoptirefmatrix.cpp; sourceTree = SOURCE_ROOT; }; 489387F52107A60C00284329 /* testoptirefmatrix.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = testoptirefmatrix.hpp; path = TestMothur/testoptirefmatrix.hpp; sourceTree = SOURCE_ROOT; }; 489387F7210F633E00284329 /* testOligos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testOligos.cpp; path = TestMothur/testcontainers/testOligos.cpp; sourceTree = SOURCE_ROOT; }; 489387F8210F633E00284329 /* testOligos.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = testOligos.hpp; path = TestMothur/testcontainers/testOligos.hpp; sourceTree = SOURCE_ROOT; }; 48998B68242E785100DBD0A9 /* onegapdist.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = onegapdist.cpp; path = source/calculators/onegapdist.cpp; sourceTree = SOURCE_ROOT; }; 489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = vsearchfileparser.cpp; path = source/vsearchfileparser.cpp; sourceTree = SOURCE_ROOT; }; 489B55711BCD7F0100FB7DC8 /* vsearchfileparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = vsearchfileparser.h; path = source/vsearchfileparser.h; sourceTree = SOURCE_ROOT; }; 48A0552E2490066C00D0F97F /* sffread.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = sffread.cpp; path = source/datastructures/sffread.cpp; sourceTree = SOURCE_ROOT; }; 48A0552F2490066C00D0F97F /* sffread.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = sffread.hpp; path = source/datastructures/sffread.hpp; sourceTree = SOURCE_ROOT; }; 48A055312491577800D0F97F /* sffheader.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = sffheader.cpp; path = source/datastructures/sffheader.cpp; sourceTree = SOURCE_ROOT; }; 48A055322491577800D0F97F /* sffheader.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = sffheader.hpp; path = source/datastructures/sffheader.hpp; sourceTree = SOURCE_ROOT; }; 48A0B8EA2547282600726384 /* biom.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = biom.cpp; path = source/datastructures/biom.cpp; sourceTree = SOURCE_ROOT; }; 48A0B8EB2547282600726384 /* biom.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = biom.hpp; path = source/datastructures/biom.hpp; sourceTree = SOURCE_ROOT; }; 48A0B8EF25472C4500726384 /* biomhdf5.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = biomhdf5.cpp; path = source/datastructures/biomhdf5.cpp; sourceTree = SOURCE_ROOT; }; 48A0B8F025472C4500726384 /* biomhdf5.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = biomhdf5.hpp; path = source/datastructures/biomhdf5.hpp; 
sourceTree = SOURCE_ROOT; }; 48A0B8F425472C6500726384 /* biomsimple.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = biomsimple.cpp; path = source/datastructures/biomsimple.cpp; sourceTree = SOURCE_ROOT; }; 48A0B8F525472C6500726384 /* biomsimple.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = biomsimple.hpp; path = source/datastructures/biomsimple.hpp; sourceTree = SOURCE_ROOT; }; 48A11C6C1CDA40F0003481D8 /* testrenamefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testrenamefilecommand.cpp; path = TestMothur/testcommands/testrenamefilecommand.cpp; sourceTree = SOURCE_ROOT; }; 48A11C6D1CDA40F0003481D8 /* testrenamefilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testrenamefilecommand.h; path = TestMothur/testcommands/testrenamefilecommand.h; sourceTree = SOURCE_ROOT; }; 48B01D2720163594006BE140 /* clusterfitcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = clusterfitcommand.cpp; path = source/commands/clusterfitcommand.cpp; sourceTree = SOURCE_ROOT; }; 48B01D2820163594006BE140 /* clusterfitcommand.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = clusterfitcommand.hpp; path = source/commands/clusterfitcommand.hpp; sourceTree = SOURCE_ROOT; }; 48B01D2A2016470F006BE140 /* sensspeccalc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sensspeccalc.cpp; path = source/sensspeccalc.cpp; sourceTree = SOURCE_ROOT; }; 48B01D2B2016470F006BE140 /* sensspeccalc.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sensspeccalc.hpp; path = source/sensspeccalc.hpp; sourceTree = SOURCE_ROOT; }; 48B44EED1FB5006500789C45 /* currentfile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = currentfile.cpp; path = source/currentfile.cpp; sourceTree = SOURCE_ROOT; }; 48B44EF01FB9EF8200789C45 /* utils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = utils.cpp; path = source/utils.cpp; sourceTree = SOURCE_ROOT; }; 48B44EF11FB9EF8200789C45 /* utils.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = utils.hpp; path = source/utils.hpp; sourceTree = SOURCE_ROOT; }; 48B662011BBB1B6600997EE4 /* testrenameseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testrenameseqscommand.cpp; path = TestMothur/testcommands/testrenameseqscommand.cpp; sourceTree = SOURCE_ROOT; }; 48B662021BBB1B6600997EE4 /* testrenameseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testrenameseqscommand.h; path = TestMothur/testcommands/testrenameseqscommand.h; sourceTree = SOURCE_ROOT; }; 48BD4EB621F7724C008EA73D /* filefile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = filefile.cpp; path = source/datastructures/filefile.cpp; sourceTree = SOURCE_ROOT; }; 48BD4EB721F7724C008EA73D /* filefile.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = filefile.hpp; path = source/datastructures/filefile.hpp; sourceTree = SOURCE_ROOT; }; 48BDDA6F1EC9D31400F0F6C0 /* sharedrabundvectors.hpp */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.cpp.h; name = sharedrabundvectors.hpp; path = source/datastructures/sharedrabundvectors.hpp; sourceTree = SOURCE_ROOT; }; 48BDDA701EC9D31400F0F6C0 /* sharedrabundvectors.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedrabundvectors.cpp; path = source/datastructures/sharedrabundvectors.cpp; sourceTree = SOURCE_ROOT; }; 48BDDA731ECA067000F0F6C0 /* sharedrabundfloatvectors.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedrabundfloatvectors.cpp; path = source/datastructures/sharedrabundfloatvectors.cpp; sourceTree = SOURCE_ROOT; }; 48BDDA741ECA067000F0F6C0 /* sharedrabundfloatvectors.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sharedrabundfloatvectors.hpp; path = source/datastructures/sharedrabundfloatvectors.hpp; sourceTree = SOURCE_ROOT; }; 48BDDA771ECA3B8E00F0F6C0 /* rabundfloatvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rabundfloatvector.cpp; path = source/datastructures/rabundfloatvector.cpp; sourceTree = SOURCE_ROOT; }; 48BDDA781ECA3B8E00F0F6C0 /* rabundfloatvector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = rabundfloatvector.hpp; path = source/datastructures/rabundfloatvector.hpp; sourceTree = SOURCE_ROOT; }; 48C51DEE1A76B870004ECDF1 /* fastqread.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = fastqread.h; path = source/datastructures/fastqread.h; sourceTree = SOURCE_ROOT; }; 48C51DEF1A76B888004ECDF1 /* fastqread.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fastqread.cpp; path = source/datastructures/fastqread.cpp; sourceTree = SOURCE_ROOT; }; 48C51DF11A793EFE004ECDF1 /* kmeralign.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = kmeralign.cpp; path = source/datastructures/kmeralign.cpp; sourceTree = SOURCE_ROOT; }; 48C51DF21A793EFE004ECDF1 /* kmeralign.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = kmeralign.h; path = source/datastructures/kmeralign.h; sourceTree = SOURCE_ROOT; }; 48C728641B66A77800D40830 /* testsequence.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testsequence.cpp; path = TestMothur/testcontainers/testsequence.cpp; sourceTree = SOURCE_ROOT; }; 48C728681B69598400D40830 /* testmergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testmergegroupscommand.cpp; path = TestMothur/testcommands/testmergegroupscommand.cpp; sourceTree = SOURCE_ROOT; }; 48C728691B69598400D40830 /* testmergegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testmergegroupscommand.h; path = TestMothur/testcommands/testmergegroupscommand.h; sourceTree = SOURCE_ROOT; }; 48C7286F1B6AB3B900D40830 /* testremovegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testremovegroupscommand.cpp; path = TestMothur/testcommands/testremovegroupscommand.cpp; sourceTree = SOURCE_ROOT; }; 48C728701B6AB3B900D40830 /* testremovegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testremovegroupscommand.h; path = 
TestMothur/testcommands/testremovegroupscommand.h; sourceTree = SOURCE_ROOT; }; 48C728731B6AB4CD00D40830 /* testgetgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testgetgroupscommand.cpp; path = TestMothur/testcommands/testgetgroupscommand.cpp; sourceTree = SOURCE_ROOT; }; 48C728741B6AB4CD00D40830 /* testgetgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testgetgroupscommand.h; path = TestMothur/testcommands/testgetgroupscommand.h; sourceTree = SOURCE_ROOT; }; 48C728761B6AB4EE00D40830 /* testsequence.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testsequence.h; path = TestMothur/testcontainers/testsequence.h; sourceTree = SOURCE_ROOT; }; 48C728771B728D6B00D40830 /* biominfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = biominfocommand.cpp; path = source/commands/biominfocommand.cpp; sourceTree = SOURCE_ROOT; }; 48C728781B728D6B00D40830 /* biominfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = biominfocommand.h; path = source/commands/biominfocommand.h; sourceTree = SOURCE_ROOT; }; 48CC010E1EB79E49009D61E6 /* fakeoligos.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = fakeoligos.h; path = fakes/fakeoligos.h; sourceTree = ""; }; 48CF76EE21BEBDD300B2FB5C /* mergeotuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mergeotuscommand.cpp; path = source/commands/mergeotuscommand.cpp; sourceTree = SOURCE_ROOT; }; 48CF76EF21BEBDD300B2FB5C /* mergeotuscommand.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = mergeotuscommand.hpp; path = source/commands/mergeotuscommand.hpp; sourceTree = SOURCE_ROOT; }; 48D6E9661CA42389008DF76B /* testvsearchfileparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testvsearchfileparser.cpp; path = TestMothur/testvsearchfileparser.cpp; sourceTree = SOURCE_ROOT; }; 48D6E9671CA42389008DF76B /* testvsearchfileparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testvsearchfileparser.h; path = TestMothur/testvsearchfileparser.h; sourceTree = SOURCE_ROOT; }; 48D6E9691CA4262A008DF76B /* dataset.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = dataset.cpp; path = TestMothur/dataset.cpp; sourceTree = SOURCE_ROOT; }; 48D6E96A1CA4262A008DF76B /* dataset.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = dataset.h; path = TestMothur/dataset.h; sourceTree = SOURCE_ROOT; }; 48DB37B11B3B27E000C372A4 /* makefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = makefilecommand.cpp; path = source/commands/makefilecommand.cpp; sourceTree = SOURCE_ROOT; }; 48DB37B21B3B27E000C372A4 /* makefilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = makefilecommand.h; path = source/commands/makefilecommand.h; sourceTree = SOURCE_ROOT; }; 48E0230124BF488D00BFEA41 /* report.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = report.cpp; path = source/datastructures/report.cpp; sourceTree = SOURCE_ROOT; }; 48E0230224BF488D00BFEA41 /* report.hpp */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = report.hpp; path = source/datastructures/report.hpp; sourceTree = SOURCE_ROOT; }; 48E5443F1E9C292900FF6AB8 /* mcc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mcc.cpp; path = source/calculators/mcc.cpp; sourceTree = SOURCE_ROOT; }; 48E544401E9C292900FF6AB8 /* mcc.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = mcc.hpp; path = source/calculators/mcc.hpp; sourceTree = SOURCE_ROOT; }; 48E544431E9C2B1000FF6AB8 /* sensitivity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sensitivity.cpp; path = source/calculators/sensitivity.cpp; sourceTree = SOURCE_ROOT; }; 48E544441E9C2B1000FF6AB8 /* sensitivity.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sensitivity.hpp; path = source/calculators/sensitivity.hpp; sourceTree = SOURCE_ROOT; }; 48E544471E9C2BE100FF6AB8 /* specificity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = specificity.cpp; path = source/calculators/specificity.cpp; sourceTree = SOURCE_ROOT; }; 48E544481E9C2BE100FF6AB8 /* specificity.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = specificity.hpp; path = source/calculators/specificity.hpp; sourceTree = SOURCE_ROOT; }; 48E5444B1E9C2C8F00FF6AB8 /* tptn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = tptn.cpp; path = source/calculators/tptn.cpp; sourceTree = SOURCE_ROOT; }; 48E5444C1E9C2C8F00FF6AB8 /* tptn.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = tptn.hpp; path = source/calculators/tptn.hpp; sourceTree = SOURCE_ROOT; }; 48E5444F1E9C2CFD00FF6AB8 /* tp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = tp.cpp; path = source/calculators/tp.cpp; sourceTree = SOURCE_ROOT; }; 48E544501E9C2CFD00FF6AB8 /* tp.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = tp.hpp; path = source/calculators/tp.hpp; sourceTree = SOURCE_ROOT; }; 48E544531E9C2DF500FF6AB8 /* tn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = tn.cpp; path = source/calculators/tn.cpp; sourceTree = SOURCE_ROOT; }; 48E544541E9C2DF500FF6AB8 /* tn.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = tn.hpp; path = source/calculators/tn.hpp; sourceTree = SOURCE_ROOT; }; 48E544571E9C2E6500FF6AB8 /* fp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fp.cpp; path = source/calculators/fp.cpp; sourceTree = SOURCE_ROOT; }; 48E544581E9C2E6500FF6AB8 /* fp.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = fp.hpp; path = source/calculators/fp.hpp; sourceTree = SOURCE_ROOT; }; 48E5445B1E9C2F0F00FF6AB8 /* fn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fn.cpp; path = source/calculators/fn.cpp; sourceTree = SOURCE_ROOT; }; 48E5445C1E9C2F0F00FF6AB8 /* fn.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = fn.hpp; path = source/calculators/fn.hpp; sourceTree = SOURCE_ROOT; }; 48E5445F1E9C2FB800FF6AB8 /* fpfn.cpp */ = {isa = PBXFileReference; fileEncoding = 
4; lastKnownFileType = sourcecode.cpp.cpp; name = fpfn.cpp; path = source/calculators/fpfn.cpp; sourceTree = SOURCE_ROOT; }; 48E544601E9C2FB800FF6AB8 /* fpfn.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = fpfn.hpp; path = source/calculators/fpfn.hpp; sourceTree = SOURCE_ROOT; }; 48E5446A1E9D3A8C00FF6AB8 /* f1score.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = f1score.cpp; path = source/calculators/f1score.cpp; sourceTree = SOURCE_ROOT; }; 48E5446B1E9D3A8C00FF6AB8 /* f1score.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = f1score.hpp; path = source/calculators/f1score.hpp; sourceTree = SOURCE_ROOT; }; 48E5446E1E9D3B2D00FF6AB8 /* accuracy.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = accuracy.cpp; path = source/calculators/accuracy.cpp; sourceTree = SOURCE_ROOT; }; 48E5446F1E9D3B2D00FF6AB8 /* accuracy.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = accuracy.hpp; path = source/calculators/accuracy.hpp; sourceTree = SOURCE_ROOT; }; 48E544721E9D3C1200FF6AB8 /* ppv.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ppv.cpp; path = source/calculators/ppv.cpp; sourceTree = SOURCE_ROOT; }; 48E544731E9D3C1200FF6AB8 /* ppv.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = ppv.hpp; path = source/calculators/ppv.hpp; sourceTree = SOURCE_ROOT; }; 48E544761E9D3CE400FF6AB8 /* npv.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = npv.cpp; path = source/calculators/npv.cpp; sourceTree = SOURCE_ROOT; }; 48E544771E9D3CE400FF6AB8 /* npv.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = npv.hpp; path = source/calculators/npv.hpp; sourceTree = SOURCE_ROOT; }; 48E5447A1E9D3F0400FF6AB8 /* fdr.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fdr.cpp; path = source/calculators/fdr.cpp; sourceTree = SOURCE_ROOT; }; 48E5447B1E9D3F0400FF6AB8 /* fdr.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = fdr.hpp; path = source/calculators/fdr.hpp; sourceTree = SOURCE_ROOT; }; 48E7E0A12278A21B00B74910 /* metrolognormal.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = metrolognormal.cpp; path = source/calculators/metrolognormal.cpp; sourceTree = SOURCE_ROOT; }; 48E7E0A42278AD4800B74910 /* diversityutils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = diversityutils.cpp; path = source/calculators/diversityutils.cpp; sourceTree = SOURCE_ROOT; }; 48E7E0A52278AD4800B74910 /* diversityutils.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = diversityutils.hpp; path = source/calculators/diversityutils.hpp; sourceTree = SOURCE_ROOT; }; 48ED1E77235E1ACA003E66F7 /* scriptengine.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = scriptengine.cpp; path = source/engines/scriptengine.cpp; sourceTree = SOURCE_ROOT; }; 48ED1E78235E1ACA003E66F7 /* scriptengine.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = scriptengine.hpp; path = source/engines/scriptengine.hpp; sourceTree = SOURCE_ROOT; }; 
48ED1E7B235E1BB4003E66F7 /* interactengine.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = interactengine.cpp; path = source/engines/interactengine.cpp; sourceTree = SOURCE_ROOT; }; 48ED1E7C235E1BB4003E66F7 /* interactengine.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = interactengine.hpp; path = source/engines/interactengine.hpp; sourceTree = SOURCE_ROOT; }; 48ED1E7F235E1D59003E66F7 /* batchengine.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = batchengine.cpp; path = source/engines/batchengine.cpp; sourceTree = SOURCE_ROOT; }; 48ED1E80235E1D59003E66F7 /* batchengine.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = batchengine.hpp; path = source/engines/batchengine.hpp; sourceTree = SOURCE_ROOT; }; 48ED1E8323689DE8003E66F7 /* srainfocommand.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = srainfocommand.cpp; path = source/commands/srainfocommand.cpp; sourceTree = SOURCE_ROOT; }; 48ED1E8423689DE8003E66F7 /* srainfocommand.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = srainfocommand.hpp; path = source/commands/srainfocommand.hpp; sourceTree = SOURCE_ROOT; }; 48EDB76A1D1320DD00F76E93 /* chimeravsearchcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = chimeravsearchcommand.cpp; path = source/commands/chimeravsearchcommand.cpp; sourceTree = SOURCE_ROOT; }; 48EDB76B1D1320DD00F76E93 /* chimeravsearchcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimeravsearchcommand.h; path = source/commands/chimeravsearchcommand.h; sourceTree = SOURCE_ROOT; }; 48F06CCB1D74BEC4004A45DD /* testphylotree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testphylotree.cpp; path = testclassifier/testphylotree.cpp; sourceTree = ""; }; 48F06CCC1D74BEC4004A45DD /* testphylotree.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = testphylotree.hpp; path = testclassifier/testphylotree.hpp; sourceTree = ""; }; 48F1C16423D606050034DAAF /* makeclrcommand.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = makeclrcommand.cpp; path = source/commands/makeclrcommand.cpp; sourceTree = SOURCE_ROOT; }; 48F1C16523D606050034DAAF /* makeclrcommand.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = makeclrcommand.hpp; path = source/commands/makeclrcommand.hpp; sourceTree = SOURCE_ROOT; }; 48F1C16823D78D7B0034DAAF /* sharedclrvectors.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = sharedclrvectors.cpp; path = source/datastructures/sharedclrvectors.cpp; sourceTree = SOURCE_ROOT; }; 48F1C16923D78D7B0034DAAF /* sharedclrvectors.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = sharedclrvectors.hpp; path = source/datastructures/sharedclrvectors.hpp; sourceTree = SOURCE_ROOT; }; 48F1C16C23D78F8D0034DAAF /* sharedclrvector.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = sharedclrvector.cpp; path = source/datastructures/sharedclrvector.cpp; sourceTree = SOURCE_ROOT; }; 48F1C16D23D78F8D0034DAAF /* sharedclrvector.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = sharedclrvector.hpp; path = source/datastructures/sharedclrvector.hpp; sourceTree = SOURCE_ROOT; }; 
48F98E4C1A9CFD670005E81B /* completelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = completelinkage.cpp; path = source/completelinkage.cpp; sourceTree = SOURCE_ROOT; }; 48FB99C3209B69FA00FF9F6E /* optirefmatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = optirefmatrix.cpp; path = source/datastructures/optirefmatrix.cpp; sourceTree = SOURCE_ROOT; }; 48FB99C4209B69FA00FF9F6E /* optirefmatrix.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = optirefmatrix.hpp; path = source/datastructures/optirefmatrix.hpp; sourceTree = SOURCE_ROOT; }; 48FB99C720A48EF700FF9F6E /* optidata.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = optidata.cpp; path = source/datastructures/optidata.cpp; sourceTree = SOURCE_ROOT; }; 48FB99C820A48EF700FF9F6E /* optidata.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = optidata.hpp; path = source/datastructures/optidata.hpp; sourceTree = SOURCE_ROOT; }; 48FB99CA20A4AD7D00FF9F6E /* optiblastmatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = optiblastmatrix.cpp; path = source/datastructures/optiblastmatrix.cpp; sourceTree = SOURCE_ROOT; }; 48FB99CB20A4AD7D00FF9F6E /* optiblastmatrix.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = optiblastmatrix.hpp; path = source/datastructures/optiblastmatrix.hpp; sourceTree = SOURCE_ROOT; }; 48FB99CD20A4F3FB00FF9F6E /* optifitcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = optifitcluster.cpp; path = source/optifitcluster.cpp; sourceTree = SOURCE_ROOT; }; 48FB99CE20A4F3FB00FF9F6E /* optifitcluster.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = optifitcluster.hpp; path = source/optifitcluster.hpp; sourceTree = SOURCE_ROOT; }; 48FD9946243E5FB10017C521 /* Makefile_cluster */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = Makefile_cluster; sourceTree = SOURCE_ROOT; }; 7B2181FE17AD777B00286E6A /* classifysvmsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = classifysvmsharedcommand.cpp; path = source/commands/classifysvmsharedcommand.cpp; sourceTree = SOURCE_ROOT; }; 7B2181FF17AD777B00286E6A /* classifysvmsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = classifysvmsharedcommand.h; path = source/commands/classifysvmsharedcommand.h; sourceTree = SOURCE_ROOT; }; 7B21820117AD77BD00286E6A /* svm.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = svm.cpp; path = source/svm/svm.cpp; sourceTree = SOURCE_ROOT; }; 7B21820217AD77BD00286E6A /* svm.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = svm.hpp; path = source/svm/svm.hpp; sourceTree = SOURCE_ROOT; }; 7E6BE10812F710D8007ADDBE /* refchimeratest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = refchimeratest.h; path = source/refchimeratest.h; sourceTree = SOURCE_ROOT; }; 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = refchimeratest.cpp; path = 
source/refchimeratest.cpp; sourceTree = SOURCE_ROOT; }; 8DD76FB20486AB0100D96B5E /* mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mothur; sourceTree = BUILT_PRODUCTS_DIR; }; A70056E5156A93D000924A2D /* getotuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getotuscommand.cpp; path = source/commands/getotuscommand.cpp; sourceTree = SOURCE_ROOT; }; A70056E8156A93E300924A2D /* getotuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getotuscommand.h; path = source/commands/getotuscommand.h; sourceTree = SOURCE_ROOT; }; A70056E9156AB6D400924A2D /* removeotuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = removeotuscommand.h; path = source/commands/removeotuscommand.h; sourceTree = SOURCE_ROOT; }; A70056EA156AB6E500924A2D /* removeotuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = removeotuscommand.cpp; path = source/commands/removeotuscommand.cpp; sourceTree = SOURCE_ROOT; }; A70332B512D3A13400761E33 /* Makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = Makefile; sourceTree = SOURCE_ROOT; }; A7128B1A16B7001200723BE4 /* getdistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getdistscommand.h; path = source/commands/getdistscommand.h; sourceTree = SOURCE_ROOT; }; A7128B1C16B7002600723BE4 /* getdistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getdistscommand.cpp; path = source/commands/getdistscommand.cpp; sourceTree = SOURCE_ROOT; }; A7132EAE184E76EB00AAA402 /* communitytype.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = communitytype.h; path = source/communitytype/communitytype.h; sourceTree = SOURCE_ROOT; }; A7132EB2184E792700AAA402 /* communitytype.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = communitytype.cpp; path = source/communitytype/communitytype.cpp; sourceTree = SOURCE_ROOT; }; A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = readphylipvector.h; path = source/read/readphylipvector.h; sourceTree = SOURCE_ROOT; }; A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = readphylipvector.cpp; path = source/read/readphylipvector.cpp; sourceTree = SOURCE_ROOT; }; A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = nmdscommand.h; path = source/commands/nmdscommand.h; sourceTree = SOURCE_ROOT; }; A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = nmdscommand.cpp; path = source/commands/nmdscommand.cpp; sourceTree = SOURCE_ROOT; }; A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = lefsecommand.cpp; path = source/commands/lefsecommand.cpp; sourceTree = SOURCE_ROOT; }; A7190B211768E0DF00A9AFA6 /* lefsecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = lefsecommand.h; path = 
source/commands/lefsecommand.h; sourceTree = SOURCE_ROOT; }; A71CB15E130B04A2001E7287 /* anosimcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = anosimcommand.cpp; path = source/commands/anosimcommand.cpp; sourceTree = SOURCE_ROOT; }; A71CB15F130B04A2001E7287 /* anosimcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = anosimcommand.h; path = source/commands/anosimcommand.h; sourceTree = SOURCE_ROOT; }; A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mergegroupscommand.h; path = source/commands/mergegroupscommand.h; sourceTree = SOURCE_ROOT; }; A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mergegroupscommand.cpp; path = source/commands/mergegroupscommand.cpp; sourceTree = SOURCE_ROOT; }; A721AB66161C570F009860A1 /* alignnode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = alignnode.cpp; path = source/classifier/alignnode.cpp; sourceTree = SOURCE_ROOT; }; A721AB67161C570F009860A1 /* alignnode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = alignnode.h; path = source/classifier/alignnode.h; sourceTree = SOURCE_ROOT; }; A721AB68161C570F009860A1 /* aligntree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = aligntree.cpp; path = source/classifier/aligntree.cpp; sourceTree = SOURCE_ROOT; }; A721AB69161C570F009860A1 /* aligntree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = aligntree.h; path = source/classifier/aligntree.h; sourceTree = SOURCE_ROOT; }; A721AB6D161C572A009860A1 /* kmernode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = kmernode.cpp; path = source/classifier/kmernode.cpp; sourceTree = SOURCE_ROOT; }; A721AB6E161C572A009860A1 /* kmernode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = kmernode.h; path = source/classifier/kmernode.h; sourceTree = SOURCE_ROOT; }; A721AB6F161C572A009860A1 /* kmertree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = kmertree.cpp; path = source/classifier/kmertree.cpp; sourceTree = SOURCE_ROOT; }; A721AB70161C572A009860A1 /* kmertree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = kmertree.h; path = source/classifier/kmertree.h; sourceTree = SOURCE_ROOT; }; A721AB73161C573B009860A1 /* taxonomynode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = taxonomynode.cpp; path = source/classifier/taxonomynode.cpp; sourceTree = SOURCE_ROOT; }; A721AB74161C573B009860A1 /* taxonomynode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = taxonomynode.h; path = source/classifier/taxonomynode.h; sourceTree = SOURCE_ROOT; }; A7222D711856276C0055A993 /* sharedjsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = sharedjsd.h; path = source/calculators/sharedjsd.h; sourceTree = SOURCE_ROOT; }; A7222D721856277C0055A993 /* sharedjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedjsd.cpp; path = source/calculators/sharedjsd.cpp; 
sourceTree = SOURCE_ROOT; }; A724D2B4153C8600000A826F /* makebiomcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = makebiomcommand.h; path = source/commands/makebiomcommand.h; sourceTree = SOURCE_ROOT; }; A724D2B6153C8628000A826F /* makebiomcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = makebiomcommand.cpp; path = source/commands/makebiomcommand.cpp; sourceTree = SOURCE_ROOT; }; A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = removerarecommand.h; path = source/commands/removerarecommand.h; sourceTree = SOURCE_ROOT; }; A727864312E9E28C00F86ABA /* removerarecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = removerarecommand.cpp; path = source/commands/removerarecommand.cpp; sourceTree = SOURCE_ROOT; }; A73DDC3613C4BF64006AAE38 /* mothurmetastats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mothurmetastats.h; path = source/metastats/mothurmetastats.h; sourceTree = SOURCE_ROOT; }; A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mothurmetastats.cpp; path = source/metastats/mothurmetastats.cpp; sourceTree = SOURCE_ROOT; }; A741744A175CD9B1007DF49B /* makelefsecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = makelefsecommand.cpp; path = source/commands/makelefsecommand.cpp; sourceTree = SOURCE_ROOT; }; A741744B175CD9B1007DF49B /* makelefsecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = makelefsecommand.h; path = source/commands/makelefsecommand.h; sourceTree = SOURCE_ROOT; }; A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sequencecountparser.cpp; path = source/datastructures/sequencecountparser.cpp; sourceTree = SOURCE_ROOT; }; A741FAD415D168A00067BCC5 /* sequencecountparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sequencecountparser.h; path = source/datastructures/sequencecountparser.h; sourceTree = SOURCE_ROOT; }; A747EC6F181EA0E500345732 /* sracommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = sracommand.h; path = source/commands/sracommand.h; sourceTree = SOURCE_ROOT; }; A747EC70181EA0F900345732 /* sracommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sracommand.cpp; path = source/commands/sracommand.cpp; sourceTree = SOURCE_ROOT; }; A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = kruskalwalliscommand.cpp; path = source/commands/kruskalwalliscommand.cpp; sourceTree = SOURCE_ROOT; }; A7496D2D167B531B00CC7D7C /* kruskalwalliscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = kruskalwalliscommand.h; path = source/commands/kruskalwalliscommand.h; sourceTree = SOURCE_ROOT; }; A74C06E616A9C097008390A3 /* primerdesigncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = primerdesigncommand.h; path = source/commands/primerdesigncommand.h; sourceTree = SOURCE_ROOT; }; 
A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = primerdesigncommand.cpp; path = source/commands/primerdesigncommand.cpp; sourceTree = SOURCE_ROOT; }; A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimerauchimecommand.h; path = source/commands/chimerauchimecommand.h; sourceTree = SOURCE_ROOT; }; A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = chimerauchimecommand.cpp; path = source/commands/chimerauchimecommand.cpp; sourceTree = SOURCE_ROOT; }; A74D59A3159A1E2000043046 /* counttable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = counttable.cpp; path = source/datastructures/counttable.cpp; sourceTree = SOURCE_ROOT; }; A74D59A6159A1E3600043046 /* counttable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = counttable.h; path = source/datastructures/counttable.h; sourceTree = SOURCE_ROOT; }; A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = summaryqualcommand.h; path = source/commands/summaryqualcommand.h; sourceTree = SOURCE_ROOT; }; A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = summaryqualcommand.cpp; path = source/commands/summaryqualcommand.cpp; sourceTree = SOURCE_ROOT; }; A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = getmetacommunitycommand.h; path = source/commands/getmetacommunitycommand.h; sourceTree = SOURCE_ROOT; }; A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getmetacommunitycommand.cpp; path = source/commands/getmetacommunitycommand.cpp; sourceTree = SOURCE_ROOT; }; A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = qFinderDMM.cpp; path = source/communitytype/qFinderDMM.cpp; sourceTree = SOURCE_ROOT; }; A7548FAF171440ED00B1F05A /* qFinderDMM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = qFinderDMM.h; path = source/communitytype/qFinderDMM.h; sourceTree = SOURCE_ROOT; }; A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = homovacommand.h; path = source/commands/homovacommand.h; sourceTree = SOURCE_ROOT; }; A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = homovacommand.cpp; path = source/commands/homovacommand.cpp; sourceTree = SOURCE_ROOT; }; A76CDD7F1510F09A004C8458 /* pcrseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pcrseqscommand.h; path = source/commands/pcrseqscommand.h; sourceTree = SOURCE_ROOT; }; A7730EFD13967241007433A3 /* countseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = countseqscommand.h; path = source/commands/countseqscommand.h; sourceTree = SOURCE_ROOT; }; A7730EFE13967241007433A3 /* 
countseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = countseqscommand.cpp; path = source/commands/countseqscommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A774101214695AF60098E6AC /* shhhseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shhhseqscommand.h; path = source/commands/shhhseqscommand.h; sourceTree = SOURCE_ROOT; }; A774101314695AF60098E6AC /* shhhseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = shhhseqscommand.cpp; path = source/commands/shhhseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A774104614696F320098E6AC /* myseqdist.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = myseqdist.cpp; path = source/myseqdist.cpp; sourceTree = SOURCE_ROOT; }; A774104714696F320098E6AC /* myseqdist.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = myseqdist.h; path = source/myseqdist.h; sourceTree = SOURCE_ROOT; }; A77410F414697C300098E6AC /* seqnoise.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = seqnoise.cpp; path = source/seqnoise.cpp; sourceTree = SOURCE_ROOT; }; A77410F514697C300098E6AC /* seqnoise.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = seqnoise.h; path = source/seqnoise.h; sourceTree = SOURCE_ROOT; }; A77916E6176F7F7600EEFE18 /* designmap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = designmap.cpp; path = source/datastructures/designmap.cpp; sourceTree = SOURCE_ROOT; }; A77916E7176F7F7600EEFE18 /* designmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = designmap.h; path = source/datastructures/designmap.h; sourceTree = SOURCE_ROOT; }; A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = deuniquetreecommand.h; path = source/commands/deuniquetreecommand.h; sourceTree = SOURCE_ROOT; }; A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = deuniquetreecommand.cpp; path = source/commands/deuniquetreecommand.cpp; sourceTree = SOURCE_ROOT; }; A77B7183173D222F002163C2 /* sparcccommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = sparcccommand.h; path = source/commands/sparcccommand.h; sourceTree = SOURCE_ROOT; }; A77B7184173D2240002163C2 /* sparcccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sparcccommand.cpp; path = source/commands/sparcccommand.cpp; sourceTree = SOURCE_ROOT; }; A77B7189173D40E4002163C2 /* calcsparcc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = calcsparcc.cpp; path = source/calcsparcc.cpp; sourceTree = SOURCE_ROOT; }; A77B718A173D40E4002163C2 /* calcsparcc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = calcsparcc.h; path = source/calcsparcc.h; sourceTree = SOURCE_ROOT; }; A77EBD2C1523707F00ED407C /* createdatabasecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = createdatabasecommand.h; path = source/commands/createdatabasecommand.h; 
sourceTree = SOURCE_ROOT; }; A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = createdatabasecommand.cpp; path = source/commands/createdatabasecommand.cpp; sourceTree = SOURCE_ROOT; }; A7876A25152A017C00A0AE86 /* subsample.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = subsample.cpp; path = source/subsample.cpp; sourceTree = SOURCE_ROOT; }; A7876A28152A018B00A0AE86 /* subsample.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = subsample.h; path = source/subsample.h; sourceTree = SOURCE_ROOT; }; A79234D513C74BF6002B08E2 /* mothurfisher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mothurfisher.h; path = source/metastats/mothurfisher.h; sourceTree = SOURCE_ROOT; }; A79234D613C74BF6002B08E2 /* mothurfisher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mothurfisher.cpp; path = source/metastats/mothurfisher.cpp; sourceTree = SOURCE_ROOT; }; A795840B13F13CD900F201D5 /* countgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = countgroupscommand.h; path = source/commands/countgroupscommand.h; sourceTree = SOURCE_ROOT; }; A795840C13F13CD900F201D5 /* countgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = countgroupscommand.cpp; path = source/commands/countgroupscommand.cpp; sourceTree = SOURCE_ROOT; }; A799314816CBD0BC0017E888 /* mergetaxsummarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mergetaxsummarycommand.h; path = source/commands/mergetaxsummarycommand.h; sourceTree = SOURCE_ROOT; }; A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mergetaxsummarycommand.cpp; path = source/commands/mergetaxsummarycommand.cpp; sourceTree = SOURCE_ROOT; }; A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = makefastqcommand.h; path = source/commands/makefastqcommand.h; sourceTree = SOURCE_ROOT; }; A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = makefastqcommand.cpp; path = source/commands/makefastqcommand.cpp; sourceTree = SOURCE_ROOT; }; A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = filtersharedcommand.cpp; path = source/commands/filtersharedcommand.cpp; sourceTree = SOURCE_ROOT; }; A79EEF8816971D640006DEC1 /* filtersharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = filtersharedcommand.h; path = source/commands/filtersharedcommand.h; sourceTree = SOURCE_ROOT; }; A7A067191562946F0095C8C5 /* listotuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = listotuscommand.cpp; path = source/commands/listotuscommand.cpp; sourceTree = SOURCE_ROOT; }; A7A0671C156294810095C8C5 /* listotuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = listotuscommand.h; path = source/commands/listotuscommand.h; sourceTree = SOURCE_ROOT; }; 
A7A0671D1562AC230095C8C5 /* makecontigscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = makecontigscommand.h; path = source/commands/makecontigscommand.h; sourceTree = SOURCE_ROOT; }; A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = makecontigscommand.cpp; path = source/commands/makecontigscommand.cpp; sourceTree = SOURCE_ROOT; }; A7A09B0E18773BF700FAA081 /* shannonrange.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = shannonrange.h; path = source/calculators/shannonrange.h; sourceTree = SOURCE_ROOT; }; A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = shannonrange.cpp; path = source/calculators/shannonrange.cpp; sourceTree = SOURCE_ROOT; }; A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sortseqscommand.cpp; path = source/commands/sortseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sortseqscommand.h; path = source/commands/sortseqscommand.h; sourceTree = SOURCE_ROOT; }; A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = otuassociationcommand.cpp; path = source/commands/otuassociationcommand.cpp; sourceTree = SOURCE_ROOT; }; A7A3C8C814D041AD00B1BFBE /* otuassociationcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = otuassociationcommand.h; path = source/commands/otuassociationcommand.h; sourceTree = SOURCE_ROOT; }; A7A61F1A130035C800E05B6B /* LICENSE.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = LICENSE.md; sourceTree = SOURCE_ROOT; }; A7A61F2B130062E000E05B6B /* amovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = amovacommand.h; path = source/commands/amovacommand.h; sourceTree = SOURCE_ROOT; }; A7A61F2C130062E000E05B6B /* amovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = amovacommand.cpp; path = source/commands/amovacommand.cpp; sourceTree = SOURCE_ROOT; }; A7AACFBA132FE008003D6C4D /* currentfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = currentfile.h; path = source/currentfile.h; sourceTree = SOURCE_ROOT; }; A7B0231416B8244B006BA09E /* removedistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = removedistscommand.cpp; path = source/commands/removedistscommand.cpp; sourceTree = SOURCE_ROOT; }; A7B0231716B8245D006BA09E /* removedistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = removedistscommand.h; path = source/commands/removedistscommand.h; sourceTree = SOURCE_ROOT; }; A7B093BE18579EF600843CD1 /* pam.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = pam.h; path = source/communitytype/pam.h; sourceTree = SOURCE_ROOT; }; A7B093BF18579F0400843CD1 /* pam.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pam.cpp; path = source/communitytype/pam.cpp; 
sourceTree = SOURCE_ROOT; }; A7BF221214587886000AD524 /* myPerseus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = myPerseus.cpp; path = source/chimera/myPerseus.cpp; sourceTree = SOURCE_ROOT; }; A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = myPerseus.h; path = source/chimera/myPerseus.h; sourceTree = SOURCE_ROOT; }; A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimeraperseuscommand.h; path = source/commands/chimeraperseuscommand.h; sourceTree = SOURCE_ROOT; }; A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = chimeraperseuscommand.cpp; path = source/commands/chimeraperseuscommand.cpp; sourceTree = SOURCE_ROOT; }; A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = cooccurrencecommand.cpp; path = source/commands/cooccurrencecommand.cpp; sourceTree = SOURCE_ROOT; }; A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = cooccurrencecommand.h; path = source/commands/cooccurrencecommand.h; sourceTree = SOURCE_ROOT; }; A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = trialSwap2.cpp; path = source/trialSwap2.cpp; sourceTree = SOURCE_ROOT; }; A7C3DC0E14FE469500FE1924 /* trialswap2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = trialswap2.h; path = source/trialswap2.h; sourceTree = SOURCE_ROOT; }; A7C7DAB615DA75760059B0CF /* sffmultiplecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sffmultiplecommand.h; path = source/commands/sffmultiplecommand.h; sourceTree = SOURCE_ROOT; }; A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sffmultiplecommand.cpp; path = source/commands/sffmultiplecommand.cpp; sourceTree = SOURCE_ROOT; }; A7CFA42F1755400500D9ED4D /* renameseqscommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = renameseqscommand.h; path = source/commands/renameseqscommand.h; sourceTree = SOURCE_ROOT; }; A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = renameseqscommand.cpp; path = source/commands/renameseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7D395C2184FA39300A350D7 /* kmeans.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = kmeans.h; path = source/communitytype/kmeans.h; sourceTree = SOURCE_ROOT; }; A7D395C3184FA3A200A350D7 /* kmeans.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = kmeans.cpp; path = source/communitytype/kmeans.cpp; sourceTree = SOURCE_ROOT; }; A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = treereader.h; path = source/read/treereader.h; sourceTree = SOURCE_ROOT; }; A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = 
treereader.cpp; path = source/read/treereader.cpp; sourceTree = SOURCE_ROOT; }; A7D9378917B146B5001E90B0 /* wilcox.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = wilcox.cpp; path = source/wilcox.cpp; sourceTree = SOURCE_ROOT; }; A7D9378B17B15215001E90B0 /* wilcox.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = wilcox.h; path = source/wilcox.h; sourceTree = SOURCE_ROOT; }; A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = commandparameter.h; path = source/commandparameter.h; sourceTree = SOURCE_ROOT; }; A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sparsedistancematrix.cpp; path = source/datastructures/sparsedistancematrix.cpp; sourceTree = SOURCE_ROOT; }; A7E0243F15B4522000A5F046 /* sparsedistancematrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sparsedistancematrix.h; path = source/datastructures/sparsedistancematrix.h; sourceTree = SOURCE_ROOT; }; A7E6F69C17427CF2006775E2 /* makelookupcommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = makelookupcommand.h; path = source/commands/makelookupcommand.h; sourceTree = SOURCE_ROOT; }; A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = makelookupcommand.cpp; path = source/commands/makelookupcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ace.cpp; path = source/calculators/ace.cpp; sourceTree = SOURCE_ROOT; }; A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ace.h; path = source/calculators/ace.h; sourceTree = SOURCE_ROOT; }; A7E9B65112D37EC300DA6239 /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = aligncommand.cpp; path = source/commands/aligncommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B65212D37EC300DA6239 /* aligncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = aligncommand.h; path = source/commands/aligncommand.h; sourceTree = SOURCE_ROOT; }; A7E9B65312D37EC300DA6239 /* alignment.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = alignment.cpp; path = source/datastructures/alignment.cpp; sourceTree = SOURCE_ROOT; }; A7E9B65412D37EC300DA6239 /* alignment.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = alignment.hpp; path = source/datastructures/alignment.hpp; sourceTree = SOURCE_ROOT; }; A7E9B65512D37EC300DA6239 /* alignmentcell.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = alignmentcell.cpp; path = source/datastructures/alignmentcell.cpp; sourceTree = SOURCE_ROOT; }; A7E9B65612D37EC300DA6239 /* alignmentcell.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = alignmentcell.hpp; path = source/datastructures/alignmentcell.hpp; sourceTree = SOURCE_ROOT; }; A7E9B65712D37EC300DA6239 /* alignmentdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = 
alignmentdb.cpp; path = source/datastructures/alignmentdb.cpp; sourceTree = SOURCE_ROOT; }; A7E9B65812D37EC300DA6239 /* alignmentdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = alignmentdb.h; path = source/datastructures/alignmentdb.h; sourceTree = SOURCE_ROOT; }; A7E9B65A12D37EC300DA6239 /* bayesian.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bayesian.cpp; path = source/classifier/bayesian.cpp; sourceTree = SOURCE_ROOT; }; A7E9B65B12D37EC300DA6239 /* bayesian.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = bayesian.h; path = source/classifier/bayesian.h; sourceTree = SOURCE_ROOT; }; A7E9B65C12D37EC300DA6239 /* bellerophon.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bellerophon.cpp; path = source/chimera/bellerophon.cpp; sourceTree = SOURCE_ROOT; }; A7E9B65D12D37EC300DA6239 /* bellerophon.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = bellerophon.h; path = source/chimera/bellerophon.h; sourceTree = SOURCE_ROOT; }; A7E9B65E12D37EC300DA6239 /* bergerparker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bergerparker.cpp; path = source/calculators/bergerparker.cpp; sourceTree = SOURCE_ROOT; }; A7E9B65F12D37EC300DA6239 /* bergerparker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = bergerparker.h; path = source/calculators/bergerparker.h; sourceTree = SOURCE_ROOT; }; A7E9B66012D37EC300DA6239 /* binsequencecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = binsequencecommand.cpp; path = source/commands/binsequencecommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B66112D37EC300DA6239 /* binsequencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = binsequencecommand.h; path = source/commands/binsequencecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B66612D37EC400DA6239 /* boneh.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = boneh.cpp; path = source/calculators/boneh.cpp; sourceTree = SOURCE_ROOT; }; A7E9B66712D37EC400DA6239 /* boneh.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = boneh.h; path = source/calculators/boneh.h; sourceTree = SOURCE_ROOT; }; A7E9B66812D37EC400DA6239 /* bootstrap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bootstrap.cpp; path = source/calculators/bootstrap.cpp; sourceTree = SOURCE_ROOT; }; A7E9B66912D37EC400DA6239 /* bootstrap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = bootstrap.h; path = source/calculators/bootstrap.h; sourceTree = SOURCE_ROOT; }; A7E9B66C12D37EC400DA6239 /* bstick.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bstick.cpp; path = source/calculators/bstick.cpp; sourceTree = SOURCE_ROOT; }; A7E9B66D12D37EC400DA6239 /* bstick.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = bstick.h; path = source/calculators/bstick.h; sourceTree = SOURCE_ROOT; }; A7E9B66F12D37EC400DA6239 /* calculator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = calculator.h; path = 
source/calculators/calculator.h; sourceTree = SOURCE_ROOT; }; A7E9B67012D37EC400DA6239 /* canberra.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = canberra.cpp; path = source/calculators/canberra.cpp; sourceTree = SOURCE_ROOT; }; A7E9B67112D37EC400DA6239 /* canberra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = canberra.h; path = source/calculators/canberra.h; sourceTree = SOURCE_ROOT; }; A7E9B67412D37EC400DA6239 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ccode.cpp; path = source/chimera/ccode.cpp; sourceTree = SOURCE_ROOT; }; A7E9B67512D37EC400DA6239 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; name = ccode.h; path = source/chimera/ccode.h; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; A7E9B67612D37EC400DA6239 /* chao1.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = chao1.cpp; path = source/calculators/chao1.cpp; sourceTree = SOURCE_ROOT; }; A7E9B67712D37EC400DA6239 /* chao1.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chao1.h; path = source/calculators/chao1.h; sourceTree = SOURCE_ROOT; }; A7E9B67812D37EC400DA6239 /* mothurchimera.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mothurchimera.cpp; path = source/chimera/mothurchimera.cpp; sourceTree = SOURCE_ROOT; }; A7E9B67912D37EC400DA6239 /* mothurchimera.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mothurchimera.h; path = source/chimera/mothurchimera.h; sourceTree = SOURCE_ROOT; }; A7E9B67A12D37EC400DA6239 /* chimerabellerophoncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = chimerabellerophoncommand.cpp; path = source/commands/chimerabellerophoncommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B67B12D37EC400DA6239 /* chimerabellerophoncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimerabellerophoncommand.h; path = source/commands/chimerabellerophoncommand.h; sourceTree = SOURCE_ROOT; }; A7E9B67C12D37EC400DA6239 /* chimeraccodecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = chimeraccodecommand.cpp; path = source/commands/chimeraccodecommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B67D12D37EC400DA6239 /* chimeraccodecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimeraccodecommand.h; path = source/commands/chimeraccodecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B67E12D37EC400DA6239 /* chimeracheckcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = chimeracheckcommand.cpp; path = source/commands/chimeracheckcommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B67F12D37EC400DA6239 /* chimeracheckcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimeracheckcommand.h; path = source/commands/chimeracheckcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B68012D37EC400DA6239 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.cpp.cpp; name = chimeracheckrdp.cpp; path = source/chimera/chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; }; A7E9B68112D37EC400DA6239 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; name = chimeracheckrdp.h; path = source/chimera/chimeracheckrdp.h; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; A7E9B68212D37EC400DA6239 /* chimerapintailcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = chimerapintailcommand.cpp; path = source/commands/chimerapintailcommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B68312D37EC400DA6239 /* chimerapintailcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimerapintailcommand.h; path = source/commands/chimerapintailcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B68412D37EC400DA6239 /* chimerarealigner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = chimerarealigner.cpp; path = source/chimera/chimerarealigner.cpp; sourceTree = SOURCE_ROOT; }; A7E9B68512D37EC400DA6239 /* chimerarealigner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimerarealigner.h; path = source/chimera/chimerarealigner.h; sourceTree = SOURCE_ROOT; }; A7E9B68812D37EC400DA6239 /* chimeraslayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = chimeraslayer.cpp; path = source/chimera/chimeraslayer.cpp; sourceTree = SOURCE_ROOT; }; A7E9B68912D37EC400DA6239 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimeraslayer.h; path = source/chimera/chimeraslayer.h; sourceTree = SOURCE_ROOT; }; A7E9B68A12D37EC400DA6239 /* chimeraslayercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = chimeraslayercommand.cpp; path = source/commands/chimeraslayercommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B68B12D37EC400DA6239 /* chimeraslayercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimeraslayercommand.h; path = source/commands/chimeraslayercommand.h; sourceTree = SOURCE_ROOT; }; A7E9B68C12D37EC400DA6239 /* chopseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = chopseqscommand.cpp; path = source/commands/chopseqscommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B68D12D37EC400DA6239 /* chopseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chopseqscommand.h; path = source/commands/chopseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B68E12D37EC400DA6239 /* classify.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = classify.cpp; path = source/classifier/classify.cpp; sourceTree = SOURCE_ROOT; }; A7E9B68F12D37EC400DA6239 /* classify.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = classify.h; path = source/classifier/classify.h; sourceTree = SOURCE_ROOT; }; A7E9B69012D37EC400DA6239 /* classifyotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = classifyotucommand.cpp; path = 
source/commands/classifyotucommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B69112D37EC400DA6239 /* classifyotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = classifyotucommand.h; path = source/commands/classifyotucommand.h; sourceTree = SOURCE_ROOT; }; A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = classifyseqscommand.cpp; path = source/commands/classifyseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B69312D37EC400DA6239 /* classifyseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = classifyseqscommand.h; path = source/commands/classifyseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B69412D37EC400DA6239 /* clearcut.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = clearcut.cpp; path = source/clearcut/clearcut.cpp; sourceTree = SOURCE_ROOT; }; A7E9B69512D37EC400DA6239 /* clearcut.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = clearcut.h; path = source/clearcut/clearcut.h; sourceTree = SOURCE_ROOT; }; A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = clearcutcommand.cpp; path = source/commands/clearcutcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B69712D37EC400DA6239 /* clearcutcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = clearcutcommand.h; path = source/commands/clearcutcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B69812D37EC400DA6239 /* cluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = cluster.cpp; path = source/cluster.cpp; sourceTree = SOURCE_ROOT; }; A7E9B69912D37EC400DA6239 /* cluster.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = cluster.hpp; path = source/cluster.hpp; sourceTree = SOURCE_ROOT; }; A7E9B69A12D37EC400DA6239 /* clusterclassic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = clusterclassic.cpp; path = source/clusterclassic.cpp; sourceTree = SOURCE_ROOT; }; A7E9B69B12D37EC400DA6239 /* clusterclassic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = clusterclassic.h; path = source/clusterclassic.h; sourceTree = SOURCE_ROOT; }; A7E9B69C12D37EC400DA6239 /* clustercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = clustercommand.cpp; path = source/commands/clustercommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B69D12D37EC400DA6239 /* clustercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; name = clustercommand.h; path = source/commands/clustercommand.h; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; A7E9B69E12D37EC400DA6239 /* clusterdoturcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = clusterdoturcommand.cpp; path = source/commands/clusterdoturcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B69F12D37EC400DA6239 /* clusterdoturcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = clusterdoturcommand.h; path = source/commands/clusterdoturcommand.h; sourceTree = SOURCE_ROOT; }; 
A7E9B6A012D37EC400DA6239 /* clusterfragmentscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = clusterfragmentscommand.cpp; path = source/commands/clusterfragmentscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6A112D37EC400DA6239 /* clusterfragmentscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = clusterfragmentscommand.h; path = source/commands/clusterfragmentscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6A212D37EC400DA6239 /* clustersplitcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = clustersplitcommand.cpp; path = source/commands/clustersplitcommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B6A312D37EC400DA6239 /* clustersplitcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = clustersplitcommand.h; path = source/commands/clustersplitcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6A412D37EC400DA6239 /* cmdargs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = cmdargs.cpp; path = source/clearcut/cmdargs.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6A512D37EC400DA6239 /* cmdargs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = cmdargs.h; path = source/clearcut/cmdargs.h; sourceTree = SOURCE_ROOT; }; A7E9B6A612D37EC400DA6239 /* collect.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = collect.cpp; path = source/collect.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6A712D37EC400DA6239 /* collect.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = collect.h; path = source/collect.h; sourceTree = SOURCE_ROOT; }; A7E9B6A812D37EC400DA6239 /* collectcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = collectcommand.cpp; path = source/commands/collectcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6A912D37EC400DA6239 /* collectcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = collectcommand.h; path = source/commands/collectcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6AA12D37EC400DA6239 /* collectdisplay.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = collectdisplay.h; path = source/collectdisplay.h; sourceTree = SOURCE_ROOT; }; A7E9B6AB12D37EC400DA6239 /* collectorscurvedata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = collectorscurvedata.h; path = source/collectorscurvedata.h; sourceTree = SOURCE_ROOT; }; A7E9B6AC12D37EC400DA6239 /* collectsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = collectsharedcommand.cpp; path = source/commands/collectsharedcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6AD12D37EC400DA6239 /* collectsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = collectsharedcommand.h; path = source/commands/collectsharedcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6AE12D37EC400DA6239 /* command.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = command.hpp; path = source/commands/command.hpp; sourceTree = SOURCE_ROOT; }; A7E9B6AF12D37EC400DA6239 /* commandfactory.cpp */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = commandfactory.cpp; path = source/commandfactory.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6B012D37EC400DA6239 /* commandfactory.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = commandfactory.hpp; path = source/commandfactory.hpp; sourceTree = SOURCE_ROOT; }; A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = commandoptionparser.cpp; path = source/commandoptionparser.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6B212D37EC400DA6239 /* commandoptionparser.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = commandoptionparser.hpp; path = source/commandoptionparser.hpp; sourceTree = SOURCE_ROOT; }; A7E9B6B312D37EC400DA6239 /* common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = common.h; path = source/clearcut/common.h; sourceTree = SOURCE_ROOT; }; A7E9B6B512D37EC400DA6239 /* consensus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = consensus.cpp; path = source/consensus.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6B612D37EC400DA6239 /* consensus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = consensus.h; path = source/consensus.h; sourceTree = SOURCE_ROOT; }; A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = consensusseqscommand.cpp; path = source/commands/consensusseqscommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B6B812D37EC400DA6239 /* consensusseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = consensusseqscommand.h; path = source/commands/consensusseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = corraxescommand.cpp; path = source/commands/corraxescommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6BA12D37EC400DA6239 /* corraxescommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = corraxescommand.h; path = source/commands/corraxescommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6BB12D37EC400DA6239 /* coverage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = coverage.cpp; path = source/calculators/coverage.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6BC12D37EC400DA6239 /* coverage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = coverage.h; path = source/calculators/coverage.h; sourceTree = SOURCE_ROOT; }; A7E9B6BE12D37EC400DA6239 /* searchdatabase.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = searchdatabase.hpp; path = source/datastructures/searchdatabase.hpp; sourceTree = SOURCE_ROOT; }; A7E9B6BF12D37EC400DA6239 /* datavector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = datavector.hpp; path = source/datastructures/datavector.hpp; sourceTree = SOURCE_ROOT; }; A7E9B6C012D37EC400DA6239 /* dayhoff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = dayhoff.h; path = 
source/calculators/dayhoff.h; sourceTree = SOURCE_ROOT; }; A7E9B6C112D37EC400DA6239 /* decalc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = decalc.cpp; path = source/chimera/decalc.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6C212D37EC400DA6239 /* decalc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; name = decalc.h; path = source/chimera/decalc.h; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; }; A7E9B6C312D37EC400DA6239 /* uniqueseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = uniqueseqscommand.cpp; path = source/commands/uniqueseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6C412D37EC400DA6239 /* uniqueseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = uniqueseqscommand.h; path = source/commands/uniqueseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6C512D37EC400DA6239 /* degapseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = degapseqscommand.cpp; path = source/commands/degapseqscommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B6C612D37EC400DA6239 /* degapseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = degapseqscommand.h; path = source/commands/degapseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6C712D37EC400DA6239 /* deuniqueseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = deuniqueseqscommand.cpp; path = source/commands/deuniqueseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6C812D37EC400DA6239 /* deuniqueseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = deuniqueseqscommand.h; path = source/commands/deuniqueseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6C912D37EC400DA6239 /* display.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = display.h; path = source/display.h; sourceTree = SOURCE_ROOT; }; A7E9B6CB12D37EC400DA6239 /* distancecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = distancecommand.cpp; path = source/commands/distancecommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B6CC12D37EC400DA6239 /* distancecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = distancecommand.h; path = source/commands/distancecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = distancedb.cpp; path = source/datastructures/distancedb.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = distancedb.hpp; path = source/datastructures/distancedb.hpp; sourceTree = SOURCE_ROOT; }; A7E9B6CF12D37EC400DA6239 /* distclearcut.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = distclearcut.cpp; path = source/clearcut/distclearcut.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6D012D37EC400DA6239 /* distclearcut.h */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.c.h; name = distclearcut.h; path = source/clearcut/distclearcut.h; sourceTree = SOURCE_ROOT; }; A7E9B6D112D37EC400DA6239 /* dlibshuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = dlibshuff.cpp; path = source/dlibshuff.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6D212D37EC400DA6239 /* dlibshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = dlibshuff.h; path = source/dlibshuff.h; sourceTree = SOURCE_ROOT; }; A7E9B6D312D37EC400DA6239 /* dmat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = dmat.cpp; path = source/clearcut/dmat.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6D412D37EC400DA6239 /* dmat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = dmat.h; path = source/clearcut/dmat.h; sourceTree = SOURCE_ROOT; }; A7E9B6D512D37EC400DA6239 /* eachgapdist.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = eachgapdist.h; path = source/calculators/eachgapdist.h; sourceTree = SOURCE_ROOT; }; A7E9B6D612D37EC400DA6239 /* eachgapignore.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = eachgapignore.h; path = source/calculators/eachgapignore.h; sourceTree = SOURCE_ROOT; }; A7E9B6D712D37EC400DA6239 /* efron.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = efron.cpp; path = source/calculators/efron.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6D812D37EC400DA6239 /* efron.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = efron.h; path = source/calculators/efron.h; sourceTree = SOURCE_ROOT; }; A7E9B6D912D37EC400DA6239 /* endiannessmacros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = endiannessmacros.h; path = source/endiannessmacros.h; sourceTree = SOURCE_ROOT; }; A7E9B6DB12D37EC400DA6239 /* engine.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = engine.hpp; path = source/engines/engine.hpp; sourceTree = SOURCE_ROOT; }; A7E9B6DC12D37EC400DA6239 /* fasta.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fasta.cpp; path = source/clearcut/fasta.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6DD12D37EC400DA6239 /* fasta.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fasta.h; path = source/clearcut/fasta.h; sourceTree = SOURCE_ROOT; }; A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fastamap.cpp; path = source/datastructures/fastamap.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6DF12D37EC400DA6239 /* fastamap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fastamap.h; path = source/datastructures/fastamap.h; sourceTree = SOURCE_ROOT; }; A7E9B6E012D37EC400DA6239 /* fileoutput.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fileoutput.cpp; path = source/fileoutput.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6E112D37EC400DA6239 /* fileoutput.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fileoutput.h; path = source/fileoutput.h; sourceTree = SOURCE_ROOT; }; A7E9B6E212D37EC400DA6239 /* filters.h */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.c.h; name = filters.h; path = source/calculators/filters.h; sourceTree = SOURCE_ROOT; }; A7E9B6E312D37EC400DA6239 /* filterseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = filterseqscommand.cpp; path = source/commands/filterseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6E412D37EC400DA6239 /* filterseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = filterseqscommand.h; path = source/commands/filterseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6E712D37EC400DA6239 /* flowdata.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = flowdata.cpp; path = source/datastructures/flowdata.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6E812D37EC400DA6239 /* flowdata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = flowdata.h; path = source/datastructures/flowdata.h; sourceTree = SOURCE_ROOT; }; A7E9B6EE12D37EC400DA6239 /* fullmatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fullmatrix.cpp; path = source/datastructures/fullmatrix.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6EF12D37EC400DA6239 /* fullmatrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fullmatrix.h; path = source/datastructures/fullmatrix.h; sourceTree = SOURCE_ROOT; }; A7E9B6F012D37EC400DA6239 /* geom.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = geom.cpp; path = source/calculators/geom.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6F112D37EC400DA6239 /* geom.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = geom.h; path = source/calculators/geom.h; sourceTree = SOURCE_ROOT; }; A7E9B6F212D37EC400DA6239 /* getgroupcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = getgroupcommand.cpp; path = source/commands/getgroupcommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B6F312D37EC400DA6239 /* getgroupcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getgroupcommand.h; path = source/commands/getgroupcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6F412D37EC400DA6239 /* getgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getgroupscommand.cpp; path = source/commands/getgroupscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6F512D37EC400DA6239 /* getgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getgroupscommand.h; path = source/commands/getgroupscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getlabelcommand.cpp; path = source/commands/getlabelcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6F712D37EC400DA6239 /* getlabelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getlabelcommand.h; path = source/commands/getlabelcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6F812D37EC400DA6239 /* getlineagecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getlineagecommand.cpp; path = 
source/commands/getlineagecommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6F912D37EC400DA6239 /* getlineagecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getlineagecommand.h; path = source/commands/getlineagecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6FA12D37EC400DA6239 /* getlistcountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getlistcountcommand.cpp; path = source/commands/getlistcountcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6FB12D37EC400DA6239 /* getlistcountcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getlistcountcommand.h; path = source/commands/getlistcountcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B6FC12D37EC400DA6239 /* getopt_long.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getopt_long.cpp; path = source/clearcut/getopt_long.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6FD12D37EC400DA6239 /* getopt_long.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getopt_long.h; path = source/clearcut/getopt_long.h; sourceTree = SOURCE_ROOT; }; A7E9B6FE12D37EC400DA6239 /* getoturepcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getoturepcommand.cpp; path = source/commands/getoturepcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B6FF12D37EC400DA6239 /* getoturepcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getoturepcommand.h; path = source/commands/getoturepcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B70212D37EC400DA6239 /* getrabundcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = getrabundcommand.cpp; path = source/commands/getrabundcommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B70312D37EC400DA6239 /* getrabundcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getrabundcommand.h; path = source/commands/getrabundcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B70412D37EC400DA6239 /* getrelabundcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getrelabundcommand.cpp; path = source/commands/getrelabundcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B70512D37EC400DA6239 /* getrelabundcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getrelabundcommand.h; path = source/commands/getrelabundcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B70612D37EC400DA6239 /* getsabundcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = getsabundcommand.cpp; path = source/commands/getsabundcommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B70712D37EC400DA6239 /* getsabundcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getsabundcommand.h; path = source/commands/getsabundcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B70812D37EC400DA6239 /* getseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getseqscommand.cpp; path = source/commands/getseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B70912D37EC400DA6239 /* getseqscommand.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getseqscommand.h; path = source/commands/getseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B70A12D37EC400DA6239 /* getsharedotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getsharedotucommand.cpp; path = source/commands/getsharedotucommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B70B12D37EC400DA6239 /* getsharedotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getsharedotucommand.h; path = source/commands/getsharedotucommand.h; sourceTree = SOURCE_ROOT; }; A7E9B70E12D37EC400DA6239 /* goodscoverage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = goodscoverage.cpp; path = source/calculators/goodscoverage.cpp; sourceTree = SOURCE_ROOT; }; A7E9B70F12D37EC400DA6239 /* goodscoverage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = goodscoverage.h; path = source/calculators/goodscoverage.h; sourceTree = SOURCE_ROOT; }; A7E9B71012D37EC400DA6239 /* gotohoverlap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = gotohoverlap.cpp; path = source/gotohoverlap.cpp; sourceTree = SOURCE_ROOT; }; A7E9B71112D37EC400DA6239 /* gotohoverlap.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = gotohoverlap.hpp; path = source/gotohoverlap.hpp; sourceTree = SOURCE_ROOT; }; A7E9B71212D37EC400DA6239 /* gower.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = gower.cpp; path = source/calculators/gower.cpp; sourceTree = SOURCE_ROOT; }; A7E9B71312D37EC400DA6239 /* gower.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gower.h; path = source/calculators/gower.h; sourceTree = SOURCE_ROOT; }; A7E9B71412D37EC400DA6239 /* groupmap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = groupmap.cpp; path = source/datastructures/groupmap.cpp; sourceTree = SOURCE_ROOT; }; A7E9B71512D37EC400DA6239 /* groupmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = groupmap.h; path = source/datastructures/groupmap.h; sourceTree = SOURCE_ROOT; }; A7E9B71612D37EC400DA6239 /* hamming.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = hamming.cpp; path = source/calculators/hamming.cpp; sourceTree = SOURCE_ROOT; }; A7E9B71712D37EC400DA6239 /* hamming.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = hamming.h; path = source/calculators/hamming.h; sourceTree = SOURCE_ROOT; }; A7E9B71C12D37EC400DA6239 /* heatmap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = heatmap.cpp; path = source/heatmap.cpp; sourceTree = SOURCE_ROOT; }; A7E9B71D12D37EC400DA6239 /* heatmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = heatmap.h; path = source/heatmap.h; sourceTree = SOURCE_ROOT; }; A7E9B71E12D37EC400DA6239 /* heatmapcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = heatmapcommand.cpp; path = source/commands/heatmapcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B71F12D37EC400DA6239 /* heatmapcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.c.h; name = heatmapcommand.h; path = source/commands/heatmapcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B72012D37EC400DA6239 /* heatmapsim.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = heatmapsim.cpp; path = source/heatmapsim.cpp; sourceTree = SOURCE_ROOT; }; A7E9B72112D37EC400DA6239 /* heatmapsim.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = heatmapsim.h; path = source/heatmapsim.h; sourceTree = SOURCE_ROOT; }; A7E9B72212D37EC400DA6239 /* heatmapsimcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = heatmapsimcommand.cpp; path = source/commands/heatmapsimcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B72312D37EC400DA6239 /* heatmapsimcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = heatmapsimcommand.h; path = source/commands/heatmapsimcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B72412D37EC400DA6239 /* heip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = heip.cpp; path = source/calculators/heip.cpp; sourceTree = SOURCE_ROOT; }; A7E9B72512D37EC400DA6239 /* heip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = heip.h; path = source/calculators/heip.h; sourceTree = SOURCE_ROOT; }; A7E9B72612D37EC400DA6239 /* hellinger.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = hellinger.cpp; path = source/calculators/hellinger.cpp; sourceTree = SOURCE_ROOT; }; A7E9B72712D37EC400DA6239 /* hellinger.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = hellinger.h; path = source/calculators/hellinger.h; sourceTree = SOURCE_ROOT; }; A7E9B72812D37EC400DA6239 /* helpcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = helpcommand.cpp; path = source/commands/helpcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B72912D37EC400DA6239 /* helpcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = helpcommand.h; path = source/commands/helpcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B72A12D37EC400DA6239 /* ignoregaps.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ignoregaps.h; path = source/calculators/ignoregaps.h; sourceTree = SOURCE_ROOT; }; A7E9B72B12D37EC400DA6239 /* indicatorcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = indicatorcommand.cpp; path = source/commands/indicatorcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B72C12D37EC400DA6239 /* indicatorcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = indicatorcommand.h; path = source/commands/indicatorcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B72D12D37EC400DA6239 /* inputdata.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = inputdata.cpp; path = source/inputdata.cpp; sourceTree = SOURCE_ROOT; }; A7E9B72E12D37EC400DA6239 /* inputdata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = inputdata.h; path = source/inputdata.h; sourceTree = SOURCE_ROOT; }; A7E9B72F12D37EC400DA6239 /* invsimpson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = invsimpson.cpp; path 
= source/calculators/invsimpson.cpp; sourceTree = SOURCE_ROOT; }; A7E9B73012D37EC400DA6239 /* invsimpson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = invsimpson.h; path = source/calculators/invsimpson.h; sourceTree = SOURCE_ROOT; }; A7E9B73112D37EC400DA6239 /* jackknife.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = jackknife.cpp; path = source/calculators/jackknife.cpp; sourceTree = SOURCE_ROOT; }; A7E9B73212D37EC400DA6239 /* jackknife.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = jackknife.h; path = source/calculators/jackknife.h; sourceTree = SOURCE_ROOT; }; A7E9B73312D37EC400DA6239 /* kmer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = kmer.cpp; path = source/datastructures/kmer.cpp; sourceTree = SOURCE_ROOT; }; A7E9B73412D37EC400DA6239 /* kmer.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = kmer.hpp; path = source/datastructures/kmer.hpp; sourceTree = SOURCE_ROOT; }; A7E9B73512D37EC400DA6239 /* kmerdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = kmerdb.cpp; path = source/datastructures/kmerdb.cpp; sourceTree = SOURCE_ROOT; }; A7E9B73612D37EC400DA6239 /* kmerdb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = kmerdb.hpp; path = source/datastructures/kmerdb.hpp; sourceTree = SOURCE_ROOT; }; A7E9B73712D37EC400DA6239 /* knn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = knn.cpp; path = source/classifier/knn.cpp; sourceTree = SOURCE_ROOT; }; A7E9B73812D37EC400DA6239 /* knn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = knn.h; path = source/classifier/knn.h; sourceTree = SOURCE_ROOT; }; A7E9B73912D37EC400DA6239 /* libshuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = libshuff.cpp; path = source/libshuff.cpp; sourceTree = SOURCE_ROOT; }; A7E9B73A12D37EC400DA6239 /* libshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = libshuff.h; path = source/libshuff.h; sourceTree = SOURCE_ROOT; }; A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = libshuffcommand.cpp; path = source/commands/libshuffcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B73C12D37EC400DA6239 /* libshuffcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = libshuffcommand.h; path = source/commands/libshuffcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = listseqscommand.cpp; path = source/commands/listseqscommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B73E12D37EC400DA6239 /* listseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = listseqscommand.h; path = source/commands/listseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B73F12D37EC400DA6239 /* listvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = listvector.cpp; path = 
source/datastructures/listvector.cpp; sourceTree = SOURCE_ROOT; }; A7E9B74012D37EC400DA6239 /* listvector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = listvector.hpp; path = source/datastructures/listvector.hpp; sourceTree = SOURCE_ROOT; }; A7E9B74112D37EC400DA6239 /* logsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = logsd.cpp; path = source/calculators/logsd.cpp; sourceTree = SOURCE_ROOT; }; A7E9B74212D37EC400DA6239 /* logsd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = logsd.h; path = source/calculators/logsd.h; sourceTree = SOURCE_ROOT; }; A7E9B74312D37EC400DA6239 /* makegroupcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; name = makegroupcommand.cpp; path = source/commands/makegroupcommand.cpp; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.cpp; }; A7E9B74412D37EC400DA6239 /* makegroupcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = makegroupcommand.h; path = source/commands/makegroupcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B74512D37EC400DA6239 /* maligner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = maligner.cpp; path = source/chimera/maligner.cpp; sourceTree = SOURCE_ROOT; }; A7E9B74612D37EC400DA6239 /* maligner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = maligner.h; path = source/chimera/maligner.h; sourceTree = SOURCE_ROOT; }; A7E9B74712D37EC400DA6239 /* manhattan.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = manhattan.cpp; path = source/calculators/manhattan.cpp; sourceTree = SOURCE_ROOT; }; A7E9B74812D37EC400DA6239 /* manhattan.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = manhattan.h; path = source/calculators/manhattan.h; sourceTree = SOURCE_ROOT; }; A7E9B74912D37EC400DA6239 /* distsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = distsharedcommand.cpp; path = source/commands/distsharedcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B74A12D37EC400DA6239 /* distsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = distsharedcommand.h; path = source/commands/distsharedcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B74B12D37EC400DA6239 /* memchi2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = memchi2.cpp; path = source/calculators/memchi2.cpp; sourceTree = SOURCE_ROOT; }; A7E9B74C12D37EC400DA6239 /* memchi2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = memchi2.h; path = source/calculators/memchi2.h; sourceTree = SOURCE_ROOT; }; A7E9B74D12D37EC400DA6239 /* memchord.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = memchord.cpp; path = source/calculators/memchord.cpp; sourceTree = SOURCE_ROOT; }; A7E9B74E12D37EC400DA6239 /* memchord.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = memchord.h; path = source/calculators/memchord.h; sourceTree = SOURCE_ROOT; }; A7E9B74F12D37EC400DA6239 /* memeuclidean.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; 
name = memeuclidean.cpp; path = source/calculators/memeuclidean.cpp; sourceTree = SOURCE_ROOT; }; A7E9B75012D37EC400DA6239 /* memeuclidean.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = memeuclidean.h; path = source/calculators/memeuclidean.h; sourceTree = SOURCE_ROOT; }; A7E9B75112D37EC400DA6239 /* mempearson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mempearson.cpp; path = source/calculators/mempearson.cpp; sourceTree = SOURCE_ROOT; }; A7E9B75212D37EC400DA6239 /* mempearson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mempearson.h; path = source/calculators/mempearson.h; sourceTree = SOURCE_ROOT; }; A7E9B75312D37EC400DA6239 /* mergefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mergefilecommand.cpp; path = source/commands/mergefilecommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B75412D37EC400DA6239 /* mergefilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mergefilecommand.h; path = source/commands/mergefilecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B75712D37EC400DA6239 /* metastatscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = metastatscommand.cpp; path = source/commands/metastatscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B75812D37EC400DA6239 /* metastatscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = metastatscommand.h; path = source/commands/metastatscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B75912D37EC400DA6239 /* mgclustercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mgclustercommand.cpp; path = source/commands/mgclustercommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B75A12D37EC400DA6239 /* mgclustercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mgclustercommand.h; path = source/commands/mgclustercommand.h; sourceTree = SOURCE_ROOT; }; A7E9B75B12D37EC400DA6239 /* mothur.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mothur.cpp; path = source/mothur.cpp; sourceTree = SOURCE_ROOT; }; A7E9B75C12D37EC400DA6239 /* mothur.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mothur.h; path = source/mothur.h; sourceTree = SOURCE_ROOT; }; A7E9B75D12D37EC400DA6239 /* mothurout.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mothurout.cpp; path = source/mothurout.cpp; sourceTree = SOURCE_ROOT; }; A7E9B75E12D37EC400DA6239 /* mothurout.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mothurout.h; path = source/mothurout.h; sourceTree = SOURCE_ROOT; }; A7E9B75F12D37EC400DA6239 /* nameassignment.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = nameassignment.cpp; path = source/datastructures/nameassignment.cpp; sourceTree = SOURCE_ROOT; }; A7E9B76012D37EC400DA6239 /* nameassignment.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = nameassignment.hpp; path = source/datastructures/nameassignment.hpp; sourceTree = SOURCE_ROOT; }; A7E9B76112D37EC400DA6239 /* nast.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.cpp.cpp; name = nast.cpp; path = source/nast.cpp; sourceTree = "<group>"; }; A7E9B76212D37EC400DA6239 /* nast.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = nast.hpp; path = source/nast.hpp; sourceTree = "<group>"; }; A7E9B76312D37EC400DA6239 /* alignreport.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = alignreport.cpp; path = source/alignreport.cpp; sourceTree = SOURCE_ROOT; }; A7E9B76412D37EC400DA6239 /* alignreport.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = alignreport.hpp; path = source/alignreport.hpp; sourceTree = SOURCE_ROOT; }; A7E9B76512D37EC400DA6239 /* needlemanoverlap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = needlemanoverlap.cpp; path = source/needlemanoverlap.cpp; sourceTree = SOURCE_ROOT; }; A7E9B76612D37EC400DA6239 /* needlemanoverlap.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = needlemanoverlap.hpp; path = source/needlemanoverlap.hpp; sourceTree = SOURCE_ROOT; }; A7E9B76712D37EC400DA6239 /* noalign.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = noalign.cpp; path = source/noalign.cpp; sourceTree = SOURCE_ROOT; }; A7E9B76812D37EC400DA6239 /* noalign.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = noalign.hpp; path = source/noalign.hpp; sourceTree = SOURCE_ROOT; }; A7E9B76912D37EC400DA6239 /* nocommands.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = nocommands.cpp; path = source/commands/nocommands.cpp; sourceTree = SOURCE_ROOT; }; A7E9B76A12D37EC400DA6239 /* nocommands.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = nocommands.h; path = source/commands/nocommands.h; sourceTree = SOURCE_ROOT; }; A7E9B76B12D37EC400DA6239 /* normalizesharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = normalizesharedcommand.cpp; path = source/commands/normalizesharedcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B76C12D37EC400DA6239 /* normalizesharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = normalizesharedcommand.h; path = source/commands/normalizesharedcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B76D12D37EC400DA6239 /* npshannon.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = npshannon.cpp; path = source/calculators/npshannon.cpp; sourceTree = SOURCE_ROOT; }; A7E9B76E12D37EC400DA6239 /* npshannon.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = npshannon.h; path = source/calculators/npshannon.h; sourceTree = SOURCE_ROOT; }; A7E9B76F12D37EC400DA6239 /* nseqs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = nseqs.h; path = source/calculators/nseqs.h; sourceTree = SOURCE_ROOT; }; A7E9B77012D37EC400DA6239 /* observable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = observable.h; path = source/observable.h; sourceTree = SOURCE_ROOT; }; A7E9B77112D37EC400DA6239 /* odum.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = odum.cpp; path = source/calculators/odum.cpp; sourceTree = SOURCE_ROOT; }; 
A7E9B77212D37EC400DA6239 /* odum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = odum.h; path = source/calculators/odum.h; sourceTree = SOURCE_ROOT; }; A7E9B77312D37EC400DA6239 /* onegapdist.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = onegapdist.h; path = source/calculators/onegapdist.h; sourceTree = SOURCE_ROOT; }; A7E9B77412D37EC400DA6239 /* onegapignore.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = onegapignore.h; path = source/calculators/onegapignore.h; sourceTree = SOURCE_ROOT; }; A7E9B77512D37EC400DA6239 /* optionparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = optionparser.cpp; path = source/optionparser.cpp; sourceTree = SOURCE_ROOT; }; A7E9B77612D37EC400DA6239 /* optionparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = optionparser.h; path = source/optionparser.h; sourceTree = SOURCE_ROOT; }; A7E9B77712D37EC400DA6239 /* ordervector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ordervector.cpp; path = source/datastructures/ordervector.cpp; sourceTree = SOURCE_ROOT; }; A7E9B77812D37EC400DA6239 /* ordervector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = ordervector.hpp; path = source/datastructures/ordervector.hpp; sourceTree = SOURCE_ROOT; }; A7E9B77912D37EC400DA6239 /* otuhierarchycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = otuhierarchycommand.cpp; path = source/commands/otuhierarchycommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B77A12D37EC400DA6239 /* otuhierarchycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = otuhierarchycommand.h; path = source/commands/otuhierarchycommand.h; sourceTree = SOURCE_ROOT; }; A7E9B77B12D37EC400DA6239 /* overlap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = overlap.cpp; path = source/overlap.cpp; sourceTree = SOURCE_ROOT; }; A7E9B77C12D37EC400DA6239 /* overlap.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = overlap.hpp; path = source/overlap.hpp; sourceTree = SOURCE_ROOT; }; A7E9B77D12D37EC400DA6239 /* pairwiseseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pairwiseseqscommand.cpp; path = source/commands/pairwiseseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B77E12D37EC400DA6239 /* pairwiseseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pairwiseseqscommand.h; path = source/commands/pairwiseseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B77F12D37EC400DA6239 /* fastaqinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = fastaqinfocommand.cpp; path = source/commands/fastaqinfocommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B78012D37EC400DA6239 /* fastaqinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = fastaqinfocommand.h; path = source/commands/fastaqinfocommand.h; sourceTree = SOURCE_ROOT; }; A7E9B78312D37EC400DA6239 /* parsimony.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parsimony.cpp; path = 
source/calculators/parsimony.cpp; sourceTree = SOURCE_ROOT; }; A7E9B78412D37EC400DA6239 /* parsimony.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = parsimony.h; path = source/calculators/parsimony.h; sourceTree = SOURCE_ROOT; }; A7E9B78512D37EC400DA6239 /* parsimonycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parsimonycommand.cpp; path = source/commands/parsimonycommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B78612D37EC400DA6239 /* parsimonycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = parsimonycommand.h; path = source/commands/parsimonycommand.h; sourceTree = SOURCE_ROOT; }; A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pcoacommand.cpp; path = source/commands/pcoacommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B78812D37EC400DA6239 /* pcoacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pcoacommand.h; path = source/commands/pcoacommand.h; sourceTree = SOURCE_ROOT; }; A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = phylodiversitycommand.cpp; path = source/commands/phylodiversitycommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B78C12D37EC400DA6239 /* phylodiversitycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = phylodiversitycommand.h; path = source/commands/phylodiversitycommand.h; sourceTree = SOURCE_ROOT; }; A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = phylosummary.cpp; path = source/classifier/phylosummary.cpp; sourceTree = SOURCE_ROOT; }; A7E9B78E12D37EC400DA6239 /* phylosummary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = phylosummary.h; path = source/classifier/phylosummary.h; sourceTree = SOURCE_ROOT; }; A7E9B78F12D37EC400DA6239 /* phylotree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = phylotree.cpp; path = source/classifier/phylotree.cpp; sourceTree = SOURCE_ROOT; }; A7E9B79012D37EC400DA6239 /* phylotree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = phylotree.h; path = source/classifier/phylotree.h; sourceTree = SOURCE_ROOT; }; A7E9B79112D37EC400DA6239 /* phylotypecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = phylotypecommand.cpp; path = source/commands/phylotypecommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B79212D37EC400DA6239 /* phylotypecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = phylotypecommand.h; path = source/commands/phylotypecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B79312D37EC400DA6239 /* pintail.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pintail.cpp; path = source/chimera/pintail.cpp; sourceTree = SOURCE_ROOT; }; A7E9B79412D37EC400DA6239 /* pintail.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pintail.h; path = source/chimera/pintail.h; sourceTree = SOURCE_ROOT; }; A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.cpp.cpp; name = preclustercommand.cpp; path = source/commands/preclustercommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B79812D37EC400DA6239 /* preclustercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = preclustercommand.h; path = source/commands/preclustercommand.h; sourceTree = SOURCE_ROOT; }; A7E9B79912D37EC400DA6239 /* prng.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = prng.cpp; path = source/calculators/prng.cpp; sourceTree = SOURCE_ROOT; }; A7E9B79A12D37EC400DA6239 /* prng.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = prng.h; path = source/calculators/prng.h; sourceTree = SOURCE_ROOT; }; A7E9B79D12D37EC400DA6239 /* qstat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = qstat.cpp; path = source/calculators/qstat.cpp; sourceTree = SOURCE_ROOT; }; A7E9B79E12D37EC400DA6239 /* qstat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = qstat.h; path = source/calculators/qstat.h; sourceTree = SOURCE_ROOT; }; A7E9B79F12D37EC400DA6239 /* qualityscores.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = qualityscores.cpp; path = source/datastructures/qualityscores.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7A012D37EC400DA6239 /* qualityscores.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = qualityscores.h; path = source/datastructures/qualityscores.h; sourceTree = SOURCE_ROOT; }; A7E9B7A112D37EC400DA6239 /* quitcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = quitcommand.cpp; path = source/commands/quitcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7A212D37EC400DA6239 /* quitcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = quitcommand.h; path = source/commands/quitcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7A312D37EC400DA6239 /* rabundvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rabundvector.cpp; path = source/datastructures/rabundvector.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7A412D37EC400DA6239 /* rabundvector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = rabundvector.hpp; path = source/datastructures/rabundvector.hpp; sourceTree = SOURCE_ROOT; }; A7E9B7A712D37EC400DA6239 /* raredisplay.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = raredisplay.cpp; path = source/raredisplay.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7A812D37EC400DA6239 /* raredisplay.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = raredisplay.h; path = source/raredisplay.h; sourceTree = SOURCE_ROOT; }; A7E9B7A912D37EC400DA6239 /* rarefact.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rarefact.cpp; path = source/rarefact.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7AA12D37EC400DA6239 /* rarefact.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = rarefact.h; path = source/rarefact.h; sourceTree = SOURCE_ROOT; }; A7E9B7AB12D37EC400DA6239 /* rarefactcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rarefactcommand.cpp; 
path = source/commands/rarefactcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7AC12D37EC400DA6239 /* rarefactcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = rarefactcommand.h; path = source/commands/rarefactcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7AD12D37EC400DA6239 /* rarefactioncurvedata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = rarefactioncurvedata.h; path = source/rarefactioncurvedata.h; sourceTree = SOURCE_ROOT; }; A7E9B7AE12D37EC400DA6239 /* rarefactsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rarefactsharedcommand.cpp; path = source/commands/rarefactsharedcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7AF12D37EC400DA6239 /* rarefactsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = rarefactsharedcommand.h; path = source/commands/rarefactsharedcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7B012D37EC400DA6239 /* readblast.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = readblast.cpp; path = source/read/readblast.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7B112D37EC400DA6239 /* readblast.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = readblast.h; path = source/read/readblast.h; sourceTree = SOURCE_ROOT; }; A7E9B7B212D37EC400DA6239 /* readcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = readcluster.cpp; path = source/read/readcluster.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7B312D37EC400DA6239 /* readcluster.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = readcluster.h; path = source/read/readcluster.h; sourceTree = SOURCE_ROOT; }; A7E9B7B412D37EC400DA6239 /* readcolumn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = readcolumn.cpp; path = source/read/readcolumn.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7B512D37EC400DA6239 /* readcolumn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = readcolumn.h; path = source/read/readcolumn.h; sourceTree = SOURCE_ROOT; }; A7E9B7B812D37EC400DA6239 /* readmatrix.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = readmatrix.hpp; path = source/read/readmatrix.hpp; sourceTree = SOURCE_ROOT; }; A7E9B7BD12D37EC400DA6239 /* readphylip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = readphylip.cpp; path = source/read/readphylip.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7BE12D37EC400DA6239 /* readphylip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = readphylip.h; path = source/read/readphylip.h; sourceTree = SOURCE_ROOT; }; A7E9B7BF12D37EC400DA6239 /* readtree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = readtree.cpp; path = source/read/readtree.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7C012D37EC400DA6239 /* readtree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = readtree.h; path = source/read/readtree.h; sourceTree = SOURCE_ROOT; }; A7E9B7C312D37EC400DA6239 /* removegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = removegroupscommand.cpp; path = 
source/commands/removegroupscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7C412D37EC400DA6239 /* removegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = removegroupscommand.h; path = source/commands/removegroupscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7C512D37EC400DA6239 /* removelineagecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = removelineagecommand.cpp; path = source/commands/removelineagecommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7C612D37EC400DA6239 /* removelineagecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = removelineagecommand.h; path = source/commands/removelineagecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7C912D37EC400DA6239 /* removeseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = removeseqscommand.cpp; path = source/commands/removeseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7CA12D37EC400DA6239 /* removeseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = removeseqscommand.h; path = source/commands/removeseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7CD12D37EC400DA6239 /* reversecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = reversecommand.cpp; path = source/commands/reversecommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7CE12D37EC400DA6239 /* reversecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = reversecommand.h; path = source/commands/reversecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7CF12D37EC400DA6239 /* sabundvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sabundvector.cpp; path = source/datastructures/sabundvector.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7D012D37EC400DA6239 /* sabundvector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sabundvector.hpp; path = source/datastructures/sabundvector.hpp; sourceTree = SOURCE_ROOT; }; A7E9B7D112D37EC400DA6239 /* screenseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = screenseqscommand.cpp; path = source/commands/screenseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7D212D37EC400DA6239 /* screenseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = screenseqscommand.h; path = source/commands/screenseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7D312D37EC400DA6239 /* aligncheckcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = aligncheckcommand.cpp; path = source/commands/aligncheckcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7D412D37EC400DA6239 /* aligncheckcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = aligncheckcommand.h; path = source/commands/aligncheckcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7D512D37EC400DA6239 /* sensspeccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sensspeccommand.cpp; path = source/commands/sensspeccommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7D612D37EC400DA6239 /* sensspeccommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sensspeccommand.h; path = 
source/commands/sensspeccommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7D712D37EC400DA6239 /* seqerrorcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = seqerrorcommand.cpp; path = source/commands/seqerrorcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7D812D37EC400DA6239 /* seqerrorcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = seqerrorcommand.h; path = source/commands/seqerrorcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7D912D37EC400DA6239 /* seqsummarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = seqsummarycommand.cpp; path = source/commands/seqsummarycommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7DA12D37EC400DA6239 /* seqsummarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = seqsummarycommand.h; path = source/commands/seqsummarycommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7DB12D37EC400DA6239 /* sequence.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sequence.cpp; path = source/datastructures/sequence.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7DC12D37EC400DA6239 /* sequence.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sequence.hpp; path = source/datastructures/sequence.hpp; sourceTree = SOURCE_ROOT; }; A7E9B7DD12D37EC400DA6239 /* sequencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sequencedb.cpp; path = source/datastructures/sequencedb.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7DE12D37EC400DA6239 /* sequencedb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sequencedb.h; path = source/datastructures/sequencedb.h; sourceTree = SOURCE_ROOT; }; A7E9B7DF12D37EC400DA6239 /* setdircommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = setdircommand.cpp; path = source/commands/setdircommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7E012D37EC400DA6239 /* setdircommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = setdircommand.h; path = source/commands/setdircommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7E112D37EC400DA6239 /* setlogfilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = setlogfilecommand.cpp; path = source/commands/setlogfilecommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7E212D37EC400DA6239 /* setlogfilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = setlogfilecommand.h; path = source/commands/setlogfilecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7E312D37EC400DA6239 /* sffinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sffinfocommand.cpp; path = source/commands/sffinfocommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7E412D37EC400DA6239 /* sffinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sffinfocommand.h; path = source/commands/sffinfocommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7E512D37EC400DA6239 /* shannon.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = shannon.cpp; path = source/calculators/shannon.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7E612D37EC400DA6239 /* shannon.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shannon.h; path = source/calculators/shannon.h; sourceTree = SOURCE_ROOT; }; A7E9B7E712D37EC400DA6239 /* shannoneven.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = shannoneven.cpp; path = source/calculators/shannoneven.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7E812D37EC400DA6239 /* shannoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shannoneven.h; path = source/calculators/shannoneven.h; sourceTree = SOURCE_ROOT; }; A7E9B7E912D37EC400DA6239 /* sharedace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedace.cpp; path = source/calculators/sharedace.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7EA12D37EC400DA6239 /* sharedace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedace.h; path = source/calculators/sharedace.h; sourceTree = SOURCE_ROOT; }; A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedanderbergs.cpp; path = source/calculators/sharedanderbergs.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7ED12D37EC400DA6239 /* sharedanderbergs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedanderbergs.h; path = source/calculators/sharedanderbergs.h; sourceTree = SOURCE_ROOT; }; A7E9B7EE12D37EC400DA6239 /* sharedbraycurtis.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedbraycurtis.cpp; path = source/calculators/sharedbraycurtis.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7EF12D37EC400DA6239 /* sharedbraycurtis.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedbraycurtis.h; path = source/calculators/sharedbraycurtis.h; sourceTree = SOURCE_ROOT; }; A7E9B7F012D37EC400DA6239 /* sharedchao1.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedchao1.cpp; path = source/calculators/sharedchao1.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7F112D37EC400DA6239 /* sharedchao1.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedchao1.h; path = source/calculators/sharedchao1.h; sourceTree = SOURCE_ROOT; }; A7E9B7F212D37EC400DA6239 /* makesharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = makesharedcommand.cpp; path = source/commands/makesharedcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7F312D37EC400DA6239 /* makesharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = makesharedcommand.h; path = source/commands/makesharedcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B7F412D37EC400DA6239 /* sharedjabund.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedjabund.cpp; path = source/calculators/sharedjabund.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7F512D37EC400DA6239 /* sharedjabund.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedjabund.h; path = source/calculators/sharedjabund.h; sourceTree = SOURCE_ROOT; }; A7E9B7F612D37EC400DA6239 /* sharedjackknife.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedjackknife.cpp; path = 
source/calculators/sharedjackknife.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7F712D37EC400DA6239 /* sharedjackknife.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedjackknife.h; path = source/calculators/sharedjackknife.h; sourceTree = SOURCE_ROOT; }; A7E9B7F812D37EC400DA6239 /* sharedjclass.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedjclass.cpp; path = source/calculators/sharedjclass.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7F912D37EC400DA6239 /* sharedjclass.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedjclass.h; path = source/calculators/sharedjclass.h; sourceTree = SOURCE_ROOT; }; A7E9B7FA12D37EC400DA6239 /* sharedjest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedjest.cpp; path = source/calculators/sharedjest.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7FB12D37EC400DA6239 /* sharedjest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedjest.h; path = source/calculators/sharedjest.h; sourceTree = SOURCE_ROOT; }; A7E9B7FC12D37EC400DA6239 /* sharedkstest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedkstest.cpp; path = source/calculators/sharedkstest.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7FD12D37EC400DA6239 /* sharedkstest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedkstest.h; path = source/calculators/sharedkstest.h; sourceTree = SOURCE_ROOT; }; A7E9B7FE12D37EC400DA6239 /* sharedkulczynski.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedkulczynski.cpp; path = source/calculators/sharedkulczynski.cpp; sourceTree = SOURCE_ROOT; }; A7E9B7FF12D37EC400DA6239 /* sharedkulczynski.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedkulczynski.h; path = source/calculators/sharedkulczynski.h; sourceTree = SOURCE_ROOT; }; A7E9B80012D37EC400DA6239 /* sharedkulczynskicody.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedkulczynskicody.cpp; path = source/calculators/sharedkulczynskicody.cpp; sourceTree = SOURCE_ROOT; }; A7E9B80112D37EC400DA6239 /* sharedkulczynskicody.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedkulczynskicody.h; path = source/calculators/sharedkulczynskicody.h; sourceTree = SOURCE_ROOT; }; A7E9B80212D37EC400DA6239 /* sharedlennon.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedlennon.cpp; path = source/calculators/sharedlennon.cpp; sourceTree = SOURCE_ROOT; }; A7E9B80312D37EC400DA6239 /* sharedlennon.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedlennon.h; path = source/calculators/sharedlennon.h; sourceTree = SOURCE_ROOT; }; A7E9B80412D37EC400DA6239 /* sharedlistvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedlistvector.cpp; path = source/datastructures/sharedlistvector.cpp; sourceTree = SOURCE_ROOT; }; A7E9B80512D37EC400DA6239 /* sharedlistvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedlistvector.h; path = source/datastructures/sharedlistvector.h; sourceTree = SOURCE_ROOT; }; 
A7E9B80612D37EC400DA6239 /* sharedmarczewski.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedmarczewski.cpp; path = source/calculators/sharedmarczewski.cpp; sourceTree = SOURCE_ROOT; }; A7E9B80712D37EC400DA6239 /* sharedmarczewski.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedmarczewski.h; path = source/calculators/sharedmarczewski.h; sourceTree = SOURCE_ROOT; }; A7E9B80812D37EC400DA6239 /* sharedmorisitahorn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedmorisitahorn.cpp; path = source/calculators/sharedmorisitahorn.cpp; sourceTree = SOURCE_ROOT; }; A7E9B80912D37EC400DA6239 /* sharedmorisitahorn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedmorisitahorn.h; path = source/calculators/sharedmorisitahorn.h; sourceTree = SOURCE_ROOT; }; A7E9B80A12D37EC400DA6239 /* sharednseqs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharednseqs.h; path = source/calculators/sharednseqs.h; sourceTree = SOURCE_ROOT; }; A7E9B80B12D37EC400DA6239 /* sharedochiai.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedochiai.cpp; path = source/calculators/sharedochiai.cpp; sourceTree = SOURCE_ROOT; }; A7E9B80C12D37EC400DA6239 /* sharedochiai.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedochiai.h; path = source/calculators/sharedochiai.h; sourceTree = SOURCE_ROOT; }; A7E9B80D12D37EC400DA6239 /* sharedordervector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedordervector.cpp; path = source/datastructures/sharedordervector.cpp; sourceTree = SOURCE_ROOT; }; A7E9B80E12D37EC400DA6239 /* sharedordervector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedordervector.h; path = source/datastructures/sharedordervector.h; sourceTree = SOURCE_ROOT; }; A7E9B81512D37EC400DA6239 /* sharedsobs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedsobs.cpp; path = source/calculators/sharedsobs.cpp; sourceTree = SOURCE_ROOT; }; A7E9B81612D37EC400DA6239 /* sharedsobs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedsobs.h; path = source/calculators/sharedsobs.h; sourceTree = SOURCE_ROOT; }; A7E9B81712D37EC400DA6239 /* sharedsobscollectsummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedsobscollectsummary.cpp; path = source/calculators/sharedsobscollectsummary.cpp; sourceTree = SOURCE_ROOT; }; A7E9B81812D37EC400DA6239 /* sharedsobscollectsummary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedsobscollectsummary.h; path = source/calculators/sharedsobscollectsummary.h; sourceTree = SOURCE_ROOT; }; A7E9B81912D37EC400DA6239 /* sharedsorabund.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedsorabund.cpp; path = source/calculators/sharedsorabund.cpp; sourceTree = SOURCE_ROOT; }; A7E9B81A12D37EC400DA6239 /* sharedsorabund.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedsorabund.h; path = source/calculators/sharedsorabund.h; sourceTree = SOURCE_ROOT; }; 
A7E9B81B12D37EC400DA6239 /* sharedsorclass.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedsorclass.cpp; path = source/calculators/sharedsorclass.cpp; sourceTree = SOURCE_ROOT; }; A7E9B81C12D37EC400DA6239 /* sharedsorclass.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedsorclass.h; path = source/calculators/sharedsorclass.h; sourceTree = SOURCE_ROOT; }; A7E9B81D12D37EC400DA6239 /* sharedsorest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedsorest.cpp; path = source/calculators/sharedsorest.cpp; sourceTree = SOURCE_ROOT; }; A7E9B81E12D37EC400DA6239 /* sharedsorest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedsorest.h; path = source/calculators/sharedsorest.h; sourceTree = SOURCE_ROOT; }; A7E9B81F12D37EC400DA6239 /* sharedthetan.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedthetan.cpp; path = source/calculators/sharedthetan.cpp; sourceTree = SOURCE_ROOT; }; A7E9B82012D37EC400DA6239 /* sharedthetan.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedthetan.h; path = source/calculators/sharedthetan.h; sourceTree = SOURCE_ROOT; }; A7E9B82112D37EC400DA6239 /* sharedthetayc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedthetayc.cpp; path = source/calculators/sharedthetayc.cpp; sourceTree = SOURCE_ROOT; }; A7E9B82212D37EC400DA6239 /* sharedthetayc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedthetayc.h; path = source/calculators/sharedthetayc.h; sourceTree = SOURCE_ROOT; }; A7E9B82512D37EC400DA6239 /* shen.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = shen.cpp; path = source/calculators/shen.cpp; sourceTree = SOURCE_ROOT; }; A7E9B82612D37EC400DA6239 /* shen.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shen.h; path = source/calculators/shen.h; sourceTree = SOURCE_ROOT; }; A7E9B82712D37EC400DA6239 /* shhhercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = shhhercommand.cpp; path = source/commands/shhhercommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B82812D37EC400DA6239 /* shhhercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shhhercommand.h; path = source/commands/shhhercommand.h; sourceTree = SOURCE_ROOT; }; A7E9B82912D37EC400DA6239 /* simpson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = simpson.cpp; path = source/calculators/simpson.cpp; sourceTree = SOURCE_ROOT; }; A7E9B82A12D37EC400DA6239 /* simpson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = simpson.h; path = source/calculators/simpson.h; sourceTree = SOURCE_ROOT; }; A7E9B82B12D37EC400DA6239 /* simpsoneven.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = simpsoneven.cpp; path = source/calculators/simpsoneven.cpp; sourceTree = SOURCE_ROOT; }; A7E9B82C12D37EC400DA6239 /* simpsoneven.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = simpsoneven.h; path = source/calculators/simpsoneven.h; sourceTree = SOURCE_ROOT; }; 
A7E9B82D12D37EC400DA6239 /* singlelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = singlelinkage.cpp; path = source/singlelinkage.cpp; sourceTree = SOURCE_ROOT; }; A7E9B82E12D37EC400DA6239 /* slayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = slayer.cpp; path = source/chimera/slayer.cpp; sourceTree = SOURCE_ROOT; }; A7E9B82F12D37EC400DA6239 /* slayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = slayer.h; path = source/chimera/slayer.h; sourceTree = SOURCE_ROOT; }; A7E9B83012D37EC400DA6239 /* slibshuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = slibshuff.cpp; path = source/slibshuff.cpp; sourceTree = SOURCE_ROOT; }; A7E9B83112D37EC400DA6239 /* slibshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = slibshuff.h; path = source/slibshuff.h; sourceTree = SOURCE_ROOT; }; A7E9B83212D37EC400DA6239 /* smithwilson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = smithwilson.cpp; path = source/calculators/smithwilson.cpp; sourceTree = SOURCE_ROOT; }; A7E9B83312D37EC400DA6239 /* smithwilson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = smithwilson.h; path = source/calculators/smithwilson.h; sourceTree = SOURCE_ROOT; }; A7E9B83412D37EC400DA6239 /* sobs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sobs.h; path = source/calculators/sobs.h; sourceTree = SOURCE_ROOT; }; A7E9B83512D37EC400DA6239 /* soergel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = soergel.cpp; path = source/calculators/soergel.cpp; sourceTree = SOURCE_ROOT; }; A7E9B83612D37EC400DA6239 /* soergel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = soergel.h; path = source/calculators/soergel.h; sourceTree = SOURCE_ROOT; }; A7E9B83712D37EC400DA6239 /* solow.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = solow.cpp; path = source/calculators/solow.cpp; sourceTree = SOURCE_ROOT; }; A7E9B83812D37EC400DA6239 /* solow.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = solow.h; path = source/calculators/solow.h; sourceTree = SOURCE_ROOT; }; A7E9B83912D37EC400DA6239 /* sparsematrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sparsematrix.cpp; path = source/datastructures/sparsematrix.cpp; sourceTree = SOURCE_ROOT; }; A7E9B83A12D37EC400DA6239 /* sparsematrix.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sparsematrix.hpp; path = source/datastructures/sparsematrix.hpp; sourceTree = SOURCE_ROOT; }; A7E9B83B12D37EC400DA6239 /* spearman.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spearman.cpp; path = source/calculators/spearman.cpp; sourceTree = SOURCE_ROOT; }; A7E9B83C12D37EC400DA6239 /* spearman.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = spearman.h; path = source/calculators/spearman.h; sourceTree = SOURCE_ROOT; }; A7E9B83D12D37EC400DA6239 /* speciesprofile.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = 
speciesprofile.cpp; path = source/calculators/speciesprofile.cpp; sourceTree = SOURCE_ROOT; }; A7E9B83E12D37EC400DA6239 /* speciesprofile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = speciesprofile.h; path = source/calculators/speciesprofile.h; sourceTree = SOURCE_ROOT; }; A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = splitabundcommand.cpp; path = source/commands/splitabundcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B84012D37EC400DA6239 /* splitabundcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = splitabundcommand.h; path = source/commands/splitabundcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B84112D37EC400DA6239 /* splitgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = splitgroupscommand.cpp; path = source/commands/splitgroupscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B84212D37EC400DA6239 /* splitgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = splitgroupscommand.h; path = source/commands/splitgroupscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B84312D37EC400DA6239 /* splitmatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = splitmatrix.cpp; path = source/read/splitmatrix.cpp; sourceTree = SOURCE_ROOT; }; A7E9B84412D37EC400DA6239 /* splitmatrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = splitmatrix.h; path = source/read/splitmatrix.h; sourceTree = SOURCE_ROOT; }; A7E9B84512D37EC400DA6239 /* structchi2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = structchi2.cpp; path = source/calculators/structchi2.cpp; sourceTree = SOURCE_ROOT; }; A7E9B84612D37EC400DA6239 /* structchi2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = structchi2.h; path = source/calculators/structchi2.h; sourceTree = SOURCE_ROOT; }; A7E9B84712D37EC400DA6239 /* structchord.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = structchord.cpp; path = source/calculators/structchord.cpp; sourceTree = SOURCE_ROOT; }; A7E9B84812D37EC400DA6239 /* structchord.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = structchord.h; path = source/calculators/structchord.h; sourceTree = SOURCE_ROOT; }; A7E9B84912D37EC400DA6239 /* structeuclidean.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = structeuclidean.cpp; path = source/calculators/structeuclidean.cpp; sourceTree = SOURCE_ROOT; }; A7E9B84A12D37EC400DA6239 /* structeuclidean.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = structeuclidean.h; path = source/calculators/structeuclidean.h; sourceTree = SOURCE_ROOT; }; A7E9B84B12D37EC400DA6239 /* structkulczynski.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = structkulczynski.cpp; path = source/calculators/structkulczynski.cpp; sourceTree = SOURCE_ROOT; }; A7E9B84C12D37EC400DA6239 /* structkulczynski.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = structkulczynski.h; path = source/calculators/structkulczynski.h; sourceTree = SOURCE_ROOT; }; 
A7E9B84D12D37EC400DA6239 /* structpearson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = structpearson.cpp; path = source/calculators/structpearson.cpp; sourceTree = SOURCE_ROOT; }; A7E9B84E12D37EC400DA6239 /* structpearson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = structpearson.h; path = source/calculators/structpearson.h; sourceTree = SOURCE_ROOT; }; A7E9B84F12D37EC400DA6239 /* subsamplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = subsamplecommand.cpp; path = source/commands/subsamplecommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B85012D37EC400DA6239 /* subsamplecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = subsamplecommand.h; path = source/commands/subsamplecommand.h; sourceTree = SOURCE_ROOT; }; A7E9B85112D37EC400DA6239 /* suffixdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = suffixdb.cpp; path = source/datastructures/suffixdb.cpp; sourceTree = SOURCE_ROOT; }; A7E9B85212D37EC400DA6239 /* suffixdb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = suffixdb.hpp; path = source/datastructures/suffixdb.hpp; sourceTree = SOURCE_ROOT; }; A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = suffixnodes.cpp; path = source/datastructures/suffixnodes.cpp; sourceTree = SOURCE_ROOT; }; A7E9B85412D37EC400DA6239 /* suffixnodes.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = suffixnodes.hpp; path = source/datastructures/suffixnodes.hpp; sourceTree = SOURCE_ROOT; }; A7E9B85512D37EC400DA6239 /* suffixtree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = suffixtree.cpp; path = source/datastructures/suffixtree.cpp; sourceTree = SOURCE_ROOT; }; A7E9B85612D37EC400DA6239 /* suffixtree.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = suffixtree.hpp; path = source/datastructures/suffixtree.hpp; sourceTree = SOURCE_ROOT; }; A7E9B85712D37EC400DA6239 /* summarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = summarycommand.cpp; path = source/commands/summarycommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B85812D37EC400DA6239 /* summarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = summarycommand.h; path = source/commands/summarycommand.h; sourceTree = SOURCE_ROOT; }; A7E9B85912D37EC400DA6239 /* summarysharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = summarysharedcommand.cpp; path = source/commands/summarysharedcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B85A12D37EC400DA6239 /* summarysharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = summarysharedcommand.h; path = source/commands/summarysharedcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B85B12D37EC400DA6239 /* systemcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = systemcommand.cpp; path = source/commands/systemcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B85C12D37EC400DA6239 /* systemcommand.h */ = {isa = PBXFileReference; fileEncoding 
= 4; lastKnownFileType = sourcecode.c.h; name = systemcommand.h; path = source/commands/systemcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B85D12D37EC400DA6239 /* taxonomyequalizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = taxonomyequalizer.cpp; path = source/classifier/taxonomyequalizer.cpp; sourceTree = SOURCE_ROOT; }; A7E9B85E12D37EC400DA6239 /* taxonomyequalizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = taxonomyequalizer.h; path = source/classifier/taxonomyequalizer.h; sourceTree = SOURCE_ROOT; }; A7E9B85F12D37EC400DA6239 /* tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = tree.cpp; path = source/datastructures/tree.cpp; sourceTree = SOURCE_ROOT; }; A7E9B86012D37EC400DA6239 /* tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = tree.h; path = source/datastructures/tree.h; sourceTree = SOURCE_ROOT; }; A7E9B86112D37EC400DA6239 /* treecalculator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = treecalculator.h; path = source/calculators/treecalculator.h; sourceTree = SOURCE_ROOT; }; A7E9B86212D37EC400DA6239 /* treesharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = treesharedcommand.cpp; path = source/commands/treesharedcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B86312D37EC400DA6239 /* treesharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = treesharedcommand.h; path = source/commands/treesharedcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B86412D37EC400DA6239 /* treemap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = treemap.cpp; path = source/datastructures/treemap.cpp; sourceTree = SOURCE_ROOT; }; A7E9B86512D37EC400DA6239 /* treemap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = treemap.h; path = source/datastructures/treemap.h; sourceTree = SOURCE_ROOT; }; A7E9B86612D37EC400DA6239 /* treenode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = treenode.cpp; path = source/datastructures/treenode.cpp; sourceTree = SOURCE_ROOT; }; A7E9B86712D37EC400DA6239 /* treenode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = treenode.h; path = source/datastructures/treenode.h; sourceTree = SOURCE_ROOT; }; A7E9B86812D37EC400DA6239 /* trimflowscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = trimflowscommand.cpp; path = source/commands/trimflowscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B86912D37EC400DA6239 /* trimflowscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = trimflowscommand.h; path = source/commands/trimflowscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B86A12D37EC400DA6239 /* trimseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = trimseqscommand.cpp; path = source/commands/trimseqscommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B86B12D37EC400DA6239 /* trimseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = trimseqscommand.h; path = source/commands/trimseqscommand.h; sourceTree = SOURCE_ROOT; }; A7E9B86C12D37EC400DA6239 /* 
unifracunweightedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = unifracunweightedcommand.cpp; path = source/commands/unifracunweightedcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B86D12D37EC400DA6239 /* unifracunweightedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = unifracunweightedcommand.h; path = source/commands/unifracunweightedcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B86E12D37EC400DA6239 /* unifracweightedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = unifracweightedcommand.cpp; path = source/commands/unifracweightedcommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B86F12D37EC400DA6239 /* unifracweightedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = unifracweightedcommand.h; path = source/commands/unifracweightedcommand.h; sourceTree = SOURCE_ROOT; }; A7E9B87012D37EC400DA6239 /* unweighted.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = unweighted.cpp; path = source/calculators/unweighted.cpp; sourceTree = SOURCE_ROOT; }; A7E9B87112D37EC400DA6239 /* unweighted.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = unweighted.h; path = source/calculators/unweighted.h; sourceTree = SOURCE_ROOT; }; A7E9B87212D37EC400DA6239 /* uvest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = uvest.cpp; path = source/calculators/uvest.cpp; sourceTree = SOURCE_ROOT; }; A7E9B87312D37EC400DA6239 /* uvest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = uvest.h; path = source/calculators/uvest.h; sourceTree = SOURCE_ROOT; }; A7E9B87412D37EC400DA6239 /* validcalculator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = validcalculator.cpp; path = source/validcalculator.cpp; sourceTree = SOURCE_ROOT; }; A7E9B87512D37EC400DA6239 /* validcalculator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = validcalculator.h; path = source/validcalculator.h; sourceTree = SOURCE_ROOT; }; A7E9B87612D37EC400DA6239 /* validparameter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = validparameter.cpp; path = source/validparameter.cpp; sourceTree = SOURCE_ROOT; }; A7E9B87712D37EC400DA6239 /* validparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = validparameter.h; path = source/validparameter.h; sourceTree = SOURCE_ROOT; }; A7E9B87812D37EC400DA6239 /* venn.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = venn.cpp; path = source/venn.cpp; sourceTree = SOURCE_ROOT; }; A7E9B87912D37EC400DA6239 /* venn.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = venn.h; path = source/venn.h; sourceTree = SOURCE_ROOT; }; A7E9B87A12D37EC400DA6239 /* venncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = venncommand.cpp; path = source/commands/venncommand.cpp; sourceTree = SOURCE_ROOT; }; A7E9B87B12D37EC400DA6239 /* venncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = venncommand.h; path = source/commands/venncommand.h; sourceTree = SOURCE_ROOT; }; 
A7E9B87C12D37EC400DA6239 /* weighted.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = weighted.cpp; path = source/calculators/weighted.cpp; sourceTree = SOURCE_ROOT; }; A7E9B87D12D37EC400DA6239 /* weighted.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = weighted.h; path = source/calculators/weighted.h; sourceTree = SOURCE_ROOT; }; A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = weightedlinkage.cpp; path = source/weightedlinkage.cpp; sourceTree = SOURCE_ROOT; }; A7E9B87F12D37EC400DA6239 /* whittaker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = whittaker.cpp; path = source/calculators/whittaker.cpp; sourceTree = SOURCE_ROOT; }; A7E9B88012D37EC400DA6239 /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = whittaker.h; path = source/calculators/whittaker.h; sourceTree = SOURCE_ROOT; }; A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = classifytreecommand.cpp; path = source/commands/classifytreecommand.cpp; sourceTree = SOURCE_ROOT; }; A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = classifytreecommand.h; path = source/commands/classifytreecommand.h; sourceTree = SOURCE_ROOT; }; A7F9F5CD141A5E500032F693 /* sequenceparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sequenceparser.h; path = source/datastructures/sequenceparser.h; sourceTree = SOURCE_ROOT; }; A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sequenceparser.cpp; path = source/datastructures/sequenceparser.cpp; sourceTree = SOURCE_ROOT; }; A7FA10001302E096003860FE /* mantelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mantelcommand.h; path = source/commands/mantelcommand.h; sourceTree = SOURCE_ROOT; }; A7FA10011302E096003860FE /* mantelcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mantelcommand.cpp; path = source/commands/mantelcommand.cpp; sourceTree = SOURCE_ROOT; }; A7FC480C12D788F20055BC5C /* linearalgebra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = linearalgebra.h; path = source/linearalgebra.h; sourceTree = SOURCE_ROOT; }; A7FC480D12D788F20055BC5C /* linearalgebra.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = linearalgebra.cpp; path = source/linearalgebra.cpp; sourceTree = SOURCE_ROOT; }; A7FC486512D795D60055BC5C /* pcacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pcacommand.h; path = source/commands/pcacommand.h; sourceTree = SOURCE_ROOT; }; A7FC486612D795D60055BC5C /* pcacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pcacommand.cpp; path = source/commands/pcacommand.cpp; sourceTree = SOURCE_ROOT; }; A7FE7C3E1330EA1000F7B327 /* getcurrentcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getcurrentcommand.h; path = 
source/commands/getcurrentcommand.h; sourceTree = SOURCE_ROOT; }; A7FE7C3F1330EA1000F7B327 /* getcurrentcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getcurrentcommand.cpp; path = source/commands/getcurrentcommand.cpp; sourceTree = SOURCE_ROOT; }; A7FE7E6B13311EA400F7B327 /* setcurrentcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = setcurrentcommand.h; path = source/commands/setcurrentcommand.h; sourceTree = SOURCE_ROOT; }; A7FE7E6C13311EA400F7B327 /* setcurrentcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = setcurrentcommand.cpp; path = source/commands/setcurrentcommand.cpp; sourceTree = SOURCE_ROOT; }; A7FF19F0140FFDA500AD216D /* trimoligos.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = trimoligos.h; path = source/trimoligos.h; sourceTree = SOURCE_ROOT; }; A7FF19F1140FFDA500AD216D /* trimoligos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = trimoligos.cpp; path = source/trimoligos.cpp; sourceTree = SOURCE_ROOT; }; A7FFB556142CA02C004884F2 /* summarytaxcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = summarytaxcommand.h; path = source/commands/summarytaxcommand.h; sourceTree = SOURCE_ROOT; }; A7FFB557142CA02C004884F2 /* summarytaxcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = summarytaxcommand.cpp; path = source/commands/summarytaxcommand.cpp; sourceTree = SOURCE_ROOT; }; F40859AF280F2DDB00F19B1A /* README.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = README.txt; sourceTree = ""; }; F40859B0280F3CB200F19B1A /* README.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = README.txt; sourceTree = ""; }; F40859B12811AE6500F19B1A /* README.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = README.txt; sourceTree = ""; }; F4103A4B25A3A40F001ED741 /* libboost_filesystem.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libboost_filesystem.a; path = mothur_resources/libs/libboost_filesystem.a; sourceTree = SOURCE_ROOT; }; F4103A4F25A3A411001ED741 /* libboost_iostreams.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libboost_iostreams.a; path = mothur_resources/libs/libboost_iostreams.a; sourceTree = SOURCE_ROOT; }; F4103A5D25A3A7C7001ED741 /* libhdf5_hl.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libhdf5_hl.a; path = mothur_resources/libs/libhdf5_hl.a; sourceTree = SOURCE_ROOT; }; F4103A5E25A3A7C7001ED741 /* libhdf5_hl_cpp.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libhdf5_hl_cpp.a; path = mothur_resources/libs/libhdf5_hl_cpp.a; sourceTree = SOURCE_ROOT; }; F4103A5F25A3A7C7001ED741 /* libhdf5.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libhdf5.a; path = mothur_resources/libs/libhdf5.a; sourceTree = SOURCE_ROOT; }; F4103A6025A3A7C7001ED741 /* libhdf5_cpp.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libhdf5_cpp.a; path = mothur_resources/libs/libhdf5_cpp.a; sourceTree = SOURCE_ROOT; }; F4103A6F25A4C4D2001ED741 /* diversityestimatorcommand.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = diversityestimatorcommand.hpp; path = 
source/commands/diversityestimatorcommand.hpp; sourceTree = SOURCE_ROOT; }; F4103A7F25A4C831001ED741 /* libgslcblas.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libgslcblas.a; path = mothur_resources/libs/libgslcblas.a; sourceTree = SOURCE_ROOT; }; F4103A9D25A4D00F001ED741 /* libgsl.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libgsl.a; path = mothur_resources/libs/libgsl.a; sourceTree = SOURCE_ROOT; }; F41A1B8F261257DE00144985 /* kmerdist.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = kmerdist.cpp; path = source/calculators/kmerdist.cpp; sourceTree = SOURCE_ROOT; }; F41A1B90261257DE00144985 /* kmerdist.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = kmerdist.hpp; path = source/calculators/kmerdist.hpp; sourceTree = SOURCE_ROOT; }; F44268EC27BD52D50000C15D /* alignmusclecommand.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = alignmusclecommand.cpp; sourceTree = ""; }; F44268ED27BD52D50000C15D /* alignmusclecommand.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = alignmusclecommand.hpp; sourceTree = ""; }; F45A2E3B25A78B4D00994F76 /* contigsreport.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = contigsreport.hpp; sourceTree = ""; }; F45A2E3C25A78B4D00994F76 /* contigsreport.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = contigsreport.cpp; sourceTree = ""; }; F45A2E5025BF229600994F76 /* main.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = main.cpp; sourceTree = ""; }; F490972626090AC500C1B24F /* External_Libraries_INSTALL */ = {isa = PBXFileReference; lastKnownFileType = text; path = External_Libraries_INSTALL; sourceTree = ""; }; F4A866B5265BE7720010479A /* protein.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = protein.cpp; path = source/datastructures/protein.cpp; sourceTree = SOURCE_ROOT; }; F4A866B6265BE7720010479A /* protein.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = protein.hpp; path = source/datastructures/protein.hpp; sourceTree = SOURCE_ROOT; }; F4A866BD265BE7EC0010479A /* aminoacid.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = aminoacid.cpp; sourceTree = ""; }; F4A866BE265BE7EC0010479A /* aminoacid.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = aminoacid.hpp; sourceTree = ""; }; F4A866CA265EBD270010479A /* jtt.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = jtt.hpp; sourceTree = ""; }; F4A866CF266912830010479A /* proteindb.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = proteindb.cpp; sourceTree = ""; }; F4A866D0266912830010479A /* proteindb.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = proteindb.hpp; sourceTree = ""; }; F4A866DA266946AB0010479A /* storagedatabase.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = storagedatabase.hpp; sourceTree = ""; }; F4A86700268B71A80010479A /* pmb.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = pmb.hpp; sourceTree = ""; }; F4A86707268E3AFA0010479A /* pam.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = pam.hpp; sourceTree = ""; }; F4A86711268F5CCE0010479A /* kimura.cpp */ = {isa = PBXFileReference; lastKnownFileType = 
sourcecode.cpp.cpp; path = kimura.cpp; sourceTree = ""; }; F4A86712268F5CCE0010479A /* kimura.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = kimura.hpp; sourceTree = ""; }; F4B4B0DA27396EF7003B2133 /* translateseqscommand.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = translateseqscommand.cpp; sourceTree = ""; }; F4B4B0DB27396EF7003B2133 /* translateseqscommand.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = translateseqscommand.hpp; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ 481FB5161AC0A63E0076CFF3 /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( ); runOnlyForDeploymentPostprocessing = 0; }; 8DD76FAD0486AB0100D96B5E /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( ); runOnlyForDeploymentPostprocessing = 0; }; /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ 08FB7794FE84155DC02AAC07 /* mothur */ = { isa = PBXGroup; children = ( 4837E5D622DE1BC400D3234B /* TestBatches */, 08FB7795FE84155DC02AAC07 /* Source */, 481FB51A1AC0A63E0076CFF3 /* TestMothur */, F45A2E4F25BF229600994F76 /* mothur */, 1AB674ADFE9D54B511CA2CBB /* Products */, 489ECDA2215EB30A0036D42C /* Frameworks */, ); name = mothur; sourceTree = ""; }; 08FB7795FE84155DC02AAC07 /* Source */ = { isa = PBXGroup; children = ( 4875F69922DCC723006A7D8C /* Ubuntu_20_Build.txt */, F490972626090AC500C1B24F /* External_Libraries_INSTALL */, A7A61F1A130035C800E05B6B /* LICENSE.md */, 48FD9946243E5FB10017C521 /* Makefile_cluster */, A70332B512D3A13400761E33 /* Makefile */, 484F21691BA1C5F8001C1B5F /* makefile-internal */, 481623E31B58267D004C60B7 /* INSTALL.md */, 2114A7671C654D7400D3D8D9 /* averagelinkage.cpp */, A77B718A173D40E4002163C2 /* calcsparcc.h */, A77B7189173D40E4002163C2 /* calcsparcc.cpp */, A7E9BA4F12D398D700DA6239 /* clearcut */, A7E9B69812D37EC400DA6239 /* cluster.cpp */, A7E9B69912D37EC400DA6239 /* cluster.hpp */, A7E9B69A12D37EC400DA6239 /* clusterclassic.cpp */, A7E9B69B12D37EC400DA6239 /* clusterclassic.h */, A7E9BA3F12D395F700DA6239 /* calculators */, A7E9BA4512D3965600DA6239 /* chimera */, A7E9BA4B12D3966900DA6239 /* classifier */, A7E9B6A612D37EC400DA6239 /* collect.cpp */, A7E9B6A712D37EC400DA6239 /* collect.h */, A7E9B6AA12D37EC400DA6239 /* collectdisplay.h */, A7E9B6AB12D37EC400DA6239 /* collectorscurvedata.h */, 48F98E4C1A9CFD670005E81B /* completelinkage.cpp */, A7E9BA3812D3956100DA6239 /* commands */, A7E9B6AF12D37EC400DA6239 /* commandfactory.cpp */, A7E9B6B012D37EC400DA6239 /* commandfactory.hpp */, A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */, A7E9B6B212D37EC400DA6239 /* commandoptionparser.hpp */, A7DAAFA3133A254E003956EB /* commandparameter.h */, A7D395C1184FA34300A350D7 /* communitytype */, A7E9BA4212D3960D00DA6239 /* containers */, A7E9B6B612D37EC400DA6239 /* consensus.h */, A7E9B6B512D37EC400DA6239 /* consensus.cpp */, A7AACFBA132FE008003D6C4D /* currentfile.h */, 48B44EED1FB5006500789C45 /* currentfile.cpp */, A7E9B6C912D37EC400DA6239 /* display.h */, A7E9B6D112D37EC400DA6239 /* dlibshuff.cpp */, A7E9B6D212D37EC400DA6239 /* dlibshuff.h */, A7E9B6D912D37EC400DA6239 /* endiannessmacros.h */, 48ED1E76235E1A3B003E66F7 /* engines */, A7E9B6E012D37EC400DA6239 /* fileoutput.cpp */, A7E9B6E112D37EC400DA6239 /* fileoutput.h */, A7E9B71112D37EC400DA6239 /* gotohoverlap.hpp */, A7E9B71012D37EC400DA6239 /* gotohoverlap.cpp */, A7E9B71C12D37EC400DA6239 
/* heatmap.cpp */, A7E9B71D12D37EC400DA6239 /* heatmap.h */, A7E9B72012D37EC400DA6239 /* heatmapsim.cpp */, A7E9B72112D37EC400DA6239 /* heatmapsim.h */, A7E9B72E12D37EC400DA6239 /* inputdata.h */, A7E9B72D12D37EC400DA6239 /* inputdata.cpp */, A7E9B73912D37EC400DA6239 /* libshuff.cpp */, A7E9B73A12D37EC400DA6239 /* libshuff.h */, A7FC480C12D788F20055BC5C /* linearalgebra.h */, A7FC480D12D788F20055BC5C /* linearalgebra.cpp */, A7E9BA5612D39BD800DA6239 /* metastats */, A7E9B75B12D37EC400DA6239 /* mothur.cpp */, A7E9B75C12D37EC400DA6239 /* mothur.h */, A7E9B75D12D37EC400DA6239 /* mothurout.cpp */, A7E9B75E12D37EC400DA6239 /* mothurout.h */, A774104714696F320098E6AC /* myseqdist.h */, A774104614696F320098E6AC /* myseqdist.cpp */, A7E9B76112D37EC400DA6239 /* nast.cpp */, A7E9B76212D37EC400DA6239 /* nast.hpp */, A7E9B76712D37EC400DA6239 /* noalign.cpp */, A7E9B76812D37EC400DA6239 /* noalign.hpp */, A7E9B76512D37EC400DA6239 /* needlemanoverlap.cpp */, A7E9B76612D37EC400DA6239 /* needlemanoverlap.hpp */, A7E9B77012D37EC400DA6239 /* observable.h */, 48FB99CD20A4F3FB00FF9F6E /* optifitcluster.cpp */, 48FB99CE20A4F3FB00FF9F6E /* optifitcluster.hpp */, 48910D4C1D58CBFC00F60EDB /* opticluster.h */, 48910D451D58CAD700F60EDB /* opticluster.cpp */, A7E9B77512D37EC400DA6239 /* optionparser.cpp */, A7E9B77612D37EC400DA6239 /* optionparser.h */, A7E9B77B12D37EC400DA6239 /* overlap.cpp */, A7E9B77C12D37EC400DA6239 /* overlap.hpp */, A7E9B7A712D37EC400DA6239 /* raredisplay.cpp */, A7E9B7A812D37EC400DA6239 /* raredisplay.h */, A7E9B7A912D37EC400DA6239 /* rarefact.cpp */, A7E9B7AA12D37EC400DA6239 /* rarefact.h */, A7E9B7AD12D37EC400DA6239 /* rarefactioncurvedata.h */, 7E6BE10812F710D8007ADDBE /* refchimeratest.h */, 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */, 48B01D2A2016470F006BE140 /* sensspeccalc.cpp */, 48B01D2B2016470F006BE140 /* sensspeccalc.hpp */, A77410F514697C300098E6AC /* seqnoise.h */, 486741981FD9ACCE00B07480 /* sharedwriter.hpp */, A7E9BA5312D39A5E00DA6239 /* read */, A7E9B82D12D37EC400DA6239 /* singlelinkage.cpp */, A7E9B83012D37EC400DA6239 /* slibshuff.cpp */, A7E9B83112D37EC400DA6239 /* slibshuff.h */, A7876A28152A018B00A0AE86 /* subsample.h */, A7876A25152A017C00A0AE86 /* subsample.cpp */, 4889EA211E8962D50054E0BB /* summary.hpp */, 4889EA201E8962D50054E0BB /* summary.cpp */, 7B17437A17AF6F02004C161B /* svm */, A7C3DC0E14FE469500FE1924 /* trialswap2.h */, A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */, A7FF19F0140FFDA500AD216D /* trimoligos.h */, A7FF19F1140FFDA500AD216D /* trimoligos.cpp */, A77410F414697C300098E6AC /* seqnoise.cpp */, 48B44EF01FB9EF8200789C45 /* utils.cpp */, 48B44EF11FB9EF8200789C45 /* utils.hpp */, 48789AEE206176EF00A7D848 /* utf8 */, A7E9B87412D37EC400DA6239 /* validcalculator.cpp */, A7E9B87512D37EC400DA6239 /* validcalculator.h */, A7E9B87612D37EC400DA6239 /* validparameter.cpp */, A7E9B87712D37EC400DA6239 /* validparameter.h */, A7E9B87812D37EC400DA6239 /* venn.cpp */, A7E9B87912D37EC400DA6239 /* venn.h */, 489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */, 489B55711BCD7F0100FB7DC8 /* vsearchfileparser.h */, A7D9378B17B15215001E90B0 /* wilcox.h */, A7D9378917B146B5001E90B0 /* wilcox.cpp */, A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */, 4867419A1FD9B3FE00B07480 /* writer.h */, ); name = Source; sourceTree = ""; }; 1AB674ADFE9D54B511CA2CBB /* Products */ = { isa = PBXGroup; children = ( 8DD76FB20486AB0100D96B5E /* mothur */, 481FB5191AC0A63E0076CFF3 /* TestMothur */, ); name = Products; sourceTree = ""; }; 480D1E2B1EA6858700BF9C77 /* fakes */ = { isa = 
PBXGroup; children = ( 480D1E2D1EA685C500BF9C77 /* fakemcc.hpp */, 48CC010E1EB79E49009D61E6 /* fakeoligos.h */, 480D1E2F1EA92D5500BF9C77 /* fakeoptimatrix.cpp */, 480D1E301EA92D5500BF9C77 /* fakeoptimatrix.hpp */, ); name = fakes; path = TestMothur; sourceTree = SOURCE_ROOT; }; 481FB51A1AC0A63E0076CFF3 /* TestMothur */ = { isa = PBXGroup; children = ( 481FB51B1AC0A63E0076CFF3 /* main.cpp */, 480D1E2B1EA6858700BF9C77 /* fakes */, 48F06CCA1D74BC6F004A45DD /* testclassifier */, 48D6E9691CA4262A008DF76B /* dataset.cpp */, 48D6E96A1CA4262A008DF76B /* dataset.h */, 48910D4F1D58E26C00F60EDB /* distcdataset.h */, 48910D501D58E26C00F60EDB /* distcdataset.cpp */, 48576EA61D05F59300BBC9C0 /* distpdataset.cpp */, 48576EA71D05F59300BBC9C0 /* distpdataset.h */, 4827A4DA1CB3ED2100345170 /* fastqdataset.cpp */, 4827A4DB1CB3ED2100345170 /* fastqdataset.h */, 480D1E281EA681D100BF9C77 /* testclustercalcs.cpp */, 480D1E291EA681D100BF9C77 /* testclustercalcs.hpp */, 48910D4E1D58E26C00F60EDB /* testopticluster.h */, 48910D4D1D58E26C00F60EDB /* testopticluster.cpp */, 48098ED4219DE7A500031FA4 /* testsubsample.cpp */, 48098ED5219DE7A500031FA4 /* testsubsample.hpp */, 4846AD881D3810DD00DE9913 /* testtrimoligos.cpp */, 4846AD891D3810DD00DE9913 /* testtrimoligos.hpp */, 48D6E9661CA42389008DF76B /* testvsearchfileparser.cpp */, 48D6E9671CA42389008DF76B /* testvsearchfileparser.h */, 481FB5221AC0AA010076CFF3 /* testcontainers */, 481FB5211AC0A9B40076CFF3 /* testcommands */, ); path = TestMothur; sourceTree = ""; }; 481FB5211AC0A9B40076CFF3 /* testcommands */ = { isa = PBXGroup; children = ( 4829D9651B8387D0002EEED4 /* testbiominfocommand.cpp */, 4829D9661B8387D0002EEED4 /* testbiominfocommand.h */, 48C728741B6AB4CD00D40830 /* testgetgroupscommand.h */, 48C728731B6AB4CD00D40830 /* testgetgroupscommand.cpp */, 48C728691B69598400D40830 /* testmergegroupscommand.h */, 48C728681B69598400D40830 /* testmergegroupscommand.cpp */, 48A11C6C1CDA40F0003481D8 /* testrenamefilecommand.cpp */, 48A11C6D1CDA40F0003481D8 /* testrenamefilecommand.h */, 48B662011BBB1B6600997EE4 /* testrenameseqscommand.cpp */, 48B662021BBB1B6600997EE4 /* testrenameseqscommand.h */, 48C7286F1B6AB3B900D40830 /* testremovegroupscommand.cpp */, 48C728701B6AB3B900D40830 /* testremovegroupscommand.h */, 481FB52D1AC1B0CB0076CFF3 /* testsetseedcommand.cpp */, ); name = testcommands; path = TestMothur; sourceTree = SOURCE_ROOT; }; 481FB5221AC0AA010076CFF3 /* testcontainers */ = { isa = PBXGroup; children = ( 480E8DAF1CAB12ED00A0D137 /* testfastqread.cpp */, 480E8DB01CAB12ED00A0D137 /* testfastqread.h */, 4810D5B5218208CC00C668E8 /* testcounttable.cpp */, 4810D5B6218208CC00C668E8 /* testcounttable.hpp */, 489387F7210F633E00284329 /* testOligos.cpp */, 489387F8210F633E00284329 /* testOligos.hpp */, 48576EA31D05E8F600BBC9C0 /* testoptimatrix.cpp */, 48576EA41D05E8F600BBC9C0 /* testoptimatrix.h */, 489387F42107A60C00284329 /* testoptirefmatrix.cpp */, 489387F52107A60C00284329 /* testoptirefmatrix.hpp */, 48C728641B66A77800D40830 /* testsequence.cpp */, 48C728761B6AB4EE00D40830 /* testsequence.h */, 4803D5AE211CD839001C63B5 /* testsharedrabundfloatvector.cpp */, 4803D5AF211CD839001C63B5 /* testsharedrabundfloatvector.hpp */, 4803D5B421231D9D001C63B5 /* testsharedrabundfloatvectors.cpp */, 4803D5B521231D9D001C63B5 /* testsharedrabundfloatvectors.hpp */, 4803D5AB211CA67F001C63B5 /* testsharedrabundvector.cpp */, 4803D5AC211CA67F001C63B5 /* testsharedrabundvector.hpp */, 4803D5B1211DDA5A001C63B5 /* testsharedrabundvectors.cpp */, 4803D5B2211DDA5A001C63B5 /* 
testsharedrabundvectors.hpp */, ); name = testcontainers; path = TestMothur; sourceTree = SOURCE_ROOT; }; 484976DC22552BEA00F3A291 /* diversitycalcs */ = { isa = PBXGroup; children = ( 484976DD22552E0B00F3A291 /* erarefaction.cpp */, 484976DE22552E0B00F3A291 /* erarefaction.hpp */, 48E7E0A42278AD4800B74910 /* diversityutils.cpp */, 48E7E0A52278AD4800B74910 /* diversityutils.hpp */, 484976E12255412400F3A291 /* igabundance.cpp */, 484976E22255412400F3A291 /* igabundance.hpp */, 4809EC9F2280898E00B4D0E5 /* igrarefaction.cpp */, 4809ECA02280898E00B4D0E5 /* igrarefaction.hpp */, 4809ECA322831A5E00B4D0E5 /* lnabundance.cpp */, 4809ECA422831A5E00B4D0E5 /* lnabundance.hpp */, 4815BEAF2289E13500677EE2 /* lnrarefaction.cpp */, 4815BEB02289E13500677EE2 /* lnrarefaction.hpp */, 4815BEB2228B371E00677EE2 /* lnshift.cpp */, 4815BEB3228B371E00677EE2 /* lnshift.hpp */, 4815BEB6228DD18400677EE2 /* lsabundance.cpp */, 4815BEB7228DD18400677EE2 /* lsabundance.hpp */, 4815BEBA2293189600677EE2 /* lsrarefaction.cpp */, 4815BEBB2293189600677EE2 /* lsrarefaction.hpp */, 4815BEBF2295CE6800677EE2 /* siabundance.cpp */, 4815BEC02295CE6800677EE2 /* siabundance.hpp */, 4815BEC32296F19500677EE2 /* sirarefaction.cpp */, 4815BEC42296F19500677EE2 /* sirarefaction.hpp */, 4815BEC722970FA700677EE2 /* sishift.cpp */, 4815BEC822970FA700677EE2 /* sishift.hpp */, 483A9BAC225BBE55006102DF /* metroig.cpp */, 483A9BAD225BBE55006102DF /* metroig.hpp */, 4809EC94227B2CB500B4D0E5 /* metrolognormal.hpp */, 48E7E0A12278A21B00B74910 /* metrolognormal.cpp */, 4809EC96227B405700B4D0E5 /* metrologstudent.cpp */, 4809EC9A227B5D2500B4D0E5 /* metrologstudent.hpp */, 4809EC9B227C9B3100B4D0E5 /* metrosichel.cpp */, 4809EC9C227C9B3100B4D0E5 /* metrosichel.hpp */, ); name = diversitycalcs; sourceTree = ""; }; 48789AEE206176EF00A7D848 /* utf8 */ = { isa = PBXGroup; children = ( 48789AEF2061776100A7D848 /* utf8.h */, 48789AF02061776100A7D848 /* checked.h */, 48789AF12061776100A7D848 /* core.h */, 48789AF22061776100A7D848 /* unchecked.h */, ); name = utf8; sourceTree = SOURCE_ROOT; }; 489ECDA2215EB30A0036D42C /* Frameworks */ = { isa = PBXGroup; children = ( F4103A9D25A4D00F001ED741 /* libgsl.a */, F4103A7F25A4C831001ED741 /* libgslcblas.a */, F4103A6025A3A7C7001ED741 /* libhdf5_cpp.a */, F4103A5E25A3A7C7001ED741 /* libhdf5_hl_cpp.a */, F4103A5D25A3A7C7001ED741 /* libhdf5_hl.a */, F4103A5F25A3A7C7001ED741 /* libhdf5.a */, F4103A4F25A3A411001ED741 /* libboost_iostreams.a */, F4103A4B25A3A40F001ED741 /* libboost_filesystem.a */, ); name = Frameworks; sourceTree = ""; }; 48E5443E1E9C28CC00FF6AB8 /* clustercalcs */ = { isa = PBXGroup; children = ( 48E5446E1E9D3B2D00FF6AB8 /* accuracy.cpp */, 48E5446F1E9D3B2D00FF6AB8 /* accuracy.hpp */, 48E5446B1E9D3A8C00FF6AB8 /* f1score.hpp */, 48E5446A1E9D3A8C00FF6AB8 /* f1score.cpp */, 48E5447A1E9D3F0400FF6AB8 /* fdr.cpp */, 48E5447B1E9D3F0400FF6AB8 /* fdr.hpp */, 48E5445B1E9C2F0F00FF6AB8 /* fn.cpp */, 48E5445C1E9C2F0F00FF6AB8 /* fn.hpp */, 48E544581E9C2E6500FF6AB8 /* fp.hpp */, 48E544571E9C2E6500FF6AB8 /* fp.cpp */, 48E5445F1E9C2FB800FF6AB8 /* fpfn.cpp */, 48E544601E9C2FB800FF6AB8 /* fpfn.hpp */, 48E544401E9C292900FF6AB8 /* mcc.hpp */, 48E5443F1E9C292900FF6AB8 /* mcc.cpp */, 48E544761E9D3CE400FF6AB8 /* npv.cpp */, 48E544771E9D3CE400FF6AB8 /* npv.hpp */, 48E544721E9D3C1200FF6AB8 /* ppv.cpp */, 48E544731E9D3C1200FF6AB8 /* ppv.hpp */, 48E544431E9C2B1000FF6AB8 /* sensitivity.cpp */, 48E544441E9C2B1000FF6AB8 /* sensitivity.hpp */, 48E544471E9C2BE100FF6AB8 /* specificity.cpp */, 48E544481E9C2BE100FF6AB8 /* specificity.hpp 
*/, 48E544531E9C2DF500FF6AB8 /* tn.cpp */, 48E544541E9C2DF500FF6AB8 /* tn.hpp */, 48E5444F1E9C2CFD00FF6AB8 /* tp.cpp */, 48E544501E9C2CFD00FF6AB8 /* tp.hpp */, 48E5444B1E9C2C8F00FF6AB8 /* tptn.cpp */, 48E5444C1E9C2C8F00FF6AB8 /* tptn.hpp */, ); name = clustercalcs; sourceTree = ""; }; 48E544661E9D12CA00FF6AB8 /* otucalcs */ = { isa = PBXGroup; children = ( A7E9B79D12D37EC400DA6239 /* qstat.cpp */, A7E9B79E12D37EC400DA6239 /* qstat.h */, A7E9B7E512D37EC400DA6239 /* shannon.cpp */, A7E9B7E612D37EC400DA6239 /* shannon.h */, A7E9B7E712D37EC400DA6239 /* shannoneven.cpp */, A7E9B7E812D37EC400DA6239 /* shannoneven.h */, A7A09B0E18773BF700FAA081 /* shannonrange.h */, A7A09B0F18773C0E00FAA081 /* shannonrange.cpp */, A7E9B7EA12D37EC400DA6239 /* sharedace.h */, A7E9B7E912D37EC400DA6239 /* sharedace.cpp */, A7E9B7EC12D37EC400DA6239 /* sharedanderbergs.cpp */, A7E9B7ED12D37EC400DA6239 /* sharedanderbergs.h */, A7E9B7EE12D37EC400DA6239 /* sharedbraycurtis.cpp */, A7E9B7EF12D37EC400DA6239 /* sharedbraycurtis.h */, A7E9B7F012D37EC400DA6239 /* sharedchao1.cpp */, A7E9B7F112D37EC400DA6239 /* sharedchao1.h */, A7E9B7F412D37EC400DA6239 /* sharedjabund.cpp */, A7E9B7F512D37EC400DA6239 /* sharedjabund.h */, A7E9B7F612D37EC400DA6239 /* sharedjackknife.cpp */, A7E9B7F712D37EC400DA6239 /* sharedjackknife.h */, A7E9B64F12D37EC300DA6239 /* ace.cpp */, A7E9B65012D37EC300DA6239 /* ace.h */, A7E9B65E12D37EC300DA6239 /* bergerparker.cpp */, A7E9B65F12D37EC300DA6239 /* bergerparker.h */, A7E9B66612D37EC400DA6239 /* boneh.cpp */, A7E9B66712D37EC400DA6239 /* boneh.h */, A7E9B66812D37EC400DA6239 /* bootstrap.cpp */, A7E9B66912D37EC400DA6239 /* bootstrap.h */, A7E9B66C12D37EC400DA6239 /* bstick.cpp */, A7E9B66D12D37EC400DA6239 /* bstick.h */, A7E9B67012D37EC400DA6239 /* canberra.cpp */, A7E9B67112D37EC400DA6239 /* canberra.h */, A7E9B67612D37EC400DA6239 /* chao1.cpp */, A7E9B67712D37EC400DA6239 /* chao1.h */, A7E9B6BB12D37EC400DA6239 /* coverage.cpp */, A7E9B6BC12D37EC400DA6239 /* coverage.h */, A7E9B6D712D37EC400DA6239 /* efron.cpp */, A7E9B6D812D37EC400DA6239 /* efron.h */, A7E9B6F012D37EC400DA6239 /* geom.cpp */, A7E9B6F112D37EC400DA6239 /* geom.h */, A7E9B70E12D37EC400DA6239 /* goodscoverage.cpp */, A7E9B70F12D37EC400DA6239 /* goodscoverage.h */, A7E9B71212D37EC400DA6239 /* gower.cpp */, A7E9B71312D37EC400DA6239 /* gower.h */, A7E9B71612D37EC400DA6239 /* hamming.cpp */, A7E9B71712D37EC400DA6239 /* hamming.h */, A7E9B72412D37EC400DA6239 /* heip.cpp */, A7E9B72512D37EC400DA6239 /* heip.h */, A7E9B72612D37EC400DA6239 /* hellinger.cpp */, A7E9B72712D37EC400DA6239 /* hellinger.h */, A7E9B72F12D37EC400DA6239 /* invsimpson.cpp */, A7E9B73012D37EC400DA6239 /* invsimpson.h */, A7E9B73112D37EC400DA6239 /* jackknife.cpp */, A7E9B73212D37EC400DA6239 /* jackknife.h */, A7E9B74112D37EC400DA6239 /* logsd.cpp */, A7E9B74212D37EC400DA6239 /* logsd.h */, A7E9B74712D37EC400DA6239 /* manhattan.cpp */, A7E9B74812D37EC400DA6239 /* manhattan.h */, A7E9B74B12D37EC400DA6239 /* memchi2.cpp */, A7E9B74C12D37EC400DA6239 /* memchi2.h */, A7E9B74D12D37EC400DA6239 /* memchord.cpp */, A7E9B74E12D37EC400DA6239 /* memchord.h */, A7E9B74F12D37EC400DA6239 /* memeuclidean.cpp */, A7E9B75012D37EC400DA6239 /* memeuclidean.h */, A7E9B75112D37EC400DA6239 /* mempearson.cpp */, A7E9B75212D37EC400DA6239 /* mempearson.h */, A7E9B76D12D37EC400DA6239 /* npshannon.cpp */, A7E9B76E12D37EC400DA6239 /* npshannon.h */, A7E9B76F12D37EC400DA6239 /* nseqs.h */, A7E9B77112D37EC400DA6239 /* odum.cpp */, A7E9B77212D37EC400DA6239 /* odum.h */, A7E9B7F812D37EC400DA6239 /* 
sharedjclass.cpp */, A7E9B7F912D37EC400DA6239 /* sharedjclass.h */, A7E9B7FA12D37EC400DA6239 /* sharedjest.cpp */, A7E9B7FB12D37EC400DA6239 /* sharedjest.h */, A7222D711856276C0055A993 /* sharedjsd.h */, A7222D721856277C0055A993 /* sharedjsd.cpp */, A7E9B7FC12D37EC400DA6239 /* sharedkstest.cpp */, A7E9B7FD12D37EC400DA6239 /* sharedkstest.h */, A7E9B7FE12D37EC400DA6239 /* sharedkulczynski.cpp */, A7E9B7FF12D37EC400DA6239 /* sharedkulczynski.h */, A7E9B80012D37EC400DA6239 /* sharedkulczynskicody.cpp */, A7E9B80112D37EC400DA6239 /* sharedkulczynskicody.h */, A7E9B80212D37EC400DA6239 /* sharedlennon.cpp */, A7E9B80312D37EC400DA6239 /* sharedlennon.h */, A7E9B80612D37EC400DA6239 /* sharedmarczewski.cpp */, A7E9B80712D37EC400DA6239 /* sharedmarczewski.h */, A7E9B80812D37EC400DA6239 /* sharedmorisitahorn.cpp */, A7E9B80912D37EC400DA6239 /* sharedmorisitahorn.h */, A7E9B80A12D37EC400DA6239 /* sharednseqs.h */, A7E9B80B12D37EC400DA6239 /* sharedochiai.cpp */, A7E9B80C12D37EC400DA6239 /* sharedochiai.h */, 48705AC119BE32C50075E977 /* sharedrjsd.cpp */, 48705AC219BE32C50075E977 /* sharedrjsd.h */, A7E9B81512D37EC400DA6239 /* sharedsobs.cpp */, A7E9B81612D37EC400DA6239 /* sharedsobs.h */, A7E9B81712D37EC400DA6239 /* sharedsobscollectsummary.cpp */, A7E9B81812D37EC400DA6239 /* sharedsobscollectsummary.h */, A7E9B81912D37EC400DA6239 /* sharedsorabund.cpp */, A7E9B81A12D37EC400DA6239 /* sharedsorabund.h */, A7E9B81B12D37EC400DA6239 /* sharedsorclass.cpp */, A7E9B81C12D37EC400DA6239 /* sharedsorclass.h */, A7E9B81D12D37EC400DA6239 /* sharedsorest.cpp */, A7E9B81E12D37EC400DA6239 /* sharedsorest.h */, A7E9B81F12D37EC400DA6239 /* sharedthetan.cpp */, A7E9B82012D37EC400DA6239 /* sharedthetan.h */, A7E9B82112D37EC400DA6239 /* sharedthetayc.cpp */, A7E9B82212D37EC400DA6239 /* sharedthetayc.h */, A7E9B82512D37EC400DA6239 /* shen.cpp */, A7E9B82612D37EC400DA6239 /* shen.h */, A7E9B82912D37EC400DA6239 /* simpson.cpp */, A7E9B82A12D37EC400DA6239 /* simpson.h */, A7E9B82B12D37EC400DA6239 /* simpsoneven.cpp */, A7E9B82C12D37EC400DA6239 /* simpsoneven.h */, A7E9B83212D37EC400DA6239 /* smithwilson.cpp */, A7E9B83312D37EC400DA6239 /* smithwilson.h */, A7E9B83412D37EC400DA6239 /* sobs.h */, A7E9B83512D37EC400DA6239 /* soergel.cpp */, A7E9B83612D37EC400DA6239 /* soergel.h */, A7E9B83712D37EC400DA6239 /* solow.cpp */, A7E9B83812D37EC400DA6239 /* solow.h */, A7E9B83B12D37EC400DA6239 /* spearman.cpp */, A7E9B83C12D37EC400DA6239 /* spearman.h */, A7E9B83D12D37EC400DA6239 /* speciesprofile.cpp */, A7E9B83E12D37EC400DA6239 /* speciesprofile.h */, A7E9B84512D37EC400DA6239 /* structchi2.cpp */, A7E9B84612D37EC400DA6239 /* structchi2.h */, A7E9B84712D37EC400DA6239 /* structchord.cpp */, A7E9B84812D37EC400DA6239 /* structchord.h */, A7E9B84912D37EC400DA6239 /* structeuclidean.cpp */, A7E9B84A12D37EC400DA6239 /* structeuclidean.h */, A7E9B84B12D37EC400DA6239 /* structkulczynski.cpp */, A7E9B84C12D37EC400DA6239 /* structkulczynski.h */, A7E9B84D12D37EC400DA6239 /* structpearson.cpp */, A7E9B84E12D37EC400DA6239 /* structpearson.h */, A7E9B87212D37EC400DA6239 /* uvest.cpp */, A7E9B87312D37EC400DA6239 /* uvest.h */, A7E9B87F12D37EC400DA6239 /* whittaker.cpp */, A7E9B88012D37EC400DA6239 /* whittaker.h */, ); name = otucalcs; sourceTree = SOURCE_ROOT; }; 48E544671E9D14C500FF6AB8 /* distcalcs */ = { isa = PBXGroup; children = ( A7E9B6D512D37EC400DA6239 /* eachgapdist.h */, 481E40E2244F6A050059C925 /* eachgapdist.cpp */, A7E9B6D612D37EC400DA6239 /* eachgapignore.h */, 481E40DE244F619D0059C925 /* eachgapignore.cpp */, 
A7E9B72A12D37EC400DA6239 /* ignoregaps.h */, 481E40DC244F52460059C925 /* ignoregaps.cpp */, F4A866CA265EBD270010479A /* jtt.hpp */, F4A86711268F5CCE0010479A /* kimura.cpp */, F4A86712268F5CCE0010479A /* kimura.hpp */, F41A1B8F261257DE00144985 /* kmerdist.cpp */, F41A1B90261257DE00144985 /* kmerdist.hpp */, A7E9B77312D37EC400DA6239 /* onegapdist.h */, 48998B68242E785100DBD0A9 /* onegapdist.cpp */, A7E9B77412D37EC400DA6239 /* onegapignore.h */, 481E40DA244DFF5A0059C925 /* onegapignore.cpp */, F4A86707268E3AFA0010479A /* pam.hpp */, F4A86700268B71A80010479A /* pmb.hpp */, ); name = distcalcs; sourceTree = ""; }; 48E544681E9D175100FF6AB8 /* unifraccalcs */ = { isa = PBXGroup; children = ( A7E9B78412D37EC400DA6239 /* parsimony.h */, A7E9B78312D37EC400DA6239 /* parsimony.cpp */, A7E9B87112D37EC400DA6239 /* unweighted.h */, A7E9B87012D37EC400DA6239 /* unweighted.cpp */, A7E9B87D12D37EC400DA6239 /* weighted.h */, A7E9B87C12D37EC400DA6239 /* weighted.cpp */, ); name = unifraccalcs; sourceTree = ""; }; 48E544691E9D17E000FF6AB8 /* clearcutcalcs */ = { isa = PBXGroup; children = ( A7E9B6C012D37EC400DA6239 /* dayhoff.h */, A7E9B79912D37EC400DA6239 /* prng.cpp */, A7E9B79A12D37EC400DA6239 /* prng.h */, ); name = clearcutcalcs; sourceTree = ""; }; 48ED1E76235E1A3B003E66F7 /* engines */ = { isa = PBXGroup; children = ( A7E9B6DB12D37EC400DA6239 /* engine.hpp */, 48ED1E7F235E1D59003E66F7 /* batchengine.cpp */, 48ED1E80235E1D59003E66F7 /* batchengine.hpp */, 48ED1E7B235E1BB4003E66F7 /* interactengine.cpp */, 48ED1E7C235E1BB4003E66F7 /* interactengine.hpp */, 48ED1E77235E1ACA003E66F7 /* scriptengine.cpp */, 48ED1E78235E1ACA003E66F7 /* scriptengine.hpp */, ); name = engines; path = source/engines; sourceTree = SOURCE_ROOT; }; 48F06CCA1D74BC6F004A45DD /* testclassifier */ = { isa = PBXGroup; children = ( 48F06CCB1D74BEC4004A45DD /* testphylotree.cpp */, 48F06CCC1D74BEC4004A45DD /* testphylotree.hpp */, ); name = testclassifier; path = TestMothur; sourceTree = SOURCE_ROOT; }; 7B17437A17AF6F02004C161B /* svm */ = { isa = PBXGroup; children = ( 7B21820117AD77BD00286E6A /* svm.cpp */, 7B21820217AD77BD00286E6A /* svm.hpp */, ); name = svm; path = source/svm; sourceTree = SOURCE_ROOT; }; A7D395C1184FA34300A350D7 /* communitytype */ = { isa = PBXGroup; children = ( A7132EAE184E76EB00AAA402 /* communitytype.h */, A7132EB2184E792700AAA402 /* communitytype.cpp */, A7D395C2184FA39300A350D7 /* kmeans.h */, A7D395C3184FA3A200A350D7 /* kmeans.cpp */, A7B093BE18579EF600843CD1 /* pam.h */, A7B093BF18579F0400843CD1 /* pam.cpp */, A7548FAF171440ED00B1F05A /* qFinderDMM.h */, A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */, ); name = communitytype; path = source/communitytype; sourceTree = SOURCE_ROOT; }; A7E9BA3812D3956100DA6239 /* commands */ = { isa = PBXGroup; children = ( A7E9B6AE12D37EC400DA6239 /* command.hpp */, 219C1DE11552C508004209F9 /* newcommandtemplate.h */, 219C1DDF1552C4BD004209F9 /* newcommandtemplate.cpp */, A7E9B65212D37EC300DA6239 /* aligncommand.h */, A7E9B65112D37EC300DA6239 /* aligncommand.cpp */, A7E9B7D412D37EC400DA6239 /* aligncheckcommand.h */, A7E9B7D312D37EC400DA6239 /* aligncheckcommand.cpp */, F44268EC27BD52D50000C15D /* alignmusclecommand.cpp */, F44268ED27BD52D50000C15D /* alignmusclecommand.hpp */, A7A61F2B130062E000E05B6B /* amovacommand.h */, A7A61F2C130062E000E05B6B /* amovacommand.cpp */, A71CB15F130B04A2001E7287 /* anosimcommand.h */, A71CB15E130B04A2001E7287 /* anosimcommand.cpp */, A7E9B66112D37EC300DA6239 /* binsequencecommand.h */, A7E9B66012D37EC300DA6239 /* binsequencecommand.cpp */, 
48C728781B728D6B00D40830 /* biominfocommand.h */, 48C728771B728D6B00D40830 /* biominfocommand.cpp */, A7E9B67B12D37EC400DA6239 /* chimerabellerophoncommand.h */, A7E9B67A12D37EC400DA6239 /* chimerabellerophoncommand.cpp */, A7E9B67D12D37EC400DA6239 /* chimeraccodecommand.h */, A7E9B67C12D37EC400DA6239 /* chimeraccodecommand.cpp */, A7E9B67F12D37EC400DA6239 /* chimeracheckcommand.h */, A7E9B67E12D37EC400DA6239 /* chimeracheckcommand.cpp */, A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */, A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */, A7E9B68312D37EC400DA6239 /* chimerapintailcommand.h */, A7E9B68212D37EC400DA6239 /* chimerapintailcommand.cpp */, A7E9B68B12D37EC400DA6239 /* chimeraslayercommand.h */, A7E9B68A12D37EC400DA6239 /* chimeraslayercommand.cpp */, A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */, A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */, 48EDB76A1D1320DD00F76E93 /* chimeravsearchcommand.cpp */, 48EDB76B1D1320DD00F76E93 /* chimeravsearchcommand.h */, A7E9B68D12D37EC400DA6239 /* chopseqscommand.h */, A7E9B68C12D37EC400DA6239 /* chopseqscommand.cpp */, A7E9B69112D37EC400DA6239 /* classifyotucommand.h */, A7E9B69012D37EC400DA6239 /* classifyotucommand.cpp */, A7E9B69312D37EC400DA6239 /* classifyseqscommand.h */, A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */, 7B2181FF17AD777B00286E6A /* classifysvmsharedcommand.h */, 7B2181FE17AD777B00286E6A /* classifysvmsharedcommand.cpp */, A7EEB0F714F29C1B00344B83 /* classifytreecommand.h */, A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */, A7E9B69712D37EC400DA6239 /* clearcutcommand.h */, A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */, A7E9B69D12D37EC400DA6239 /* clustercommand.h */, A7E9B69C12D37EC400DA6239 /* clustercommand.cpp */, A7E9B69F12D37EC400DA6239 /* clusterdoturcommand.h */, A7E9B69E12D37EC400DA6239 /* clusterdoturcommand.cpp */, 48B01D2720163594006BE140 /* clusterfitcommand.cpp */, 48B01D2820163594006BE140 /* clusterfitcommand.hpp */, A7E9B6A112D37EC400DA6239 /* clusterfragmentscommand.h */, A7E9B6A012D37EC400DA6239 /* clusterfragmentscommand.cpp */, A7E9B6A312D37EC400DA6239 /* clustersplitcommand.h */, A7E9B6A212D37EC400DA6239 /* clustersplitcommand.cpp */, A7E9B6A912D37EC400DA6239 /* collectcommand.h */, A7E9B6A812D37EC400DA6239 /* collectcommand.cpp */, A7E9B6AD12D37EC400DA6239 /* collectsharedcommand.h */, A7E9B6AC12D37EC400DA6239 /* collectsharedcommand.cpp */, A7E9B6B812D37EC400DA6239 /* consensusseqscommand.h */, A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */, A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */, A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */, A7E9B6BA12D37EC400DA6239 /* corraxescommand.h */, A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */, A795840B13F13CD900F201D5 /* countgroupscommand.h */, A795840C13F13CD900F201D5 /* countgroupscommand.cpp */, A7730EFD13967241007433A3 /* countseqscommand.h */, A7730EFE13967241007433A3 /* countseqscommand.cpp */, A77EBD2C1523707F00ED407C /* createdatabasecommand.h */, A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */, A7E9B6C612D37EC400DA6239 /* degapseqscommand.h */, A7E9B6C512D37EC400DA6239 /* degapseqscommand.cpp */, A7E9B6C812D37EC400DA6239 /* deuniqueseqscommand.h */, A7E9B6C712D37EC400DA6239 /* deuniqueseqscommand.cpp */, A77A221D139001B600B0BE70 /* deuniquetreecommand.h */, A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */, A7E9B6CC12D37EC400DA6239 /* distancecommand.h */, A7E9B6CB12D37EC400DA6239 /* distancecommand.cpp */, A7E9B74A12D37EC400DA6239 /* distsharedcommand.h */, 
A7E9B74912D37EC400DA6239 /* distsharedcommand.cpp */, F4103A6F25A4C4D2001ED741 /* diversityestimatorcommand.hpp */, 484976E52256799100F3A291 /* diversityestimatorcommand.cpp */, A7E9B78012D37EC400DA6239 /* fastaqinfocommand.h */, A7E9B77F12D37EC400DA6239 /* fastaqinfocommand.cpp */, A7E9B6E412D37EC400DA6239 /* filterseqscommand.h */, A7E9B6E312D37EC400DA6239 /* filterseqscommand.cpp */, A79EEF8816971D640006DEC1 /* filtersharedcommand.h */, A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */, 219C1DE51559BCF2004209F9 /* getcoremicrobiomecommand.h */, 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */, A7FE7C3E1330EA1000F7B327 /* getcurrentcommand.h */, A7FE7C3F1330EA1000F7B327 /* getcurrentcommand.cpp */, A7128B1A16B7001200723BE4 /* getdistscommand.h */, A7128B1C16B7002600723BE4 /* getdistscommand.cpp */, A7E9B6F312D37EC400DA6239 /* getgroupcommand.h */, A7E9B6F212D37EC400DA6239 /* getgroupcommand.cpp */, A7E9B6F512D37EC400DA6239 /* getgroupscommand.h */, A7E9B6F412D37EC400DA6239 /* getgroupscommand.cpp */, A7E9B6F712D37EC400DA6239 /* getlabelcommand.h */, A7E9B6F612D37EC400DA6239 /* getlabelcommand.cpp */, A7E9B6F912D37EC400DA6239 /* getlineagecommand.h */, A7E9B6F812D37EC400DA6239 /* getlineagecommand.cpp */, A7E9B6FB12D37EC400DA6239 /* getlistcountcommand.h */, A7E9B6FA12D37EC400DA6239 /* getlistcountcommand.cpp */, A7548FAB17142EA500B1F05A /* getmetacommunitycommand.h */, A7548FAC17142EBC00B1F05A /* getmetacommunitycommand.cpp */, 48705ABB19BE32C50075E977 /* getmimarkspackagecommand.cpp */, 48705ABC19BE32C50075E977 /* getmimarkspackagecommand.h */, A7E9B6FF12D37EC400DA6239 /* getoturepcommand.h */, A7E9B6FE12D37EC400DA6239 /* getoturepcommand.cpp */, A70056E8156A93E300924A2D /* getotuscommand.h */, A70056E5156A93D000924A2D /* getotuscommand.cpp */, A7E9B70312D37EC400DA6239 /* getrabundcommand.h */, A7E9B70212D37EC400DA6239 /* getrabundcommand.cpp */, A7E9B70512D37EC400DA6239 /* getrelabundcommand.h */, A7E9B70412D37EC400DA6239 /* getrelabundcommand.cpp */, A7E9B70712D37EC400DA6239 /* getsabundcommand.h */, A7E9B70612D37EC400DA6239 /* getsabundcommand.cpp */, A7E9B70912D37EC400DA6239 /* getseqscommand.h */, A7E9B70812D37EC400DA6239 /* getseqscommand.cpp */, A7E9B70B12D37EC400DA6239 /* getsharedotucommand.h */, A7E9B70A12D37EC400DA6239 /* getsharedotucommand.cpp */, A7E9B71F12D37EC400DA6239 /* heatmapcommand.h */, A7E9B71E12D37EC400DA6239 /* heatmapcommand.cpp */, A7E9B72312D37EC400DA6239 /* heatmapsimcommand.h */, A7E9B72212D37EC400DA6239 /* heatmapsimcommand.cpp */, A7E9B72912D37EC400DA6239 /* helpcommand.h */, A7E9B72812D37EC400DA6239 /* helpcommand.cpp */, A75790571301749D00A30DAB /* homovacommand.h */, A75790581301749D00A30DAB /* homovacommand.cpp */, A7E9B72C12D37EC400DA6239 /* indicatorcommand.h */, A7E9B72B12D37EC400DA6239 /* indicatorcommand.cpp */, A7496D2D167B531B00CC7D7C /* kruskalwalliscommand.h */, A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */, A7190B211768E0DF00A9AFA6 /* lefsecommand.h */, A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */, A7E9B73C12D37EC400DA6239 /* libshuffcommand.h */, A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */, A7A0671C156294810095C8C5 /* listotuscommand.h */, A7A067191562946F0095C8C5 /* listotuscommand.cpp */, A7E9B73E12D37EC400DA6239 /* listseqscommand.h */, A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */, A7FA10001302E096003860FE /* mantelcommand.h */, A7FA10011302E096003860FE /* mantelcommand.cpp */, A724D2B4153C8600000A826F /* makebiomcommand.h */, A724D2B6153C8628000A826F /* makebiomcommand.cpp */, 
A7A0671D1562AC230095C8C5 /* makecontigscommand.h */, A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */, A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */, A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */, 48DB37B21B3B27E000C372A4 /* makefilecommand.h */, 48DB37B11B3B27E000C372A4 /* makefilecommand.cpp */, A7E9B74412D37EC400DA6239 /* makegroupcommand.h */, A7E9B74312D37EC400DA6239 /* makegroupcommand.cpp */, 48F1C16423D606050034DAAF /* makeclrcommand.cpp */, 48F1C16523D606050034DAAF /* makeclrcommand.hpp */, A741744B175CD9B1007DF49B /* makelefsecommand.h */, A741744A175CD9B1007DF49B /* makelefsecommand.cpp */, A7E6F69C17427CF2006775E2 /* makelookupcommand.h */, A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */, A7E9B7F312D37EC400DA6239 /* makesharedcommand.h */, A7E9B7F212D37EC400DA6239 /* makesharedcommand.cpp */, 48910D411D5243E500F60EDB /* mergecountcommand.cpp */, 48910D421D5243E500F60EDB /* mergecountcommand.hpp */, A7E9B75412D37EC400DA6239 /* mergefilecommand.h */, A7E9B75312D37EC400DA6239 /* mergefilecommand.cpp */, A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */, A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */, 48CF76EE21BEBDD300B2FB5C /* mergeotuscommand.cpp */, 48CF76EF21BEBDD300B2FB5C /* mergeotuscommand.hpp */, 48705AC019BE32C50075E977 /* mergesfffilecommand.h */, 48705ABF19BE32C50075E977 /* mergesfffilecommand.cpp */, A799314816CBD0BC0017E888 /* mergetaxsummarycommand.h */, A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */, A7E9B75812D37EC400DA6239 /* metastatscommand.h */, A7E9B75712D37EC400DA6239 /* metastatscommand.cpp */, A7E9B75A12D37EC400DA6239 /* mgclustercommand.h */, A7E9B75912D37EC400DA6239 /* mgclustercommand.cpp */, 487C5A851AB88B93002AF48A /* mimarksattributescommand.cpp */, 487C5A861AB88B93002AF48A /* mimarksattributescommand.h */, A7E9B76A12D37EC400DA6239 /* nocommands.h */, A7E9B76912D37EC400DA6239 /* nocommands.cpp */, A7E9B76C12D37EC400DA6239 /* normalizesharedcommand.h */, A7E9B76B12D37EC400DA6239 /* normalizesharedcommand.cpp */, A713EBEB12DC7C5E000092AC /* nmdscommand.h */, A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */, A7A3C8C814D041AD00B1BFBE /* otuassociationcommand.h */, A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */, A7E9B77A12D37EC400DA6239 /* otuhierarchycommand.h */, A7E9B77912D37EC400DA6239 /* otuhierarchycommand.cpp */, A7E9B77E12D37EC400DA6239 /* pairwiseseqscommand.h */, A7E9B77D12D37EC400DA6239 /* pairwiseseqscommand.cpp */, A7E9B78612D37EC400DA6239 /* parsimonycommand.h */, A7E9B78512D37EC400DA6239 /* parsimonycommand.cpp */, A7FC486512D795D60055BC5C /* pcacommand.h */, A7FC486612D795D60055BC5C /* pcacommand.cpp */, A7E9B78812D37EC400DA6239 /* pcoacommand.h */, A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */, A76CDD7F1510F09A004C8458 /* pcrseqscommand.h */, 481623E11B56A2DB004C60B7 /* pcrseqscommand.cpp */, A7E9B78C12D37EC400DA6239 /* phylodiversitycommand.h */, A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */, A7E9B79212D37EC400DA6239 /* phylotypecommand.h */, A7E9B79112D37EC400DA6239 /* phylotypecommand.cpp */, A7E9B79812D37EC400DA6239 /* preclustercommand.h */, A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */, A74C06E616A9C097008390A3 /* primerdesigncommand.h */, A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */, A7E9B7A212D37EC400DA6239 /* quitcommand.h */, A7E9B7A112D37EC400DA6239 /* quitcommand.cpp */, A7E9B7AC12D37EC400DA6239 /* rarefactcommand.h */, A7E9B7AB12D37EC400DA6239 /* rarefactcommand.cpp */, A7E9B7AF12D37EC400DA6239 /* rarefactsharedcommand.h */, A7E9B7AE12D37EC400DA6239 
/* rarefactsharedcommand.cpp */, A7B0231716B8245D006BA09E /* removedistscommand.h */, A7B0231416B8244B006BA09E /* removedistscommand.cpp */, A7E9B7C412D37EC400DA6239 /* removegroupscommand.h */, A7E9B7C312D37EC400DA6239 /* removegroupscommand.cpp */, A7E9B7C612D37EC400DA6239 /* removelineagecommand.h */, A7E9B7C512D37EC400DA6239 /* removelineagecommand.cpp */, A70056E9156AB6D400924A2D /* removeotuscommand.h */, A70056EA156AB6E500924A2D /* removeotuscommand.cpp */, A727864212E9E28C00F86ABA /* removerarecommand.h */, A727864312E9E28C00F86ABA /* removerarecommand.cpp */, A7E9B7CA12D37EC400DA6239 /* removeseqscommand.h */, A7E9B7C912D37EC400DA6239 /* removeseqscommand.cpp */, 488841631CC6C34900C5E972 /* renamefilecommand.cpp */, 488841641CC6C34900C5E972 /* renamefilecommand.h */, A7CFA42F1755400500D9ED4D /* renameseqscommand.h */, A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */, A7E9B7CE12D37EC400DA6239 /* reversecommand.h */, A7E9B7CD12D37EC400DA6239 /* reversecommand.cpp */, A7E9B7D212D37EC400DA6239 /* screenseqscommand.h */, A7E9B7D112D37EC400DA6239 /* screenseqscommand.cpp */, A7E9B7D612D37EC400DA6239 /* sensspeccommand.h */, A7E9B7D512D37EC400DA6239 /* sensspeccommand.cpp */, A7E9B7D812D37EC400DA6239 /* seqerrorcommand.h */, A7E9B7D712D37EC400DA6239 /* seqerrorcommand.cpp */, A7E9B7DA12D37EC400DA6239 /* seqsummarycommand.h */, A7E9B7D912D37EC400DA6239 /* seqsummarycommand.cpp */, A7FE7E6B13311EA400F7B327 /* setcurrentcommand.h */, A7FE7E6C13311EA400F7B327 /* setcurrentcommand.cpp */, A7E9B7E012D37EC400DA6239 /* setdircommand.h */, A7E9B7DF12D37EC400DA6239 /* setdircommand.cpp */, A7E9B7E212D37EC400DA6239 /* setlogfilecommand.h */, A7E9B7E112D37EC400DA6239 /* setlogfilecommand.cpp */, 481FB5291AC19F8B0076CFF3 /* setseedcommand.h */, 481FB5281AC19F8B0076CFF3 /* setseedcommand.cpp */, A7E9B7E412D37EC400DA6239 /* sffinfocommand.h */, A7E9B7E312D37EC400DA6239 /* sffinfocommand.cpp */, A7C7DAB615DA75760059B0CF /* sffmultiplecommand.h */, A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */, A7E9B82812D37EC400DA6239 /* shhhercommand.h */, A7E9B82712D37EC400DA6239 /* shhhercommand.cpp */, A774101214695AF60098E6AC /* shhhseqscommand.h */, A774101314695AF60098E6AC /* shhhseqscommand.cpp */, A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */, A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */, A77B7183173D222F002163C2 /* sparcccommand.h */, A77B7184173D2240002163C2 /* sparcccommand.cpp */, A7E9B84012D37EC400DA6239 /* splitabundcommand.h */, A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */, A7E9B84212D37EC400DA6239 /* splitgroupscommand.h */, A7E9B84112D37EC400DA6239 /* splitgroupscommand.cpp */, A747EC6F181EA0E500345732 /* sracommand.h */, A747EC70181EA0F900345732 /* sracommand.cpp */, 48ED1E8423689DE8003E66F7 /* srainfocommand.hpp */, 48ED1E8323689DE8003E66F7 /* srainfocommand.cpp */, A7E9B85012D37EC400DA6239 /* subsamplecommand.h */, A7E9B84F12D37EC400DA6239 /* subsamplecommand.cpp */, A7E9B85812D37EC400DA6239 /* summarycommand.h */, A7E9B85712D37EC400DA6239 /* summarycommand.cpp */, A754149514840CF7005850D1 /* summaryqualcommand.h */, A754149614840CF7005850D1 /* summaryqualcommand.cpp */, A7E9B85A12D37EC400DA6239 /* summarysharedcommand.h */, A7E9B85912D37EC400DA6239 /* summarysharedcommand.cpp */, A7FFB556142CA02C004884F2 /* summarytaxcommand.h */, A7FFB557142CA02C004884F2 /* summarytaxcommand.cpp */, A7E9B85C12D37EC400DA6239 /* systemcommand.h */, A7E9B85B12D37EC400DA6239 /* systemcommand.cpp */, F4B4B0DA27396EF7003B2133 /* translateseqscommand.cpp */, F4B4B0DB27396EF7003B2133 /* 
translateseqscommand.hpp */, A7E9B86312D37EC400DA6239 /* treesharedcommand.h */, A7E9B86212D37EC400DA6239 /* treesharedcommand.cpp */, A7E9B86912D37EC400DA6239 /* trimflowscommand.h */, A7E9B86812D37EC400DA6239 /* trimflowscommand.cpp */, A7E9B86B12D37EC400DA6239 /* trimseqscommand.h */, A7E9B86A12D37EC400DA6239 /* trimseqscommand.cpp */, A7E9B86D12D37EC400DA6239 /* unifracunweightedcommand.h */, A7E9B86C12D37EC400DA6239 /* unifracunweightedcommand.cpp */, A7E9B86F12D37EC400DA6239 /* unifracweightedcommand.h */, A7E9B86E12D37EC400DA6239 /* unifracweightedcommand.cpp */, A7E9B6C412D37EC400DA6239 /* uniqueseqscommand.h */, A7E9B6C312D37EC400DA6239 /* uniqueseqscommand.cpp */, A7E9B87B12D37EC400DA6239 /* venncommand.h */, A7E9B87A12D37EC400DA6239 /* venncommand.cpp */, ); name = commands; path = source/commands; sourceTree = SOURCE_ROOT; }; A7E9BA3F12D395F700DA6239 /* calculators */ = { isa = PBXGroup; children = ( F40859B0280F3CB200F19B1A /* README.txt */, A7E9B66F12D37EC400DA6239 /* calculator.h */, 481E40E0244F62980059C925 /* calculator.cpp */, A7E9B6E212D37EC400DA6239 /* filters.h */, A7E9B86112D37EC400DA6239 /* treecalculator.h */, 4815BECB229717E100677EE2 /* diversitycalc.h */, 484976DC22552BEA00F3A291 /* diversitycalcs */, 48E544691E9D17E000FF6AB8 /* clearcutcalcs */, 48E544671E9D14C500FF6AB8 /* distcalcs */, 48E5443E1E9C28CC00FF6AB8 /* clustercalcs */, 48E544661E9D12CA00FF6AB8 /* otucalcs */, 48E544681E9D175100FF6AB8 /* unifraccalcs */, ); name = calculators; path = source/calculators; sourceTree = SOURCE_ROOT; }; A7E9BA4212D3960D00DA6239 /* containers */ = { isa = PBXGroup; children = ( A7E9B65312D37EC300DA6239 /* alignment.cpp */, A7E9B65412D37EC300DA6239 /* alignment.hpp */, A7E9B65512D37EC300DA6239 /* alignmentcell.cpp */, A7E9B65612D37EC300DA6239 /* alignmentcell.hpp */, A7E9B65712D37EC300DA6239 /* alignmentdb.cpp */, A7E9B65812D37EC300DA6239 /* alignmentdb.h */, A7E9B76312D37EC400DA6239 /* alignreport.cpp */, A7E9B76412D37EC400DA6239 /* alignreport.hpp */, F4A866BE265BE7EC0010479A /* aminoacid.hpp */, F4A866BD265BE7EC0010479A /* aminoacid.cpp */, 48A0B8EA2547282600726384 /* biom.cpp */, 48A0B8EB2547282600726384 /* biom.hpp */, 48A0B8EF25472C4500726384 /* biomhdf5.cpp */, 48A0B8F025472C4500726384 /* biomhdf5.hpp */, 48A0B8F425472C6500726384 /* biomsimple.cpp */, 48A0B8F525472C6500726384 /* biomsimple.hpp */, 48883FFB20C6D6C000CAF112 /* compare.h */, F45A2E3B25A78B4D00994F76 /* contigsreport.hpp */, F45A2E3C25A78B4D00994F76 /* contigsreport.cpp */, A74D59A6159A1E3600043046 /* counttable.h */, A74D59A3159A1E2000043046 /* counttable.cpp */, A7E9B6BF12D37EC400DA6239 /* datavector.hpp */, A77916E7176F7F7600EEFE18 /* designmap.h */, A77916E6176F7F7600EEFE18 /* designmap.cpp */, A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */, A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */, A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */, A7E9B6DF12D37EC400DA6239 /* fastamap.h */, 48C51DEE1A76B870004ECDF1 /* fastqread.h */, 48C51DEF1A76B888004ECDF1 /* fastqread.cpp */, 48BD4EB621F7724C008EA73D /* filefile.cpp */, 48BD4EB721F7724C008EA73D /* filefile.hpp */, A7E9B6E812D37EC400DA6239 /* flowdata.h */, A7E9B6E712D37EC400DA6239 /* flowdata.cpp */, A7E9B6EE12D37EC400DA6239 /* fullmatrix.cpp */, A7E9B6EF12D37EC400DA6239 /* fullmatrix.h */, A7E9B71412D37EC400DA6239 /* groupmap.cpp */, A7E9B71512D37EC400DA6239 /* groupmap.h */, A7E9B73312D37EC400DA6239 /* kmer.cpp */, A7E9B73412D37EC400DA6239 /* kmer.hpp */, 48C51DF21A793EFE004ECDF1 /* kmeralign.h */, 48C51DF11A793EFE004ECDF1 /* kmeralign.cpp */, 
A7E9B73512D37EC400DA6239 /* kmerdb.cpp */, A7E9B73612D37EC400DA6239 /* kmerdb.hpp */, A7E9B73F12D37EC400DA6239 /* listvector.cpp */, A7E9B74012D37EC400DA6239 /* listvector.hpp */, A7E9B75F12D37EC400DA6239 /* nameassignment.cpp */, A7E9B76012D37EC400DA6239 /* nameassignment.hpp */, 48705ABE19BE32C50075E977 /* oligos.h */, 48705ABD19BE32C50075E977 /* oligos.cpp */, 48FB99CA20A4AD7D00FF9F6E /* optiblastmatrix.cpp */, 48FB99CB20A4AD7D00FF9F6E /* optiblastmatrix.hpp */, 48FB99C720A48EF700FF9F6E /* optidata.cpp */, 48FB99C820A48EF700FF9F6E /* optidata.hpp */, 488C1DE8242D102B00BDCCB4 /* optidb.cpp */, 488C1DE9242D102B00BDCCB4 /* optidb.hpp */, 48910D491D58CBA300F60EDB /* optimatrix.cpp */, 48910D4A1D58CBA300F60EDB /* optimatrix.h */, 48FB99C4209B69FA00FF9F6E /* optirefmatrix.hpp */, 48FB99C3209B69FA00FF9F6E /* optirefmatrix.cpp */, A7E9B77712D37EC400DA6239 /* ordervector.cpp */, A7E9B77812D37EC400DA6239 /* ordervector.hpp */, 482AC3B72562B57600C9AF4A /* picrust.cpp */, 482AC3B82562B57600C9AF4A /* picrust.hpp */, F4A866B6265BE7720010479A /* protein.hpp */, F4A866B5265BE7720010479A /* protein.cpp */, F4A866CF266912830010479A /* proteindb.cpp */, F4A866D0266912830010479A /* proteindb.hpp */, A7E9B79F12D37EC400DA6239 /* qualityscores.cpp */, A7E9B7A012D37EC400DA6239 /* qualityscores.h */, 48BDDA771ECA3B8E00F0F6C0 /* rabundfloatvector.cpp */, 48BDDA781ECA3B8E00F0F6C0 /* rabundfloatvector.hpp */, A7E9B7A312D37EC400DA6239 /* rabundvector.cpp */, A7E9B7A412D37EC400DA6239 /* rabundvector.hpp */, 48E0230124BF488D00BFEA41 /* report.cpp */, 48E0230224BF488D00BFEA41 /* report.hpp */, A7E9B7CF12D37EC400DA6239 /* sabundvector.cpp */, A7E9B7D012D37EC400DA6239 /* sabundvector.hpp */, A7E9B6BE12D37EC400DA6239 /* searchdatabase.hpp */, A7E9B7DB12D37EC400DA6239 /* sequence.cpp */, A7E9B7DC12D37EC400DA6239 /* sequence.hpp */, A741FAD415D168A00067BCC5 /* sequencecountparser.h */, A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */, A7E9B7DD12D37EC400DA6239 /* sequencedb.cpp */, A7E9B7DE12D37EC400DA6239 /* sequencedb.h */, A7F9F5CD141A5E500032F693 /* sequenceparser.h */, A7F9F5CE141A5E500032F693 /* sequenceparser.cpp */, 48A055312491577800D0F97F /* sffheader.cpp */, 48A055322491577800D0F97F /* sffheader.hpp */, 48A0552E2490066C00D0F97F /* sffread.cpp */, 48A0552F2490066C00D0F97F /* sffread.hpp */, 48F1C16C23D78F8D0034DAAF /* sharedclrvector.cpp */, 48F1C16D23D78F8D0034DAAF /* sharedclrvector.hpp */, 48F1C16823D78D7B0034DAAF /* sharedclrvectors.cpp */, 48F1C16923D78D7B0034DAAF /* sharedclrvectors.hpp */, A7E9B80412D37EC400DA6239 /* sharedlistvector.cpp */, A7E9B80512D37EC400DA6239 /* sharedlistvector.h */, A7E9B80E12D37EC400DA6239 /* sharedordervector.h */, A7E9B80D12D37EC400DA6239 /* sharedordervector.cpp */, 485B0E061F264F2E00CA5F57 /* sharedrabundvector.cpp */, 485B0E071F264F2E00CA5F57 /* sharedrabundvector.hpp */, 485B0E0C1F27C40500CA5F57 /* sharedrabundfloatvector.cpp */, 485B0E0D1F27C40500CA5F57 /* sharedrabundfloatvector.hpp */, 48BDDA741ECA067000F0F6C0 /* sharedrabundfloatvectors.hpp */, 48BDDA731ECA067000F0F6C0 /* sharedrabundfloatvectors.cpp */, 48BDDA6F1EC9D31400F0F6C0 /* sharedrabundvectors.hpp */, 48BDDA701EC9D31400F0F6C0 /* sharedrabundvectors.cpp */, A7E9B83912D37EC400DA6239 /* sparsematrix.cpp */, A7E9B83A12D37EC400DA6239 /* sparsematrix.hpp */, A7E0243F15B4522000A5F046 /* sparsedistancematrix.h */, A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */, F4A866DA266946AB0010479A /* storagedatabase.hpp */, A7E9B85112D37EC400DA6239 /* suffixdb.cpp */, A7E9B85212D37EC400DA6239 /* suffixdb.hpp */, 
A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */, A7E9B85412D37EC400DA6239 /* suffixnodes.hpp */, A7E9B85512D37EC400DA6239 /* suffixtree.cpp */, A7E9B85612D37EC400DA6239 /* suffixtree.hpp */, 488563D023CD00C4007B5659 /* taxonomy.hpp */, 488563CF23CD00C4007B5659 /* taxonomy.cpp */, A7E9B85F12D37EC400DA6239 /* tree.cpp */, A7E9B86012D37EC400DA6239 /* tree.h */, A7E9B86412D37EC400DA6239 /* treemap.cpp */, A7E9B86512D37EC400DA6239 /* treemap.h */, A7E9B86612D37EC400DA6239 /* treenode.cpp */, A7E9B86712D37EC400DA6239 /* treenode.h */, ); name = containers; path = source/datastructures; sourceTree = SOURCE_ROOT; }; A7E9BA4512D3965600DA6239 /* chimera */ = { isa = PBXGroup; children = ( F40859B12811AE6500F19B1A /* README.txt */, A7E9B65C12D37EC300DA6239 /* bellerophon.cpp */, A7E9B65D12D37EC300DA6239 /* bellerophon.h */, A7E9B67412D37EC400DA6239 /* ccode.cpp */, A7E9B67512D37EC400DA6239 /* ccode.h */, A7E9B68012D37EC400DA6239 /* chimeracheckrdp.cpp */, A7E9B68112D37EC400DA6239 /* chimeracheckrdp.h */, A7E9B68412D37EC400DA6239 /* chimerarealigner.cpp */, A7E9B68512D37EC400DA6239 /* chimerarealigner.h */, A7E9B6C212D37EC400DA6239 /* decalc.h */, A7E9B6C112D37EC400DA6239 /* decalc.cpp */, A7E9B68812D37EC400DA6239 /* chimeraslayer.cpp */, A7E9B68912D37EC400DA6239 /* chimeraslayer.h */, A7E9B74612D37EC400DA6239 /* maligner.h */, A7E9B74512D37EC400DA6239 /* maligner.cpp */, A7E9B67912D37EC400DA6239 /* mothurchimera.h */, A7E9B67812D37EC400DA6239 /* mothurchimera.cpp */, A7BF221314587886000AD524 /* myPerseus.h */, A7BF221214587886000AD524 /* myPerseus.cpp */, A7E9B79312D37EC400DA6239 /* pintail.cpp */, A7E9B79412D37EC400DA6239 /* pintail.h */, A7E9B82E12D37EC400DA6239 /* slayer.cpp */, A7E9B82F12D37EC400DA6239 /* slayer.h */, ); name = chimera; path = source/chimera; sourceTree = SOURCE_ROOT; }; A7E9BA4B12D3966900DA6239 /* classifier */ = { isa = PBXGroup; children = ( A721AB67161C570F009860A1 /* alignnode.h */, A721AB66161C570F009860A1 /* alignnode.cpp */, A721AB69161C570F009860A1 /* aligntree.h */, A721AB68161C570F009860A1 /* aligntree.cpp */, A7E9B65B12D37EC300DA6239 /* bayesian.h */, A7E9B65A12D37EC300DA6239 /* bayesian.cpp */, A7E9B68E12D37EC400DA6239 /* classify.cpp */, A7E9B68F12D37EC400DA6239 /* classify.h */, A721AB6E161C572A009860A1 /* kmernode.h */, A721AB6D161C572A009860A1 /* kmernode.cpp */, A721AB70161C572A009860A1 /* kmertree.h */, A721AB6F161C572A009860A1 /* kmertree.cpp */, A7E9B73812D37EC400DA6239 /* knn.h */, A7E9B73712D37EC400DA6239 /* knn.cpp */, A7E9B78D12D37EC400DA6239 /* phylosummary.cpp */, A7E9B78E12D37EC400DA6239 /* phylosummary.h */, A7E9B78F12D37EC400DA6239 /* phylotree.cpp */, A7E9B79012D37EC400DA6239 /* phylotree.h */, A7E9B85D12D37EC400DA6239 /* taxonomyequalizer.cpp */, A7E9B85E12D37EC400DA6239 /* taxonomyequalizer.h */, A721AB74161C573B009860A1 /* taxonomynode.h */, A721AB73161C573B009860A1 /* taxonomynode.cpp */, ); name = classifier; path = source/classifier; sourceTree = SOURCE_ROOT; }; A7E9BA4F12D398D700DA6239 /* clearcut */ = { isa = PBXGroup; children = ( F40859AF280F2DDB00F19B1A /* README.txt */, A7E9B69412D37EC400DA6239 /* clearcut.cpp */, A7E9B69512D37EC400DA6239 /* clearcut.h */, A7E9B6A412D37EC400DA6239 /* cmdargs.cpp */, A7E9B6A512D37EC400DA6239 /* cmdargs.h */, A7E9B6B312D37EC400DA6239 /* common.h */, A7E9B6CF12D37EC400DA6239 /* distclearcut.cpp */, A7E9B6D012D37EC400DA6239 /* distclearcut.h */, A7E9B6D312D37EC400DA6239 /* dmat.cpp */, A7E9B6D412D37EC400DA6239 /* dmat.h */, A7E9B6DC12D37EC400DA6239 /* fasta.cpp */, A7E9B6DD12D37EC400DA6239 /* fasta.h */, 
A7E9B6FD12D37EC400DA6239 /* getopt_long.h */, A7E9B6FC12D37EC400DA6239 /* getopt_long.cpp */, ); name = clearcut; path = source/clearcut; sourceTree = SOURCE_ROOT; }; A7E9BA5312D39A5E00DA6239 /* read */ = { isa = PBXGroup; children = ( A7E9B7B012D37EC400DA6239 /* readblast.cpp */, A7E9B7B112D37EC400DA6239 /* readblast.h */, A7E9B7B212D37EC400DA6239 /* readcluster.cpp */, A7E9B7B312D37EC400DA6239 /* readcluster.h */, A7E9B7B412D37EC400DA6239 /* readcolumn.cpp */, A7E9B7B512D37EC400DA6239 /* readcolumn.h */, A7E9B7B812D37EC400DA6239 /* readmatrix.hpp */, A7E9B7BD12D37EC400DA6239 /* readphylip.cpp */, A7E9B7BE12D37EC400DA6239 /* readphylip.h */, A7E9B7BF12D37EC400DA6239 /* readtree.cpp */, A7E9B7C012D37EC400DA6239 /* readtree.h */, A713EBAA12DC7613000092AC /* readphylipvector.h */, A713EBAB12DC7613000092AC /* readphylipvector.cpp */, A7E9B84312D37EC400DA6239 /* splitmatrix.cpp */, A7E9B84412D37EC400DA6239 /* splitmatrix.h */, A7D755D71535F665009BF21A /* treereader.h */, A7D755D91535F679009BF21A /* treereader.cpp */, ); name = read; path = source/read; sourceTree = SOURCE_ROOT; }; A7E9BA5612D39BD800DA6239 /* metastats */ = { isa = PBXGroup; children = ( A79234D513C74BF6002B08E2 /* mothurfisher.h */, A79234D613C74BF6002B08E2 /* mothurfisher.cpp */, A73DDC3613C4BF64006AAE38 /* mothurmetastats.h */, A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */, ); name = metastats; path = source/metastats; sourceTree = SOURCE_ROOT; }; F45A2E4F25BF229600994F76 /* mothur */ = { isa = PBXGroup; children = ( F45A2E5025BF229600994F76 /* main.cpp */, ); path = mothur; sourceTree = ""; }; /* End PBXGroup section */ /* Begin PBXNativeTarget section */ 481FB5181AC0A63E0076CFF3 /* TestMothur */ = { isa = PBXNativeTarget; buildConfigurationList = 481FB51F1AC0A63E0076CFF3 /* Build configuration list for PBXNativeTarget "TestMothur" */; buildPhases = ( 481FB5151AC0A63E0076CFF3 /* Sources */, 481FB5161AC0A63E0076CFF3 /* Frameworks */, 481FB5171AC0A63E0076CFF3 /* CopyFiles */, ); buildRules = ( 481FB6A11AC1BE060076CFF3 /* PBXBuildRule */, ); dependencies = ( ); name = TestMothur; productName = TestMothur; productReference = 481FB5191AC0A63E0076CFF3 /* TestMothur */; productType = "com.apple.product-type.tool"; }; 8DD76FA90486AB0100D96B5E /* Mothur */ = { isa = PBXNativeTarget; buildConfigurationList = 1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "Mothur" */; buildPhases = ( 8DD76FAB0486AB0100D96B5E /* Sources */, 8DD76FAD0486AB0100D96B5E /* Frameworks */, 8DD76FAF0486AB0100D96B5E /* CopyFiles */, ); buildRules = ( A7D162CB149F96CA000523E8 /* PBXBuildRule */, ); dependencies = ( ); name = Mothur; productInstallPath = "$(HOME)/bin"; productName = mothur; productReference = 8DD76FB20486AB0100D96B5E /* mothur */; productType = "com.apple.product-type.tool"; }; /* End PBXNativeTarget section */ /* Begin PBXProject section */ 08FB7793FE84155DC02AAC07 /* Project object */ = { isa = PBXProject; attributes = { LastUpgradeCheck = 1200; ORGANIZATIONNAME = "Schloss Lab"; TargetAttributes = { 481FB5181AC0A63E0076CFF3 = { CreatedOnToolsVersion = 6.2; }; }; }; buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */; compatibilityVersion = "Xcode 3.2"; developmentRegion = en; hasScannedForEncodings = 1; knownRegions = ( ja, de, fr, en, Base, ); mainGroup = 08FB7794FE84155DC02AAC07 /* mothur */; projectDirPath = ""; projectRoot = ""; targets = ( 8DD76FA90486AB0100D96B5E /* Mothur */, 481FB5181AC0A63E0076CFF3 /* TestMothur */, ); }; /* End PBXProject section */ /* 
Begin PBXSourcesBuildPhase section */ 481FB5151AC0A63E0076CFF3 /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( 48E544421E9C292900FF6AB8 /* mcc.cpp in Sources */, 48C728651B66A77800D40830 /* testsequence.cpp in Sources */, 481FB5E51AC1B77E0076CFF3 /* nocommands.cpp in Sources */, 481FB5F61AC1B77E0076CFF3 /* quitcommand.cpp in Sources */, 481FB52C1AC1B0A70076CFF3 /* commandfactory.cpp in Sources */, 481FB5C71AC1B74F0076CFF3 /* getsabundcommand.cpp in Sources */, 481FB5A51AC1B7300076CFF3 /* clusterdoturcommand.cpp in Sources */, 481FB6271AC1B7EA0076CFF3 /* alignmentdb.cpp in Sources */, 489387F62107A60C00284329 /* testoptirefmatrix.cpp in Sources */, 481FB6351AC1B7EA0076CFF3 /* kmerdb.cpp in Sources */, 481FB5721AC1B6D40076CFF3 /* simpson.cpp in Sources */, 481FB55D1AC1B6690076CFF3 /* sharedchao1.cpp in Sources */, 48BDDA7A1ECA3B8E00F0F6C0 /* rabundfloatvector.cpp in Sources */, 481FB5FE1AC1B7970076CFF3 /* removerarecommand.cpp in Sources */, 481FB53C1AC1B5F10076CFF3 /* bootstrap.cpp in Sources */, 48F1C16B23D78D7B0034DAAF /* sharedclrvectors.cpp in Sources */, 481FB5E21AC1B77E0076CFF3 /* metastatscommand.cpp in Sources */, 488563D223CD00C4007B5659 /* taxonomy.cpp in Sources */, 481FB5631AC1B6A10076CFF3 /* sharedkulczynski.cpp in Sources */, 481FB5EF1AC1B77E0076CFF3 /* pcoacommand.cpp in Sources */, 481FB64E1AC1B7F40076CFF3 /* treenode.cpp in Sources */, 481FB5801AC1B6EA0076CFF3 /* weighted.cpp in Sources */, 481FB54F1AC1B63A0076CFF3 /* memeuclidean.cpp in Sources */, 48910D511D58E26C00F60EDB /* testopticluster.cpp in Sources */, 4815BEBE2295A02800677EE2 /* diversityutils.cpp in Sources */, 481FB5611AC1B69B0076CFF3 /* sharedjsd.cpp in Sources */, 481FB5AF1AC1B7300076CFF3 /* createdatabasecommand.cpp in Sources */, 481FB5731AC1B6EA0076CFF3 /* simpsoneven.cpp in Sources */, 481FB58D1AC1B7060076CFF3 /* collect.cpp in Sources */, 481FB5A01AC1B71B0076CFF3 /* classifysvmsharedcommand.cpp in Sources */, 481FB5741AC1B6EA0076CFF3 /* smithwilson.cpp in Sources */, 481FB5381AC1B5E30076CFF3 /* clusterclassic.cpp in Sources */, 48C728721B6AB3B900D40830 /* testremovegroupscommand.cpp in Sources */, 481FB5B61AC1B74F0076CFF3 /* filtersharedcommand.cpp in Sources */, 48ED1E7E235E1BB4003E66F7 /* interactengine.cpp in Sources */, 481FB5F81AC1B77E0076CFF3 /* rarefactsharedcommand.cpp in Sources */, 481FB62E1AC1B7EA0076CFF3 /* fastamap.cpp in Sources */, 481FB5C41AC1B74F0076CFF3 /* getotuscommand.cpp in Sources */, 481FB5A61AC1B7300076CFF3 /* clusterfragmentscommand.cpp in Sources */, 481FB5C01AC1B74F0076CFF3 /* getlineagecommand.cpp in Sources */, 481FB5F71AC1B77E0076CFF3 /* rarefactcommand.cpp in Sources */, 481FB61F1AC1B7AC0076CFF3 /* venncommand.cpp in Sources */, 480D1E311EA92D5500BF9C77 /* fakeoptimatrix.cpp in Sources */, 48E5445A1E9C2E6500FF6AB8 /* fp.cpp in Sources */, 481FB61A1AC1B7AC0076CFF3 /* treesharedcommand.cpp in Sources */, 481FB60A1AC1B7970076CFF3 /* sffinfocommand.cpp in Sources */, 481FB58C1AC1B6FF0076CFF3 /* slayer.cpp in Sources */, 481FB6531AC1B8100076CFF3 /* gotohoverlap.cpp in Sources */, 481FB65B1AC1B82C0076CFF3 /* mothurfisher.cpp in Sources */, 48E5445E1E9C2F0F00FF6AB8 /* fn.cpp in Sources */, 481FB6721AC1B8820076CFF3 /* refchimeratest.cpp in Sources */, 481FB6051AC1B7970076CFF3 /* seqerrorcommand.cpp in Sources */, 48ED1E8623689DE8003E66F7 /* srainfocommand.cpp in Sources */, 481FB6871AC1B8B80076CFF3 /* venn.cpp in Sources */, 48E544791E9D3CE400FF6AB8 /* npv.cpp in Sources */, 481FB5D71AC1B75C0076CFF3 /* makebiomcommand.cpp in Sources */, 
48ED1E82235E1D59003E66F7 /* batchengine.cpp in Sources */, 481FB6601AC1B8450076CFF3 /* nast.cpp in Sources */, 48E544461E9C2B1000FF6AB8 /* sensitivity.cpp in Sources */, 481FB5861AC1B6FF0076CFF3 /* chimerarealigner.cpp in Sources */, 481FB5A81AC1B7300076CFF3 /* collectcommand.cpp in Sources */, 488C1DEB242D102B00BDCCB4 /* optidb.cpp in Sources */, 481FB5961AC1B71B0076CFF3 /* chimeraccodecommand.cpp in Sources */, 48B44EEF1FB5006500789C45 /* currentfile.cpp in Sources */, 4827A4DC1CB3ED2200345170 /* fastqdataset.cpp in Sources */, 481FB61B1AC1B7AC0076CFF3 /* trimflowscommand.cpp in Sources */, 481FB6781AC1B88F0076CFF3 /* readcolumn.cpp in Sources */, 481FB6831AC1B8B80076CFF3 /* trialSwap2.cpp in Sources */, 481FB63A1AC1B7EA0076CFF3 /* qualityscores.cpp in Sources */, 4803D5B3211DDA5A001C63B5 /* testsharedrabundvectors.cpp in Sources */, 481FB5FD1AC1B7970076CFF3 /* removeotuscommand.cpp in Sources */, 481FB63F1AC1B7EA0076CFF3 /* sequencecountparser.cpp in Sources */, 481FB67C1AC1B88F0076CFF3 /* splitmatrix.cpp in Sources */, 48E5447D1E9D3F0400FF6AB8 /* fdr.cpp in Sources */, 481FB59C1AC1B71B0076CFF3 /* chopseqscommand.cpp in Sources */, 481FB5DE1AC1B77E0076CFF3 /* mergesfffilecommand.cpp in Sources */, 481FB6421AC1B7EA0076CFF3 /* sharedlistvector.cpp in Sources */, 489AF6952106194A0028155E /* optifitcluster.cpp in Sources */, 481FB5A71AC1B7300076CFF3 /* clustersplitcommand.cpp in Sources */, 481FB65C1AC1B82C0076CFF3 /* mothurmetastats.cpp in Sources */, 481FB5EB1AC1B77E0076CFF3 /* fastaqinfocommand.cpp in Sources */, 481FB6341AC1B7EA0076CFF3 /* kmeralign.cpp in Sources */, 481FB55B1AC1B6630076CFF3 /* sharedanderbergs.cpp in Sources */, 481FB5B81AC1B74F0076CFF3 /* getcoremicrobiomecommand.cpp in Sources */, 481FB54E1AC1B6340076CFF3 /* memchord.cpp in Sources */, 481FB6021AC1B7970076CFF3 /* screenseqscommand.cpp in Sources */, 48E544621E9C2FB800FF6AB8 /* fpfn.cpp in Sources */, 48E543EC1E8F15B800FF6AB8 /* opticluster.cpp in Sources */, 481FB52E1AC1B0CB0076CFF3 /* testsetseedcommand.cpp in Sources */, 481FB65A1AC1B8100076CFF3 /* wilcox.cpp in Sources */, 481FB6251AC1B7EA0076CFF3 /* alignment.cpp in Sources */, 481FB5C51AC1B74F0076CFF3 /* getrabundcommand.cpp in Sources */, 481FB56E1AC1B6C30076CFF3 /* sharedsorest.cpp in Sources */, 48E544751E9D3C1200FF6AB8 /* ppv.cpp in Sources */, 481FB6161AC1B7AC0076CFF3 /* summaryqualcommand.cpp in Sources */, 481FB56F1AC1B6C70076CFF3 /* sharedthetan.cpp in Sources */, 481FB5B21AC1B7300076CFF3 /* deuniqueseqscommand.cpp in Sources */, 481FB6331AC1B7EA0076CFF3 /* kmer.cpp in Sources */, 4815BEB9228DD18400677EE2 /* lsabundance.cpp in Sources */, 481FB5BC1AC1B74F0076CFF3 /* getgroupscommand.cpp in Sources */, 481FB5891AC1B6FF0076CFF3 /* maligner.cpp in Sources */, 481FB5CC1AC1B74F0076CFF3 /* heatmapsimcommand.cpp in Sources */, 481FB54C1AC1B62D0076CFF3 /* manhattan.cpp in Sources */, 481FB5E41AC1B77E0076CFF3 /* mimarksattributescommand.cpp in Sources */, 4815BEC62296F19500677EE2 /* sirarefaction.cpp in Sources */, 481FB5C11AC1B74F0076CFF3 /* getlistcountcommand.cpp in Sources */, 481FB57C1AC1B6EA0076CFF3 /* structkulczynski.cpp in Sources */, 481FB5BF1AC1B74F0076CFF3 /* getmimarkspackagecommand.cpp in Sources */, 481FB67A1AC1B88F0076CFF3 /* readtree.cpp in Sources */, 481FB6061AC1B7970076CFF3 /* seqsummarycommand.cpp in Sources */, 481FB54A1AC1B6270076CFF3 /* jackknife.cpp in Sources */, 481FB5431AC1B6110076CFF3 /* geom.cpp in Sources */, 481FB5761AC1B6EA0076CFF3 /* solow.cpp in Sources */, 481FB5421AC1B60D0076CFF3 /* efron.cpp in Sources */, 
4815BEC22295CE6800677EE2 /* siabundance.cpp in Sources */, 481FB5461AC1B6190076CFF3 /* hamming.cpp in Sources */, 481FB6891AC1BA760076CFF3 /* phylosummary.cpp in Sources */, 481FB6881AC1B8B80076CFF3 /* weightedlinkage.cpp in Sources */, 4815BECA22970FA700677EE2 /* sishift.cpp in Sources */, 489AF68F2106188E0028155E /* sensspeccalc.cpp in Sources */, 480E8DB21CAB1F5E00A0D137 /* vsearchfileparser.cpp in Sources */, 481FB61E1AC1B7AC0076CFF3 /* unifracweightedcommand.cpp in Sources */, 48910D441D5243E500F60EDB /* mergecountcommand.cpp in Sources */, 48B44EF31FB9EF8200789C45 /* utils.cpp in Sources */, 481FB5951AC1B71B0076CFF3 /* chimerabellerophoncommand.cpp in Sources */, 481FB68D1AC1BA9E0076CFF3 /* classify.cpp in Sources */, 481FB65F1AC1B8450076CFF3 /* myseqdist.cpp in Sources */, 48C728751B6AB4CD00D40830 /* testgetgroupscommand.cpp in Sources */, 481FB6391AC1B7EA0076CFF3 /* ordervector.cpp in Sources */, 481FB59A1AC1B71B0076CFF3 /* chimeraslayercommand.cpp in Sources */, 489AF691210619140028155E /* sharedrabundvector.cpp in Sources */, 48E543EB1E8F15A500FF6AB8 /* summary.cpp in Sources */, 48CF76F121BEBDE000B2FB5C /* mergeotuscommand.cpp in Sources */, 481FB5901AC1B71B0076CFF3 /* aligncommand.cpp in Sources */, 481FB6081AC1B7970076CFF3 /* setdircommand.cpp in Sources */, 481FB62C1AC1B7EA0076CFF3 /* designmap.cpp in Sources */, 481FB5661AC1B6AA0076CFF3 /* sharedmarczewski.cpp in Sources */, 481FB5881AC1B6FF0076CFF3 /* chimeraslayer.cpp in Sources */, 481FB6761AC1B88F0076CFF3 /* readblast.cpp in Sources */, 481FB5D81AC1B75C0076CFF3 /* makecontigscommand.cpp in Sources */, 481FB6481AC1B7EA0076CFF3 /* sparsedistancematrix.cpp in Sources */, 481FB5531AC1B6490076CFF3 /* parsimony.cpp in Sources */, 481FB6641AC1B8450076CFF3 /* optionparser.cpp in Sources */, 481FB68B1AC1BA9E0076CFF3 /* aligntree.cpp in Sources */, 481FB5FB1AC1B77E0076CFF3 /* removelineagecommand.cpp in Sources */, 48E5446D1E9D3A8C00FF6AB8 /* f1score.cpp in Sources */, 48998B6A242E785100DBD0A9 /* onegapdist.cpp in Sources */, 481FB57A1AC1B6EA0076CFF3 /* structchord.cpp in Sources */, 481FB6651AC1B8450076CFF3 /* overlap.cpp in Sources */, 481FB6841AC1B8B80076CFF3 /* trimoligos.cpp in Sources */, 481FB6401AC1B7EA0076CFF3 /* sequencedb.cpp in Sources */, 48576EA81D05F59300BBC9C0 /* distpdataset.cpp in Sources */, 481FB5C81AC1B74F0076CFF3 /* getseqscommand.cpp in Sources */, 481FB6011AC1B7970076CFF3 /* reversecommand.cpp in Sources */, 481FB55E1AC1B66D0076CFF3 /* sharedjackknife.cpp in Sources */, 481FB64B1AC1B7F40076CFF3 /* suffixtree.cpp in Sources */, 481FB5F21AC1B77E0076CFF3 /* phylotypecommand.cpp in Sources */, 481FB61D1AC1B7AC0076CFF3 /* unifracunweightedcommand.cpp in Sources */, 481FB6141AC1B7AC0076CFF3 /* subsamplecommand.cpp in Sources */, 481FB5481AC1B61F0076CFF3 /* hellinger.cpp in Sources */, 481FB5D41AC1B75C0076CFF3 /* listseqscommand.cpp in Sources */, F4A866B8265BE7720010479A /* protein.cpp in Sources */, 481FB6521AC1B8100076CFF3 /* fileoutput.cpp in Sources */, 484976E022552E0B00F3A291 /* erarefaction.cpp in Sources */, 481FB6851AC1B8B80076CFF3 /* validcalculator.cpp in Sources */, 489AF690210618A80028155E /* optiblastmatrix.cpp in Sources */, 481FB56D1AC1B6C10076CFF3 /* sharedsorclass.cpp in Sources */, 481FB5931AC1B71B0076CFF3 /* binsequencecommand.cpp in Sources */, 481FB6861AC1B8B80076CFF3 /* validparameter.cpp in Sources */, 481FB6431AC1B7EA0076CFF3 /* sharedordervector.cpp in Sources */, 4803D5AD211CA67F001C63B5 /* testsharedrabundvector.cpp in Sources */, 481FB5301AC1B5C80076CFF3 /* calcsparcc.cpp in Sources 
*/, 481FB5B01AC1B7300076CFF3 /* uniqueseqscommand.cpp in Sources */, 481FB6001AC1B7970076CFF3 /* renameseqscommand.cpp in Sources */, 481FB5921AC1B71B0076CFF3 /* anosimcommand.cpp in Sources */, 481FB6201AC1B7B30076CFF3 /* commandoptionparser.cpp in Sources */, 481FB5341AC1B5D60076CFF3 /* dmat.cpp in Sources */, 481FB6171AC1B7AC0076CFF3 /* summarysharedcommand.cpp in Sources */, 481FB68C1AC1BA9E0076CFF3 /* bayesian.cpp in Sources */, 481FB5F41AC1B77E0076CFF3 /* preclustercommand.cpp in Sources */, 48E543ED1E8F15C800FF6AB8 /* optimatrix.cpp in Sources */, 481FB5911AC1B71B0076CFF3 /* amovacommand.cpp in Sources */, 4829D9671B8387D0002EEED4 /* testbiominfocommand.cpp in Sources */, 484976E42255412400F3A291 /* igabundance.cpp in Sources */, 48BDDA721EC9D31400F0F6C0 /* sharedrabundvectors.hpp in Sources */, 4815BEB5228B371E00677EE2 /* lnshift.cpp in Sources */, 481FB58A1AC1B6FF0076CFF3 /* myPerseus.cpp in Sources */, 487D09EC1CB2CEFE007039BF /* averagelinkage.cpp in Sources */, 481FB63E1AC1B7EA0076CFF3 /* sabundvector.cpp in Sources */, 481FB57D1AC1B6EA0076CFF3 /* structpearson.cpp in Sources */, 481FB5331AC1B5D30076CFF3 /* distclearcut.cpp in Sources */, 481FB6811AC1B8960076CFF3 /* subsample.cpp in Sources */, 481FB5521AC1B6450076CFF3 /* odum.cpp in Sources */, 481FB68E1AC1BA9E0076CFF3 /* kmernode.cpp in Sources */, 48C1DDC71D25C1BC00B5BA9D /* (null) in Sources */, 481FB5CE1AC1B75C0076CFF3 /* homovacommand.cpp in Sources */, 481FB6551AC1B8100076CFF3 /* heatmap.cpp in Sources */, 481FB5E61AC1B77E0076CFF3 /* normalizesharedcommand.cpp in Sources */, 48E5444E1E9C2C8F00FF6AB8 /* tptn.cpp in Sources */, 481FB5E71AC1B77E0076CFF3 /* nmdscommand.cpp in Sources */, 481FB52B1AC1B09F0076CFF3 /* setseedcommand.cpp in Sources */, 481FB5261AC0ADA00076CFF3 /* sequence.cpp in Sources */, 481FB5C61AC1B74F0076CFF3 /* getrelabundcommand.cpp in Sources */, 481FB6571AC1B8100076CFF3 /* inputdata.cpp in Sources */, 481FB5451AC1B6170076CFF3 /* gower.cpp in Sources */, 481FB5AC1AC1B7300076CFF3 /* corraxescommand.cpp in Sources */, 481FB5A11AC1B71B0076CFF3 /* classifytreecommand.cpp in Sources */, 48F1C16723D606050034DAAF /* makeclrcommand.cpp in Sources */, 48C7286A1B69598400D40830 /* testmergegroupscommand.cpp in Sources */, 481FB62F1AC1B7EA0076CFF3 /* fastqread.cpp in Sources */, 481FB6901AC1BA9E0076CFF3 /* knn.cpp in Sources */, 48E5444A1E9C2BE100FF6AB8 /* specificity.cpp in Sources */, 481FB56C1AC1B6BE0076CFF3 /* sharedsorabund.cpp in Sources */, 481FB6411AC1B7EA0076CFF3 /* sequenceparser.cpp in Sources */, 481FB6381AC1B7EA0076CFF3 /* oligos.cpp in Sources */, 481FB59E1AC1B71B0076CFF3 /* classifyseqscommand.cpp in Sources */, 4810D5B7218208CC00C668E8 /* testcounttable.cpp in Sources */, 481FB5CF1AC1B75C0076CFF3 /* indicatorcommand.cpp in Sources */, F4B4B0DD27396EF7003B2133 /* translateseqscommand.cpp in Sources */, 481FB64F1AC1B8100076CFF3 /* consensus.cpp in Sources */, 481FB5441AC1B6140076CFF3 /* goodscoverage.cpp in Sources */, 481FB5DD1AC1B77E0076CFF3 /* distsharedcommand.cpp in Sources */, 481FB5771AC1B6EA0076CFF3 /* spearman.cpp in Sources */, 48F1C16F23D78F8D0034DAAF /* sharedclrvector.cpp in Sources */, 48ED1E7A235E1ACA003E66F7 /* scriptengine.cpp in Sources */, 481FB6031AC1B7970076CFF3 /* aligncheckcommand.cpp in Sources */, 481FB5361AC1B5DC0076CFF3 /* getopt_long.cpp in Sources */, 481FB5A41AC1B7300076CFF3 /* clustercommand.cpp in Sources */, 481FB5671AC1B6AD0076CFF3 /* sharedmorisitahorn.cpp in Sources */, 481FB5581AC1B6590076CFF3 /* shannonrange.cpp in Sources */, 481FB5601AC1B6790076CFF3 /* 
sharedjest.cpp in Sources */, 481FB64A1AC1B7F40076CFF3 /* suffixnodes.cpp in Sources */, 488841661CC6C35500C5E972 /* renamefilecommand.cpp in Sources */, 481FB53F1AC1B6000076CFF3 /* canberra.cpp in Sources */, 48A11C6E1CDA40F0003481D8 /* testrenamefilecommand.cpp in Sources */, 481FB5BD1AC1B74F0076CFF3 /* getlabelcommand.cpp in Sources */, 481FB5B91AC1B74F0076CFF3 /* getcurrentcommand.cpp in Sources */, 481FB5991AC1B71B0076CFF3 /* chimeraperseuscommand.cpp in Sources */, 481FB68F1AC1BA9E0076CFF3 /* kmertree.cpp in Sources */, 481FB5CB1AC1B74F0076CFF3 /* heatmapcommand.cpp in Sources */, 481FB60C1AC1B7AC0076CFF3 /* makesharedcommand.cpp in Sources */, 481FB5701AC1B6CA0076CFF3 /* sharedthetayc.cpp in Sources */, 481FB62D1AC1B7EA0076CFF3 /* distancedb.cpp in Sources */, 481FB5AA1AC1B7300076CFF3 /* consensusseqscommand.cpp in Sources */, 481FB5AE1AC1B7300076CFF3 /* countseqscommand.cpp in Sources */, 48C7287A1B728D6B00D40830 /* biominfocommand.cpp in Sources */, 481FB5811AC1B6EA0076CFF3 /* whittaker.cpp in Sources */, 481FB58E1AC1B7060076CFF3 /* completelinkage.cpp in Sources */, 481FB6301AC1B7EA0076CFF3 /* flowdata.cpp in Sources */, 481FB59B1AC1B71B0076CFF3 /* chimerauchimecommand.cpp in Sources */, 481FB5971AC1B71B0076CFF3 /* chimeracheckcommand.cpp in Sources */, 481FB5271AC0ADBA0076CFF3 /* mothurout.cpp in Sources */, 481FB54D1AC1B6300076CFF3 /* memchi2.cpp in Sources */, 481FB5E01AC1B77E0076CFF3 /* mergegroupscommand.cpp in Sources */, 481FB56B1AC1B6BB0076CFF3 /* sharedsobscollectsummary.cpp in Sources */, 481FB57F1AC1B6EA0076CFF3 /* uvest.cpp in Sources */, 48E543EE1E92B91100FF6AB8 /* chimeravsearchcommand.cpp in Sources */, 481FB5791AC1B6EA0076CFF3 /* structchi2.cpp in Sources */, 481FB63B1AC1B7EA0076CFF3 /* rabundvector.cpp in Sources */, 481FB5A91AC1B7300076CFF3 /* collectsharedcommand.cpp in Sources */, 4803D5B621231D9D001C63B5 /* testsharedrabundfloatvectors.cpp in Sources */, 481FB6211AC1B7BA0076CFF3 /* communitytype.cpp in Sources */, 48910D521D58E26C00F60EDB /* distcdataset.cpp in Sources */, 48C1DDC61D25C1BC00B5BA9D /* (null) in Sources */, 489AF6962106195E0028155E /* optidata.cpp in Sources */, 48E544711E9D3B2D00FF6AB8 /* accuracy.cpp in Sources */, 481FB5621AC1B69E0076CFF3 /* sharedkstest.cpp in Sources */, 481FB5E91AC1B77E0076CFF3 /* otuhierarchycommand.cpp in Sources */, 489387F9210F633E00284329 /* testOligos.cpp in Sources */, 481FB5351AC1B5D90076CFF3 /* fasta.cpp in Sources */, 481FB6321AC1B7EA0076CFF3 /* groupmap.cpp in Sources */, 481FB5FF1AC1B7970076CFF3 /* removeseqscommand.cpp in Sources */, 48BD4EB921F77258008EA73D /* filefile.cpp in Sources */, 48F06CCD1D74BEC4004A45DD /* testphylotree.cpp in Sources */, 481FB6771AC1B88F0076CFF3 /* readcluster.cpp in Sources */, 481FB5831AC1B6FF0076CFF3 /* ccode.cpp in Sources */, 481FB5681AC1B6B20076CFF3 /* sharedochiai.cpp in Sources */, 481FB56A1AC1B6B80076CFF3 /* sharedsobs.cpp in Sources */, 481FB5DB1AC1B75C0076CFF3 /* makelefsecommand.cpp in Sources */, 48E544561E9C2DF500FF6AB8 /* tn.cpp in Sources */, 481FB6371AC1B7EA0076CFF3 /* nameassignment.cpp in Sources */, 489AF694210619410028155E /* optirefmatrix.cpp in Sources */, 481FB5D21AC1B75C0076CFF3 /* libshuffcommand.cpp in Sources */, 4809EC99227B405700B4D0E5 /* metrologstudent.cpp in Sources */, 481FB5561AC1B6520076CFF3 /* shannon.cpp in Sources */, 481FB6591AC1B8100076CFF3 /* linearalgebra.cpp in Sources */, 481FB5411AC1B6070076CFF3 /* coverage.cpp in Sources */, 480E8DB11CAB12ED00A0D137 /* testfastqread.cpp in Sources */, 481FB6231AC1B7BA0076CFF3 /* pam.cpp in Sources */, 
481FB5BA1AC1B74F0076CFF3 /* getdistscommand.cpp in Sources */, 481FB6191AC1B7AC0076CFF3 /* systemcommand.cpp in Sources */, 481FB6611AC1B8450076CFF3 /* alignreport.cpp in Sources */, 48DB37B41B3B27E000C372A4 /* makefilecommand.cpp in Sources */, 481FB6181AC1B7AC0076CFF3 /* summarytaxcommand.cpp in Sources */, 481FB5CD1AC1B74F0076CFF3 /* helpcommand.cpp in Sources */, 481FB6701AC1B8820076CFF3 /* raredisplay.cpp in Sources */, 481FB5F91AC1B77E0076CFF3 /* removedistscommand.cpp in Sources */, 481FB6581AC1B8100076CFF3 /* libshuff.cpp in Sources */, 481FB59D1AC1B71B0076CFF3 /* classifyotucommand.cpp in Sources */, 481FB5781AC1B6EA0076CFF3 /* speciesprofile.cpp in Sources */, 481FB5401AC1B6030076CFF3 /* chao1.cpp in Sources */, 481FB5591AC1B65D0076CFF3 /* sharedjabund.cpp in Sources */, 481FB62A1AC1B7EA0076CFF3 /* counttable.cpp in Sources */, 481FB53A1AC1B5EC0076CFF3 /* bergerparker.cpp in Sources */, 482AC3BA2562B57600C9AF4A /* picrust.cpp in Sources */, 48E544521E9C2CFD00FF6AB8 /* tp.cpp in Sources */, 481FB5AD1AC1B7300076CFF3 /* countgroupscommand.cpp in Sources */, 481FB61C1AC1B7AC0076CFF3 /* trimseqscommand.cpp in Sources */, 481FB5311AC1B5CD0076CFF3 /* clearcut.cpp in Sources */, 480D1E2A1EA681D100BF9C77 /* testclustercalcs.cpp in Sources */, 481FB5651AC1B6A70076CFF3 /* sharedlennon.cpp in Sources */, 481FB6241AC1B7BA0076CFF3 /* qFinderDMM.cpp in Sources */, F4A866C0265BE7EC0010479A /* aminoacid.cpp in Sources */, 481FB6311AC1B7EA0076CFF3 /* fullmatrix.cpp in Sources */, 481FB51C1AC0A63E0076CFF3 /* main.cpp in Sources */, 481FB58F1AC1B71B0076CFF3 /* newcommandtemplate.cpp in Sources */, 48098ED6219DE7A500031FA4 /* testsubsample.cpp in Sources */, 4809ECA22280898E00B4D0E5 /* igrarefaction.cpp in Sources */, 481FB5571AC1B6550076CFF3 /* shannoneven.cpp in Sources */, 481FB5D11AC1B75C0076CFF3 /* lefsecommand.cpp in Sources */, 481FB6561AC1B8100076CFF3 /* heatmapsim.cpp in Sources */, 489AF6932106192E0028155E /* clusterfitcommand.cpp in Sources */, 481FB5EA1AC1B77E0076CFF3 /* pairwiseseqscommand.cpp in Sources */, 481FB5F11AC1B77E0076CFF3 /* phylodiversitycommand.cpp in Sources */, 481FB5501AC1B63D0076CFF3 /* mempearson.cpp in Sources */, 481FB5B51AC1B7300076CFF3 /* filterseqscommand.cpp in Sources */, 481FB6621AC1B8450076CFF3 /* noalign.cpp in Sources */, F4A866D2266912830010479A /* proteindb.cpp in Sources */, 481FB5E31AC1B77E0076CFF3 /* mgclustercommand.cpp in Sources */, 481FB5491AC1B6220076CFF3 /* invsimpson.cpp in Sources */, 4809ECA622831A5E00B4D0E5 /* lnabundance.cpp in Sources */, 48576EA51D05E8F600BBC9C0 /* testoptimatrix.cpp in Sources */, 481FB5821AC1B6FF0076CFF3 /* bellerophon.cpp in Sources */, 481FB6731AC1B8820076CFF3 /* seqnoise.cpp in Sources */, 481FB5DC1AC1B75C0076CFF3 /* makelookupcommand.cpp in Sources */, 481FB53D1AC1B5F80076CFF3 /* bstick.cpp in Sources */, 481FB60B1AC1B7AC0076CFF3 /* sffmultiplecommand.cpp in Sources */, 481FB5F51AC1B77E0076CFF3 /* primerdesigncommand.cpp in Sources */, 481FB5B41AC1B7300076CFF3 /* distancecommand.cpp in Sources */, 481FB5391AC1B5E90076CFF3 /* ace.cpp in Sources */, 481FB5751AC1B6EA0076CFF3 /* soergel.cpp in Sources */, 481FB5DA1AC1B75C0076CFF3 /* makegroupcommand.cpp in Sources */, 488841621CC515A000C5E972 /* (null) in Sources */, 481FB5691AC1B6B50076CFF3 /* sharedrjsd.cpp in Sources */, 481FB6801AC1B8960076CFF3 /* slibshuff.cpp in Sources */, 481FB67B1AC1B88F0076CFF3 /* readphylipvector.cpp in Sources */, 481FB64C1AC1B7F40076CFF3 /* tree.cpp in Sources */, 481FB6631AC1B8450076CFF3 /* needlemanoverlap.cpp in Sources */, 
481FB6931AC1BAA60076CFF3 /* taxonomynode.cpp in Sources */, 481FB60E1AC1B7AC0076CFF3 /* shhhseqscommand.cpp in Sources */, 481FB5E11AC1B77E0076CFF3 /* mergetaxsummarycommand.cpp in Sources */, 483A9BAF225BBE55006102DF /* metroig.cpp in Sources */, 481FB5AB1AC1B7300076CFF3 /* cooccurrencecommand.cpp in Sources */, 481FB5D61AC1B75C0076CFF3 /* mantelcommand.cpp in Sources */, 481FB57E1AC1B6EA0076CFF3 /* unweighted.cpp in Sources */, 481FB60F1AC1B7AC0076CFF3 /* sortseqscommand.cpp in Sources */, 4809EC95227B3A5B00B4D0E5 /* metrolognormal.cpp in Sources */, 489387FA2110C79200284329 /* testtrimoligos.cpp in Sources */, 481FB67D1AC1B88F0076CFF3 /* treereader.cpp in Sources */, 481FB6131AC1B7AC0076CFF3 /* sracommand.cpp in Sources */, 48C728671B66AB8800D40830 /* pcrseqscommand.cpp in Sources */, 481FB5541AC1B64C0076CFF3 /* prng.cpp in Sources */, 481FB57B1AC1B6EA0076CFF3 /* structeuclidean.cpp in Sources */, 481FB6221AC1B7BA0076CFF3 /* kmeans.cpp in Sources */, 481FB54B1AC1B62A0076CFF3 /* logsd.cpp in Sources */, 481FB55A1AC1B6600076CFF3 /* sharedace.cpp in Sources */, 481FB5BB1AC1B74F0076CFF3 /* getgroupcommand.cpp in Sources */, 481FB6361AC1B7EA0076CFF3 /* listvector.cpp in Sources */, 481FB5ED1AC1B77E0076CFF3 /* parsimonycommand.cpp in Sources */, 481FB55F1AC1B6750076CFF3 /* sharedjclass.cpp in Sources */, 481FB6101AC1B7AC0076CFF3 /* sparcccommand.cpp in Sources */, 481FB5E81AC1B77E0076CFF3 /* otuassociationcommand.cpp in Sources */, 481FB5B31AC1B7300076CFF3 /* deuniquetreecommand.cpp in Sources */, 481FB5D91AC1B75C0076CFF3 /* makefastqcommand.cpp in Sources */, 481FB5A21AC1B71B0076CFF3 /* clearcutcommand.cpp in Sources */, 48D6E9681CA42389008DF76B /* testvsearchfileparser.cpp in Sources */, 4809EC9E227C9B3100B4D0E5 /* metrosichel.cpp in Sources */, 481FB5851AC1B6FF0076CFF3 /* chimeracheckrdp.cpp in Sources */, 481FB55C1AC1B6660076CFF3 /* sharedbraycurtis.cpp in Sources */, 481FB5BE1AC1B74F0076CFF3 /* getmetacommunitycommand.cpp in Sources */, 481FB6821AC1B8AF0076CFF3 /* svm.cpp in Sources */, 481FB6911AC1BAA60076CFF3 /* phylotree.cpp in Sources */, 481FB6261AC1B7EA0076CFF3 /* alignmentcell.cpp in Sources */, 481FB5C21AC1B74F0076CFF3 /* getoturepcommand.cpp in Sources */, 481FB5D01AC1B75C0076CFF3 /* kruskalwalliscommand.cpp in Sources */, 48B662031BBB1B6600997EE4 /* testrenameseqscommand.cpp in Sources */, 48E418561D08893A004C36AB /* (null) in Sources */, 481FB5511AC1B6410076CFF3 /* npshannon.cpp in Sources */, 481FB6471AC1B7EA0076CFF3 /* sparsematrix.cpp in Sources */, 481FB5871AC1B6FF0076CFF3 /* decalc.cpp in Sources */, 481FB6791AC1B88F0076CFF3 /* readphylip.cpp in Sources */, 481FB6151AC1B7AC0076CFF3 /* summarycommand.cpp in Sources */, 481FB5EE1AC1B77E0076CFF3 /* pcacommand.cpp in Sources */, 48BDDA761ECA067000F0F6C0 /* sharedrabundfloatvectors.cpp in Sources */, 481FB5711AC1B6D40076CFF3 /* shen.cpp in Sources */, 4803D5B0211CD839001C63B5 /* testsharedrabundfloatvector.cpp in Sources */, 481FB6501AC1B8100076CFF3 /* dlibshuff.cpp in Sources */, 484976E82256799100F3A291 /* diversityestimatorcommand.cpp in Sources */, 481FB64D1AC1B7F40076CFF3 /* treemap.cpp in Sources */, 481FB67F1AC1B8960076CFF3 /* singlelinkage.cpp in Sources */, 481FB5641AC1B6A40076CFF3 /* sharedkulczynskicody.cpp in Sources */, 481FB60D1AC1B7AC0076CFF3 /* shhhercommand.cpp in Sources */, 481FB5FA1AC1B77E0076CFF3 /* removegroupscommand.cpp in Sources */, 481FB5371AC1B5E00076CFF3 /* cluster.cpp in Sources */, 481FB53B1AC1B5EF0076CFF3 /* boneh.cpp in Sources */, 481FB6071AC1B7970076CFF3 /* setcurrentcommand.cpp in Sources 
*/, 481FB5321AC1B5D00076CFF3 /* cmdargs.cpp in Sources */, F44268EF27BD52D50000C15D /* alignmusclecommand.cpp in Sources */, 481FB6711AC1B8820076CFF3 /* rarefact.cpp in Sources */, F4103AD625A4DB80001ED741 /* sharedrabundvectors.cpp in Sources */, 481FB5841AC1B6FF0076CFF3 /* mothurchimera.cpp in Sources */, 481FB6121AC1B7AC0076CFF3 /* splitgroupscommand.cpp in Sources */, 489AF692210619170028155E /* sharedrabundfloatvector.cpp in Sources */, 481FB6921AC1BAA60076CFF3 /* taxonomyequalizer.cpp in Sources */, 481FB68A1AC1BA9E0076CFF3 /* alignnode.cpp in Sources */, 481FB58B1AC1B6FF0076CFF3 /* pintail.cpp in Sources */, 4815BEBD2293189600677EE2 /* lsrarefaction.cpp in Sources */, 48D6E96B1CA4262A008DF76B /* dataset.cpp in Sources */, 481FB6041AC1B7970076CFF3 /* sensspeccommand.cpp in Sources */, 481FB6491AC1B7F40076CFF3 /* suffixdb.cpp in Sources */, 481FB6111AC1B7AC0076CFF3 /* splitabundcommand.cpp in Sources */, 481FB5471AC1B61C0076CFF3 /* heip.cpp in Sources */, 481FB5D31AC1B75C0076CFF3 /* listotuscommand.cpp in Sources */, 481FB5551AC1B64F0076CFF3 /* qstat.cpp in Sources */, 481FB5DF1AC1B77E0076CFF3 /* mergefilecommand.cpp in Sources */, 481FB5981AC1B71B0076CFF3 /* chimerapintailcommand.cpp in Sources */, 481FB6091AC1B7970076CFF3 /* setlogfilecommand.cpp in Sources */, 481FB5C91AC1B74F0076CFF3 /* getsharedotucommand.cpp in Sources */, 481FB5B11AC1B7300076CFF3 /* degapseqscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; 8DD76FAB0486AB0100D96B5E /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */, A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */, A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */, A7E9B88412D37EC400DA6239 /* alignmentcell.cpp in Sources */, 48ED1E81235E1D59003E66F7 /* batchengine.cpp in Sources */, 48A0B8F125472C4500726384 /* biomhdf5.cpp in Sources */, A7E9B88512D37EC400DA6239 /* alignmentdb.cpp in Sources */, A7E9B88712D37EC400DA6239 /* bayesian.cpp in Sources */, A7E9B88812D37EC400DA6239 /* bellerophon.cpp in Sources */, A7E9B88912D37EC400DA6239 /* bergerparker.cpp in Sources */, F4103AD325A4DB7F001ED741 /* sharedrabundvectors.cpp in Sources */, A7E9B88A12D37EC400DA6239 /* binsequencecommand.cpp in Sources */, A7E9B88D12D37EC400DA6239 /* boneh.cpp in Sources */, A7E9B88E12D37EC400DA6239 /* bootstrap.cpp in Sources */, A7E9B89012D37EC400DA6239 /* bstick.cpp in Sources */, A7E9B89212D37EC400DA6239 /* canberra.cpp in Sources */, 48FB99C5209B69FA00FF9F6E /* optirefmatrix.cpp in Sources */, A7E9B89412D37EC400DA6239 /* ccode.cpp in Sources */, A7E9B89512D37EC400DA6239 /* chao1.cpp in Sources */, A7E9B89612D37EC400DA6239 /* mothurchimera.cpp in Sources */, A7E9B89712D37EC400DA6239 /* chimerabellerophoncommand.cpp in Sources */, A7E9B89812D37EC400DA6239 /* chimeraccodecommand.cpp in Sources */, A7E9B89912D37EC400DA6239 /* chimeracheckcommand.cpp in Sources */, 48E981CF189C38FB0042BE9D /* (null) in Sources */, A7E9B89A12D37EC400DA6239 /* chimeracheckrdp.cpp in Sources */, A7E9B89B12D37EC400DA6239 /* chimerapintailcommand.cpp in Sources */, F45A2E3D25A78B4D00994F76 /* contigsreport.cpp in Sources */, F4A866D1266912830010479A /* proteindb.cpp in Sources */, 48FB99CF20A4F3FB00FF9F6E /* optifitcluster.cpp in Sources */, A7E9B89C12D37EC400DA6239 /* chimerarealigner.cpp in Sources */, A7E9B89E12D37EC400DA6239 /* chimeraslayer.cpp in Sources */, 488C1DEA242D102B00BDCCB4 /* optidb.cpp in Sources */, A7E9B89F12D37EC400DA6239 /* chimeraslayercommand.cpp in Sources */, 
A7E9B8A012D37EC400DA6239 /* chopseqscommand.cpp in Sources */, A7E9B8A112D37EC400DA6239 /* classify.cpp in Sources */, 48576EA21D05DBCD00BBC9C0 /* vsearchfileparser.cpp in Sources */, 48910D461D58CAD700F60EDB /* opticluster.cpp in Sources */, A7E9B8A212D37EC400DA6239 /* classifyotucommand.cpp in Sources */, F44268EE27BD52D50000C15D /* alignmusclecommand.cpp in Sources */, A7E9B8A312D37EC400DA6239 /* classifyseqscommand.cpp in Sources */, 48A055332491577800D0F97F /* sffheader.cpp in Sources */, 481E40E1244F62980059C925 /* calculator.cpp in Sources */, A7E9B8A412D37EC400DA6239 /* clearcut.cpp in Sources */, 48ED1E79235E1ACA003E66F7 /* scriptengine.cpp in Sources */, A7E9B8A512D37EC400DA6239 /* clearcutcommand.cpp in Sources */, A7E9B8A612D37EC400DA6239 /* cluster.cpp in Sources */, A7E9B8A712D37EC400DA6239 /* clusterclassic.cpp in Sources */, A7E9B8A812D37EC400DA6239 /* clustercommand.cpp in Sources */, A7E9B8A912D37EC400DA6239 /* clusterdoturcommand.cpp in Sources */, A7E9B8AA12D37EC400DA6239 /* clusterfragmentscommand.cpp in Sources */, 48EDB76C1D1320DD00F76E93 /* chimeravsearchcommand.cpp in Sources */, A7E9B8AB12D37EC400DA6239 /* clustersplitcommand.cpp in Sources */, 48E5446C1E9D3A8C00FF6AB8 /* f1score.cpp in Sources */, A7E9B8AC12D37EC400DA6239 /* cmdargs.cpp in Sources */, A7E9B8AD12D37EC400DA6239 /* collect.cpp in Sources */, 481E40DF244F619D0059C925 /* eachgapignore.cpp in Sources */, A7E9B8AE12D37EC400DA6239 /* collectcommand.cpp in Sources */, A7E9B8AF12D37EC400DA6239 /* collectsharedcommand.cpp in Sources */, A7E9B8B012D37EC400DA6239 /* commandfactory.cpp in Sources */, A7E9B8B112D37EC400DA6239 /* commandoptionparser.cpp in Sources */, A7E9B8B312D37EC400DA6239 /* consensus.cpp in Sources */, A7E9B8B412D37EC400DA6239 /* consensusseqscommand.cpp in Sources */, A7E9B8B512D37EC400DA6239 /* corraxescommand.cpp in Sources */, A7E9B8B612D37EC400DA6239 /* coverage.cpp in Sources */, A7E9B8B812D37EC400DA6239 /* decalc.cpp in Sources */, A7E9B8B912D37EC400DA6239 /* uniqueseqscommand.cpp in Sources */, A7E9B8BA12D37EC400DA6239 /* degapseqscommand.cpp in Sources */, F4A866BF265BE7EC0010479A /* aminoacid.cpp in Sources */, 4815BEB12289E13500677EE2 /* lnrarefaction.cpp in Sources */, A7E9B8BB12D37EC400DA6239 /* deuniqueseqscommand.cpp in Sources */, A7E9B8BC12D37EC400DA6239 /* distancecommand.cpp in Sources */, A7E9B8BD12D37EC400DA6239 /* distancedb.cpp in Sources */, A7E9B8BE12D37EC400DA6239 /* distclearcut.cpp in Sources */, A7E9B8BF12D37EC400DA6239 /* dlibshuff.cpp in Sources */, A7E9B8C012D37EC400DA6239 /* dmat.cpp in Sources */, A7E9B8C112D37EC400DA6239 /* efron.cpp in Sources */, A7E9B8C312D37EC400DA6239 /* fasta.cpp in Sources */, A7E9B8C412D37EC400DA6239 /* fastamap.cpp in Sources */, 48BDDA751ECA067000F0F6C0 /* sharedrabundfloatvectors.cpp in Sources */, A7E9B8C512D37EC400DA6239 /* fileoutput.cpp in Sources */, A7E9B8C612D37EC400DA6239 /* filterseqscommand.cpp in Sources */, A7E9B8C812D37EC400DA6239 /* flowdata.cpp in Sources */, 48910D431D5243E500F60EDB /* mergecountcommand.cpp in Sources */, 48E0230324BF488D00BFEA41 /* report.cpp in Sources */, A7E9B8CB12D37EC400DA6239 /* fullmatrix.cpp in Sources */, A7E9B8CC12D37EC400DA6239 /* geom.cpp in Sources */, A7E9B8CD12D37EC400DA6239 /* getgroupcommand.cpp in Sources */, A7E9B8CE12D37EC400DA6239 /* getgroupscommand.cpp in Sources */, A7E9B8CF12D37EC400DA6239 /* getlabelcommand.cpp in Sources */, A7E9B8D012D37EC400DA6239 /* getlineagecommand.cpp in Sources */, A7E9B8D112D37EC400DA6239 /* getlistcountcommand.cpp in Sources */, 
A7E9B8D212D37EC400DA6239 /* getopt_long.cpp in Sources */, 48FB99C920A48EF700FF9F6E /* optidata.cpp in Sources */, A7E9B8D312D37EC400DA6239 /* getoturepcommand.cpp in Sources */, A7E9B8D512D37EC400DA6239 /* getrabundcommand.cpp in Sources */, A7E9B8D612D37EC400DA6239 /* getrelabundcommand.cpp in Sources */, A7E9B8D712D37EC400DA6239 /* getsabundcommand.cpp in Sources */, 48A055302490066C00D0F97F /* sffread.cpp in Sources */, A7E9B8D812D37EC400DA6239 /* getseqscommand.cpp in Sources */, F4A866B7265BE7720010479A /* protein.cpp in Sources */, 4809EC9D227C9B3100B4D0E5 /* metrosichel.cpp in Sources */, A7E9B8D912D37EC400DA6239 /* getsharedotucommand.cpp in Sources */, 48E544701E9D3B2D00FF6AB8 /* accuracy.cpp in Sources */, 4815BEC12295CE6800677EE2 /* siabundance.cpp in Sources */, A7E9B8DB12D37EC400DA6239 /* goodscoverage.cpp in Sources */, A7E9B8DC12D37EC400DA6239 /* gotohoverlap.cpp in Sources */, A7E9B8DD12D37EC400DA6239 /* gower.cpp in Sources */, A7E9B8DE12D37EC400DA6239 /* groupmap.cpp in Sources */, 48A0B8F625472C6500726384 /* biomsimple.cpp in Sources */, 48A0B8EC2547282600726384 /* biom.cpp in Sources */, 488841651CC6C34900C5E972 /* renamefilecommand.cpp in Sources */, 4893DE2918EEF28100C615DF /* (null) in Sources */, A7E9B8DF12D37EC400DA6239 /* hamming.cpp in Sources */, A7E9B8E212D37EC400DA6239 /* heatmap.cpp in Sources */, A7E9B8E312D37EC400DA6239 /* heatmapcommand.cpp in Sources */, A7E9B8E412D37EC400DA6239 /* heatmapsim.cpp in Sources */, A7E9B8E512D37EC400DA6239 /* heatmapsimcommand.cpp in Sources */, A7E9B8E612D37EC400DA6239 /* heip.cpp in Sources */, 481FB52A1AC19F8B0076CFF3 /* setseedcommand.cpp in Sources */, A7E9B8E712D37EC400DA6239 /* hellinger.cpp in Sources */, A7E9B8E812D37EC400DA6239 /* helpcommand.cpp in Sources */, A7E9B8E912D37EC400DA6239 /* indicatorcommand.cpp in Sources */, A7E9B8EA12D37EC400DA6239 /* inputdata.cpp in Sources */, A7E9B8EB12D37EC400DA6239 /* invsimpson.cpp in Sources */, A7E9B8EC12D37EC400DA6239 /* jackknife.cpp in Sources */, A7E9B8ED12D37EC400DA6239 /* kmer.cpp in Sources */, A7E9B8EE12D37EC400DA6239 /* kmerdb.cpp in Sources */, A7E9B8EF12D37EC400DA6239 /* knn.cpp in Sources */, 48A85BAD18E1AF2000199B6F /* (null) in Sources */, A7E9B8F012D37EC400DA6239 /* libshuff.cpp in Sources */, 48F1C16623D606050034DAAF /* makeclrcommand.cpp in Sources */, 48F98E4D1A9CFD670005E81B /* completelinkage.cpp in Sources */, A7E9B8F112D37EC400DA6239 /* libshuffcommand.cpp in Sources */, A7E9B8F212D37EC400DA6239 /* listseqscommand.cpp in Sources */, A7E9B8F312D37EC400DA6239 /* listvector.cpp in Sources */, 483A9BAE225BBE55006102DF /* metroig.cpp in Sources */, 48E544451E9C2B1000FF6AB8 /* sensitivity.cpp in Sources */, A7E9B8F412D37EC400DA6239 /* logsd.cpp in Sources */, 482AC3B92562B57600C9AF4A /* picrust.cpp in Sources */, A7E9B8F512D37EC400DA6239 /* makegroupcommand.cpp in Sources */, 48705AC719BE32C50075E977 /* sharedrjsd.cpp in Sources */, 48F1C16E23D78F8D0034DAAF /* sharedclrvector.cpp in Sources */, 48E5445D1E9C2F0F00FF6AB8 /* fn.cpp in Sources */, A7E9B8F612D37EC400DA6239 /* maligner.cpp in Sources */, A7E9B8F712D37EC400DA6239 /* manhattan.cpp in Sources */, A7E9B8F812D37EC400DA6239 /* distsharedcommand.cpp in Sources */, A7E9B8F912D37EC400DA6239 /* memchi2.cpp in Sources */, A7E9B8FA12D37EC400DA6239 /* memchord.cpp in Sources */, A7E9B8FB12D37EC400DA6239 /* memeuclidean.cpp in Sources */, A7E9B8FC12D37EC400DA6239 /* mempearson.cpp in Sources */, A7E9B8FD12D37EC400DA6239 /* mergefilecommand.cpp in Sources */, A7E9B8FF12D37EC400DA6239 /* metastatscommand.cpp in 
Sources */, F41A1B91261257DE00144985 /* kmerdist.cpp in Sources */, A7E9B90012D37EC400DA6239 /* mgclustercommand.cpp in Sources */, A7E9B90112D37EC400DA6239 /* mothur.cpp in Sources */, A7E9B90212D37EC400DA6239 /* mothurout.cpp in Sources */, A7E9B90312D37EC400DA6239 /* nameassignment.cpp in Sources */, A7E9B90412D37EC400DA6239 /* nast.cpp in Sources */, A7E9B90512D37EC400DA6239 /* alignreport.cpp in Sources */, A7E9B90612D37EC400DA6239 /* needlemanoverlap.cpp in Sources */, A7E9B90712D37EC400DA6239 /* noalign.cpp in Sources */, A7E9B90812D37EC400DA6239 /* nocommands.cpp in Sources */, 481E40DD244F52460059C925 /* ignoregaps.cpp in Sources */, A7E9B90912D37EC400DA6239 /* normalizesharedcommand.cpp in Sources */, A7E9B90A12D37EC400DA6239 /* npshannon.cpp in Sources */, A7E9B90B12D37EC400DA6239 /* odum.cpp in Sources */, A7E9B90C12D37EC400DA6239 /* optionparser.cpp in Sources */, A7E9B90D12D37EC400DA6239 /* ordervector.cpp in Sources */, A7E9B90E12D37EC400DA6239 /* otuhierarchycommand.cpp in Sources */, F4B4B0DC27396EF7003B2133 /* translateseqscommand.cpp in Sources */, A7E9B90F12D37EC400DA6239 /* overlap.cpp in Sources */, A7E9B91012D37EC400DA6239 /* pairwiseseqscommand.cpp in Sources */, A7E9B91112D37EC400DA6239 /* fastaqinfocommand.cpp in Sources */, A7E9B91312D37EC400DA6239 /* parsimony.cpp in Sources */, A7E9B91412D37EC400DA6239 /* parsimonycommand.cpp in Sources */, A7E9B91512D37EC400DA6239 /* pcoacommand.cpp in Sources */, A7E9B91712D37EC400DA6239 /* phylodiversitycommand.cpp in Sources */, 488841611CC515A000C5E972 /* (null) in Sources */, 485B0E081F264F2E00CA5F57 /* sharedrabundvector.cpp in Sources */, A7E9B91812D37EC400DA6239 /* phylosummary.cpp in Sources */, A7E9B91912D37EC400DA6239 /* phylotree.cpp in Sources */, A7E9B91A12D37EC400DA6239 /* phylotypecommand.cpp in Sources */, A7E9B91B12D37EC400DA6239 /* pintail.cpp in Sources */, 48DB37B31B3B27E000C372A4 /* makefilecommand.cpp in Sources */, A7E9B91D12D37EC400DA6239 /* preclustercommand.cpp in Sources */, A7E9B91E12D37EC400DA6239 /* prng.cpp in Sources */, A7E9B92012D37EC400DA6239 /* qstat.cpp in Sources */, A7E9B92112D37EC400DA6239 /* qualityscores.cpp in Sources */, A7E9B92212D37EC400DA6239 /* quitcommand.cpp in Sources */, A7E9B92312D37EC400DA6239 /* rabundvector.cpp in Sources */, A7E9B92512D37EC400DA6239 /* raredisplay.cpp in Sources */, A7E9B92612D37EC400DA6239 /* rarefact.cpp in Sources */, 48B44EF21FB9EF8200789C45 /* utils.cpp in Sources */, A7E9B92712D37EC400DA6239 /* rarefactcommand.cpp in Sources */, A7E9B92812D37EC400DA6239 /* rarefactsharedcommand.cpp in Sources */, A7E9B92912D37EC400DA6239 /* readblast.cpp in Sources */, A7E9B92A12D37EC400DA6239 /* readcluster.cpp in Sources */, A7E9B92B12D37EC400DA6239 /* readcolumn.cpp in Sources */, A7E9B92F12D37EC400DA6239 /* readphylip.cpp in Sources */, 48BD4EB821F7724C008EA73D /* filefile.cpp in Sources */, A7E9B93012D37EC400DA6239 /* readtree.cpp in Sources */, A7E9B93212D37EC400DA6239 /* removegroupscommand.cpp in Sources */, A7E9B93312D37EC400DA6239 /* removelineagecommand.cpp in Sources */, A7E9B93512D37EC400DA6239 /* removeseqscommand.cpp in Sources */, A7E9B93712D37EC400DA6239 /* reversecommand.cpp in Sources */, A7E9B93812D37EC400DA6239 /* sabundvector.cpp in Sources */, A7E9B93912D37EC400DA6239 /* screenseqscommand.cpp in Sources */, A7E9B93A12D37EC400DA6239 /* aligncheckcommand.cpp in Sources */, 4815BEBC2293189600677EE2 /* lsrarefaction.cpp in Sources */, A7E9B93B12D37EC400DA6239 /* sensspeccommand.cpp in Sources */, A7E9B93C12D37EC400DA6239 /* seqerrorcommand.cpp 
in Sources */, 48FB99CC20A4AD7D00FF9F6E /* optiblastmatrix.cpp in Sources */, A7E9B93D12D37EC400DA6239 /* seqsummarycommand.cpp in Sources */, A7E9B93E12D37EC400DA6239 /* sequence.cpp in Sources */, A7E9B93F12D37EC400DA6239 /* sequencedb.cpp in Sources */, A7E9B94012D37EC400DA6239 /* setdircommand.cpp in Sources */, A7E9B94112D37EC400DA6239 /* setlogfilecommand.cpp in Sources */, A7E9B94212D37EC400DA6239 /* sffinfocommand.cpp in Sources */, A7E9B94312D37EC400DA6239 /* shannon.cpp in Sources */, 483C952E188F0CAD0035E7B7 /* (null) in Sources */, 48910D4B1D58CBA300F60EDB /* optimatrix.cpp in Sources */, A7E9B94412D37EC400DA6239 /* shannoneven.cpp in Sources */, A7E9B94512D37EC400DA6239 /* sharedace.cpp in Sources */, 48BDDA791ECA3B8E00F0F6C0 /* rabundfloatvector.cpp in Sources */, A7E9B94612D37EC400DA6239 /* sharedanderbergs.cpp in Sources */, 48B01D2C2016470F006BE140 /* sensspeccalc.cpp in Sources */, A7E9B94712D37EC400DA6239 /* sharedbraycurtis.cpp in Sources */, A7E9B94812D37EC400DA6239 /* sharedchao1.cpp in Sources */, A7E9B94912D37EC400DA6239 /* makesharedcommand.cpp in Sources */, A7E9B94A12D37EC400DA6239 /* sharedjabund.cpp in Sources */, A7E9B94B12D37EC400DA6239 /* sharedjackknife.cpp in Sources */, 4815BEC52296F19500677EE2 /* sirarefaction.cpp in Sources */, A7E9B94C12D37EC400DA6239 /* sharedjclass.cpp in Sources */, A7E9B94D12D37EC400DA6239 /* sharedjest.cpp in Sources */, 4809ECA12280898E00B4D0E5 /* igrarefaction.cpp in Sources */, 48E544741E9D3C1200FF6AB8 /* ppv.cpp in Sources */, A7E9B94E12D37EC400DA6239 /* sharedkstest.cpp in Sources */, A7E9B94F12D37EC400DA6239 /* sharedkulczynski.cpp in Sources */, A7E9B95012D37EC400DA6239 /* sharedkulczynskicody.cpp in Sources */, 48705AC419BE32C50075E977 /* getmimarkspackagecommand.cpp in Sources */, 48C728791B728D6B00D40830 /* biominfocommand.cpp in Sources */, A7E9B95112D37EC400DA6239 /* sharedlennon.cpp in Sources */, A7E9B95212D37EC400DA6239 /* sharedlistvector.cpp in Sources */, 488563D123CD00C4007B5659 /* taxonomy.cpp in Sources */, A7E9B95312D37EC400DA6239 /* sharedmarczewski.cpp in Sources */, A7E9B95412D37EC400DA6239 /* sharedmorisitahorn.cpp in Sources */, A7E9B95512D37EC400DA6239 /* sharedochiai.cpp in Sources */, A7E9B95612D37EC400DA6239 /* sharedordervector.cpp in Sources */, A7E9B95A12D37EC400DA6239 /* sharedsobs.cpp in Sources */, 48B44EEE1FB5006500789C45 /* currentfile.cpp in Sources */, A7E9B95B12D37EC400DA6239 /* sharedsobscollectsummary.cpp in Sources */, A7E9B95C12D37EC400DA6239 /* sharedsorabund.cpp in Sources */, A7E9B95D12D37EC400DA6239 /* sharedsorclass.cpp in Sources */, A7E9B95E12D37EC400DA6239 /* sharedsorest.cpp in Sources */, 48B01D2920163594006BE140 /* clusterfitcommand.cpp in Sources */, A7E9B95F12D37EC400DA6239 /* sharedthetan.cpp in Sources */, A7E9B96012D37EC400DA6239 /* sharedthetayc.cpp in Sources */, A7E9B96212D37EC400DA6239 /* shen.cpp in Sources */, A7E9B96312D37EC400DA6239 /* shhhercommand.cpp in Sources */, A7E9B96412D37EC400DA6239 /* simpson.cpp in Sources */, A7E9B96512D37EC400DA6239 /* simpsoneven.cpp in Sources */, A7E9B96612D37EC400DA6239 /* singlelinkage.cpp in Sources */, A7E9B96712D37EC400DA6239 /* slayer.cpp in Sources */, A7E9B96812D37EC400DA6239 /* slibshuff.cpp in Sources */, A7E9B96912D37EC400DA6239 /* smithwilson.cpp in Sources */, A7E9B96A12D37EC400DA6239 /* soergel.cpp in Sources */, A7E9B96B12D37EC400DA6239 /* solow.cpp in Sources */, A7E9B96C12D37EC400DA6239 /* sparsematrix.cpp in Sources */, 487C5A871AB88B93002AF48A /* mimarksattributescommand.cpp in Sources */, 
A7E9B96D12D37EC400DA6239 /* spearman.cpp in Sources */, 48705AC519BE32C50075E977 /* oligos.cpp in Sources */, A7E9B96E12D37EC400DA6239 /* speciesprofile.cpp in Sources */, A7E9B96F12D37EC400DA6239 /* splitabundcommand.cpp in Sources */, A7E9B97012D37EC400DA6239 /* splitgroupscommand.cpp in Sources */, A7E9B97112D37EC400DA6239 /* splitmatrix.cpp in Sources */, A7E9B97212D37EC400DA6239 /* structchi2.cpp in Sources */, A7E9B97312D37EC400DA6239 /* structchord.cpp in Sources */, A7E9B97412D37EC400DA6239 /* structeuclidean.cpp in Sources */, A7E9B97512D37EC400DA6239 /* structkulczynski.cpp in Sources */, 48BDDA711EC9D31400F0F6C0 /* sharedrabundvectors.hpp in Sources */, A7E9B97612D37EC400DA6239 /* structpearson.cpp in Sources */, 48E544411E9C292900FF6AB8 /* mcc.cpp in Sources */, A7E9B97712D37EC400DA6239 /* subsamplecommand.cpp in Sources */, A7E9B97812D37EC400DA6239 /* suffixdb.cpp in Sources */, A7E9B97912D37EC400DA6239 /* suffixnodes.cpp in Sources */, A7E9B97A12D37EC400DA6239 /* suffixtree.cpp in Sources */, A7E9B97B12D37EC400DA6239 /* summarycommand.cpp in Sources */, 48E544511E9C2CFD00FF6AB8 /* tp.cpp in Sources */, 48D36FC924C1EAB0001A0FDC /* (null) in Sources */, A7E9B97C12D37EC400DA6239 /* summarysharedcommand.cpp in Sources */, A7E9B97D12D37EC400DA6239 /* systemcommand.cpp in Sources */, 48C51DF31A793EFE004ECDF1 /* kmeralign.cpp in Sources */, A7E9B97E12D37EC400DA6239 /* taxonomyequalizer.cpp in Sources */, A7E9B97F12D37EC400DA6239 /* tree.cpp in Sources */, A7E9B98012D37EC400DA6239 /* treesharedcommand.cpp in Sources */, A7E9B98112D37EC400DA6239 /* treemap.cpp in Sources */, 4889EA221E8962D50054E0BB /* summary.cpp in Sources */, A7E9B98212D37EC400DA6239 /* treenode.cpp in Sources */, A7E9B98312D37EC400DA6239 /* trimflowscommand.cpp in Sources */, A7E9B98412D37EC400DA6239 /* trimseqscommand.cpp in Sources */, A7E9B98512D37EC400DA6239 /* unifracunweightedcommand.cpp in Sources */, A7E9B98612D37EC400DA6239 /* unifracweightedcommand.cpp in Sources */, A7E9B98712D37EC400DA6239 /* unweighted.cpp in Sources */, A7E9B98812D37EC400DA6239 /* uvest.cpp in Sources */, A7E9B98912D37EC400DA6239 /* validcalculator.cpp in Sources */, 48E544591E9C2E6500FF6AB8 /* fp.cpp in Sources */, A7E9B98A12D37EC400DA6239 /* validparameter.cpp in Sources */, A7E9B98B12D37EC400DA6239 /* venn.cpp in Sources */, 48ED1E7D235E1BB4003E66F7 /* interactengine.cpp in Sources */, 484976E32255412400F3A291 /* igabundance.cpp in Sources */, A7E9B98C12D37EC400DA6239 /* venncommand.cpp in Sources */, A7E9B98D12D37EC400DA6239 /* weighted.cpp in Sources */, A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */, A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */, A70332B712D3A13400761E33 /* Makefile in Sources */, A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */, A7FC486712D795D60055BC5C /* pcacommand.cpp in Sources */, A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */, A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */, A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */, A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */, 7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */, 48E544551E9C2DF500FF6AB8 /* tn.cpp in Sources */, 4815BEC922970FA700677EE2 /* sishift.cpp in Sources */, A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */, A75790591301749D00A30DAB /* homovacommand.cpp in Sources */, 481623E21B56A2DB004C60B7 /* pcrseqscommand.cpp in Sources */, 484976E72256799100F3A291 /* diversityestimatorcommand.cpp in Sources */, A7FA10021302E097003860FE 
/* mantelcommand.cpp in Sources */, A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */, A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */, A7FE7C401330EA1000F7B327 /* getcurrentcommand.cpp in Sources */, A7FE7E6D13311EA400F7B327 /* setcurrentcommand.cpp in Sources */, A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */, A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */, A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */, A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */, 48E7E0A62278AD4800B74910 /* diversityutils.cpp in Sources */, A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */, A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */, A7FF19F2140FFDA500AD216D /* trimoligos.cpp in Sources */, A7F9F5CF141A5E500032F693 /* sequenceparser.cpp in Sources */, 48E544611E9C2FB800FF6AB8 /* fpfn.cpp in Sources */, A7FFB558142CA02C004884F2 /* summarytaxcommand.cpp in Sources */, A7BF221414587886000AD524 /* myPerseus.cpp in Sources */, A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */, A774101414695AF60098E6AC /* shhhseqscommand.cpp in Sources */, A774104814696F320098E6AC /* myseqdist.cpp in Sources */, 835FE03E19F00A4D005AA754 /* svm.cpp in Sources */, 48ED1E8523689DE8003E66F7 /* srainfocommand.cpp in Sources */, A77410F614697C300098E6AC /* seqnoise.cpp in Sources */, A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */, 48705AC619BE32C50075E977 /* mergesfffilecommand.cpp in Sources */, 48E544491E9C2BE100FF6AB8 /* specificity.cpp in Sources */, 481E40DB244DFF5A0059C925 /* onegapignore.cpp in Sources */, A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */, A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */, 48F1C16A23D78D7B0034DAAF /* sharedclrvectors.cpp in Sources */, A7EEB0F514F29BFE00344B83 /* classifytreecommand.cpp in Sources */, 48C51DF01A76B888004ECDF1 /* fastqread.cpp in Sources */, A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */, A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */, A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */, A7876A26152A017C00A0AE86 /* subsample.cpp in Sources */, A7D755DA1535F679009BF21A /* treereader.cpp in Sources */, 48998B69242E785100DBD0A9 /* onegapdist.cpp in Sources */, A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */, 219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */, 219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */, A7A0671A1562946F0095C8C5 /* listotuscommand.cpp in Sources */, A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */, A70056E6156A93D000924A2D /* getotuscommand.cpp in Sources */, A70056EB156AB6E500924A2D /* removeotuscommand.cpp in Sources */, A74D59A4159A1E2000043046 /* counttable.cpp in Sources */, 48E5447C1E9D3F0400FF6AB8 /* fdr.cpp in Sources */, 484976DF22552E0B00F3A291 /* erarefaction.cpp in Sources */, A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */, 481E40E3244F6A050059C925 /* eachgapdist.cpp in Sources */, A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */, A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */, 835FE03D19F00640005AA754 /* classifysvmsharedcommand.cpp in Sources */, 48E5444D1E9C2C8F00FF6AB8 /* tptn.cpp in Sources */, 4815BEB4228B371E00677EE2 /* lnshift.cpp in Sources */, A721AB6A161C570F009860A1 /* alignnode.cpp in Sources */, A721AB6B161C570F009860A1 /* aligntree.cpp in Sources */, A721AB71161C572A009860A1 /* kmernode.cpp in 
Sources */, 48CF76F021BEBDD300B2FB5C /* mergeotuscommand.cpp in Sources */, A721AB72161C572A009860A1 /* kmertree.cpp in Sources */, A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */, A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */, A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */, A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */, 48576EA11D05DBC600BBC9C0 /* averagelinkage.cpp in Sources */, A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */, 4809ECA522831A5E00B4D0E5 /* lnabundance.cpp in Sources */, A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */, A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */, A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */, A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */, F4A86713268F5CCE0010479A /* kimura.cpp in Sources */, A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */, 4815BEB8228DD18400677EE2 /* lsabundance.cpp in Sources */, 485B0E0E1F27C40500CA5F57 /* sharedrabundfloatvector.cpp in Sources */, 48E7E0A32278A21B00B74910 /* metrolognormal.cpp in Sources */, A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */, A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */, A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */, A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */, A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */, A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */, A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */, A747EC71181EA0F900345732 /* sracommand.cpp in Sources */, A7132EB3184E792700AAA402 /* communitytype.cpp in Sources */, A7D395C4184FA3A200A350D7 /* kmeans.cpp in Sources */, A7222D731856277C0055A993 /* sharedjsd.cpp in Sources */, 4809EC98227B405700B4D0E5 /* metrologstudent.cpp in Sources */, A7B093C018579F0400843CD1 /* pam.cpp in Sources */, 48E544781E9D3CE400FF6AB8 /* npv.cpp in Sources */, A7A09B1018773C0E00FAA081 /* shannonrange.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; /* End PBXSourcesBuildPhase section */ /* Begin XCBuildConfiguration section */ 1DEB928608733DD80010E9CD /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = YES; CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_OBJC_WEAK = YES; CLANG_WARN_CXX0X_EXTENSIONS = YES; "CLANG_WARN_CXX0X_EXTENSIONS[arch=*]" = NO; CLANG_WARN_UNREACHABLE_CODE = YES; COPY_PHASE_STRIP = NO; DEPLOYMENT_LOCATION = YES; DSTROOT = ""; "DSTROOT[sdk=*]" = ""; "DYLIB_CURRENT_VERSION[sdk=*]" = ""; "FRAMEWORK_SEARCH_PATHS[arch=*]" = "${SRCROOT}/mothur_resources_10.14/libs"; GCC_C_LANGUAGE_STANDARD = "compiler-default"; GCC_DYNAMIC_NO_PIC = NO; GCC_MODEL_TUNING = G5; GCC_OPTIMIZATION_LEVEL = 3; GCC_PREPROCESSOR_DEFINITIONS = ( "MOTHUR_FILES=\"\\\"/Users/swestcott/Desktop/release/;/Users/swestcott/Desktop/mothurbugs/\\\"\"", "VERSION=\"\\\"1.48.0\\\"\"", "MOTHUR_TOOLS=\"\\\"/Users/swestcott/Desktop/mothur/tools/;/Users/swestcott/Desktop/release/\\\"\"", "LOGFILE_NAME=\"\\\"./mothur.logfile\\\"\"", ); GCC_VERSION = ""; GCC_WARN_ABOUT_MISSING_PROTOTYPES = NO; GCC_WARN_UNINITIALIZED_AUTOS = NO; GCC_WARN_UNUSED_FUNCTION = YES; INSTALL_PATH = "${SRCROOT}"; "INSTALL_PATH[sdk=*]" = ""; LIBRARY_SEARCH_PATHS = "${SRCROOT}/mothur_resources_10.14/libs"; "LIBRARY_SEARCH_PATHS[arch=*]" = ""; MACOSX_DEPLOYMENT_TARGET = 10.14; ONLY_ACTIVE_ARCH = YES; OTHER_LDFLAGS = ( "-lreadline", "${SRCROOT}/mothur_resources_10.14/libs/libz.a", 
"${SRCROOT}/mothur_resources_10.14/libs/libboost_filesystem.a", "${SRCROOT}/mothur_resources_10.14/libs/libboost_iostreams.a", "${SRCROOT}/mothur_resources_10.14/libs/libgsl.a", "${SRCROOT}/mothur_resources_10.14/libs/libgslcblas.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_hl_cpp.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_cpp.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_hl.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5.a", ); PRELINK_LIBS = ""; PRODUCT_NAME = mothur; SDKROOT = macosx; SKIP_INSTALL = NO; USER_HEADER_SEARCH_PATHS = "${SRCROOT}/mothur_resources_10.14/headers/"; }; name = Debug; }; 1DEB928708733DD80010E9CD /* Release */ = { isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = YES; CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_OBJC_WEAK = YES; CLANG_WARN_CXX0X_EXTENSIONS = YES; CLANG_WARN_UNREACHABLE_CODE = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEPLOYMENT_LOCATION = YES; DSTROOT = ""; "FRAMEWORK_SEARCH_PATHS[arch=*]" = "${SRCROOT}/mothur_resources_10.14/libs"; GCC_C_LANGUAGE_STANDARD = "compiler-default"; GCC_MODEL_TUNING = G5; GCC_OPTIMIZATION_LEVEL = 3; GCC_PREPROCESSOR_DEFINITIONS = ( "MOTHUR_FILES=\"\\\"/Users/swestcott/Desktop/release\\\"\"", "VERSION=\"\\\"1.48.0\\\"\"", "LOGFILE_NAME=\"\\\"./mothur.logfile\\\"\"", "MOTHUR_TOOLS=\"\\\"/Users/swestcott/desktop/mothur/tools/\\\"\"", ); GCC_VERSION = ""; GCC_WARN_ABOUT_MISSING_PROTOTYPES = NO; GCC_WARN_UNINITIALIZED_AUTOS = NO; GCC_WARN_UNUSED_VALUE = YES; INSTALL_PATH = "${SRCROOT}"; "INSTALL_PATH[sdk=*]" = ""; LIBRARY_SEARCH_PATHS = "${SRCROOT}/mothur_resources_10.14/libs"; "LIBRARY_SEARCH_PATHS[arch=*]" = ""; MACOSX_DEPLOYMENT_TARGET = 10.14; ONLY_ACTIVE_ARCH = YES; OTHER_LDFLAGS = ( "-lreadline", "${SRCROOT}/mothur_resources_10.14/libs/libz.a", "${SRCROOT}/mothur_resources_10.14/libs/libboost_filesystem.a", "${SRCROOT}/mothur_resources_10.14/libs/libboost_iostreams.a", "${SRCROOT}/mothur_resources_10.14/libs/libgsl.a", "${SRCROOT}/mothur_resources_10.14/libs/libgslcblas.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_hl_cpp.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_cpp.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_hl.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5.a", ); PRELINK_LIBS = ""; PRODUCT_NAME = mothur; SDKROOT = macosx; SKIP_INSTALL = NO; USER_HEADER_SEARCH_PATHS = "${SRCROOT}/mothur_resources_10.14/headers/"; "VALID_ARCHS[sdk=*]" = "i386 x86_64"; }; name = Release; }; 1DEB928A08733DD80010E9CD /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { CLANG_ADDRESS_SANITIZER_CONTAINER_OVERFLOW = YES; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_CXX_LANGUAGE_STANDARD = "compiler-default"; CLANG_CXX_LIBRARY = "libc++"; CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; CLANG_WARN_COMMA = YES; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_CXX0X_EXTENSIONS = YES; CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN_ENUM_CONVERSION = YES; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; CLANG_WARN_STRICT_PROTOTYPES = YES; CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; DEPLOYMENT_LOCATION = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; 
GCC_C_LANGUAGE_STANDARD = "compiler-default"; GCC_ENABLE_SSE3_EXTENSIONS = NO; GCC_ENABLE_SSE41_EXTENSIONS = NO; GCC_ENABLE_SSE42_EXTENSIONS = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 3; GCC_PREPROCESSOR_DEFINITIONS = ( "MOTHUR_FILES=\"\\\"/Users/swestcott/Desktop/release\\\"\"", "VERSION=\"\\\"1.47.0\\\"\"", "LOGFILE_NAME=\"\\\"./mothur.logfile\\\"\"", "MOTHUR_TOOLS=\"\\\"/Users/swestcott/desktop/mothur/tools/\\\"\"", ); GCC_VERSION = ""; "GCC_VERSION[arch=*]" = ""; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_MISSING_NEWLINE = YES; GCC_WARN_ABOUT_MISSING_PROTOTYPES = NO; GCC_WARN_ABOUT_RETURN_TYPE = YES; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; HEADER_SEARCH_PATHS = ( "", "${SRCROOT}/mothur_resources_10.14/headers", ); "HEADER_SEARCH_PATHS[arch=*]" = ( "$(inherited)", /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include, /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.14.sdk/usr/include/, ); INSTALL_PATH = "${SRCROOT}"; LIBRARY_SEARCH_PATHS = "${SRCROOT}/mothur_resources_10.14/libs/"; "LIBRARY_SEARCH_PATHS[arch=*]" = ""; MACH_O_TYPE = mh_execute; MACOSX_DEPLOYMENT_TARGET = 11.0; ONLY_ACTIVE_ARCH = YES; OTHER_CPLUSPLUSFLAGS = ( "-DUNIT_TEST", "-DUSE_BOOST", "-DUSE_READLINE", "$(OTHER_CFLAGS)", "-DUSE_GSL", "-DUSE_HDF5", ); OTHER_LDFLAGS = ( "${SRCROOT}/mothur_resources_10.14/libs/libboost_filesystem.a", "${SRCROOT}/mothur_resources_10.14/libs/libboost_iostreams.a", "${SRCROOT}/mothur_resources_10.14/libs/libgsl.a", "${SRCROOT}/mothur_resources_10.14/libs/libgslcblas.a", "${SRCROOT}/mothur_resources_10.14/libs/libz.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_hl_cpp.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_cpp.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_hl.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5.a", "-libreadline", ); SDKROOT = macosx; SKIP_INSTALL = NO; USER_HEADER_SEARCH_PATHS = "${SRCROOT}/mothur_resources_10.14/headers"; "USER_HEADER_SEARCH_PATHS[arch=*]" = ""; }; name = Debug; }; 1DEB928B08733DD80010E9CD /* Release */ = { isa = XCBuildConfiguration; buildSettings = { CLANG_ADDRESS_SANITIZER_CONTAINER_OVERFLOW = YES; CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_CXX_LANGUAGE_STANDARD = "compiler-default"; CLANG_CXX_LIBRARY = "libc++"; CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; CLANG_WARN_COMMA = YES; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_CXX0X_EXTENSIONS = YES; CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN_ENUM_CONVERSION = YES; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; CLANG_WARN_STRICT_PROTOTYPES = YES; CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; DEPLOYMENT_LOCATION = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; GCC_C_LANGUAGE_STANDARD = "compiler-default"; GCC_GENERATE_DEBUGGING_SYMBOLS = NO; GCC_MODEL_TUNING = ""; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 3; GCC_PREPROCESSOR_DEFINITIONS = ( "VERSION=\"\\\"1.47.0\\\"\"", "MOTHUR_FILES=\"\\\"/Users/swestcott/Desktop/release\\\"\"", "LOGFILE_NAME=\"\\\"./mothur.logfile\\\"\"", 
"MOTHUR_TOOLS=\"\\\"/Users/swestcott/desktop/mothur/tools/\\\"\"", ); GCC_VERSION = ""; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_MISSING_NEWLINE = YES; GCC_WARN_ABOUT_MISSING_PROTOTYPES = NO; GCC_WARN_ABOUT_RETURN_TYPE = YES; GCC_WARN_MISSING_PARENTHESES = YES; GCC_WARN_MULTIPLE_DEFINITION_TYPES_FOR_SELECTOR = YES; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_PARAMETER = YES; GCC_WARN_UNUSED_VALUE = YES; GCC_WARN_UNUSED_VARIABLE = YES; HEADER_SEARCH_PATHS = ( "", "${SRCROOT}/mothur_resources_10.14/headers", ); "HEADER_SEARCH_PATHS[arch=*]" = ( "$(inherited)", /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include, /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.14.sdk/usr/include/, ); INSTALL_PATH = "${SRCROOT}"; LIBRARY_SEARCH_PATHS = "${SRCROOT}/mothur_resources_10.14/libs/"; "LIBRARY_SEARCH_PATHS[arch=*]" = ""; MACH_O_TYPE = mh_execute; MACOSX_DEPLOYMENT_TARGET = 11.0; OTHER_CPLUSPLUSFLAGS = ( "-DUSE_READLINE", "-DUNIT_TEST", "-DUSE_BOOST", "$(OTHER_CFLAGS)", "-DUSE_GSL", "-DUSE_HDF5", ); OTHER_LDFLAGS = ( "${SRCROOT}/mothur_resources_10.14/libs/libboost_filesystem.a", "${SRCROOT}/mothur_resources_10.14/libs/libboost_iostreams.a", "${SRCROOT}/mothur_resources_10.14/libs/libgsl.a", "${SRCROOT}/mothur_resources_10.14/libs/libgslcblas.a", "${SRCROOT}/mothur_resources_10.14/libs/libz.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_hl_cpp.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_cpp.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5_hl.a", "${SRCROOT}/mothur_resources_10.14/libs/libhdf5.a", "-libreadline", ); SDKROOT = macosx; SKIP_INSTALL = NO; USER_HEADER_SEARCH_PATHS = "${SRCROOT}/mothur_resources_10.14/headers"; "USER_HEADER_SEARCH_PATHS[arch=*]" = ""; }; name = Release; }; 481FB51D1AC0A63E0076CFF3 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = YES; CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; CLANG_WARN_BOOL_CONVERSION = YES; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_EMPTY_BODY = NO; CLANG_WARN_ENUM_CONVERSION = NO; CLANG_WARN_INT_CONVERSION = NO; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; FRAMEWORK_SEARCH_PATHS = "${SRCROOT}/mothur_resources/libs"; GCC_C_LANGUAGE_STANDARD = "compiler-default"; GCC_DYNAMIC_NO_PIC = NO; GCC_OPTIMIZATION_LEVEL = 0; GCC_PREPROCESSOR_DEFINITIONS = ( "DEBUG=1", "$(inherited)", ); GCC_SYMBOLS_PRIVATE_EXTERN = NO; GCC_VERSION = com.apple.compilers.llvm.clang.1_0; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = NO; LIBRARY_SEARCH_PATHS = "${SRCROOT}/mothur_resources/libs"; "LIBRARY_SEARCH_PATHS[arch=*]" = /usr/local/lib/; MACOSX_DEPLOYMENT_TARGET = 10.14; MTL_ENABLE_DEBUG_INFO = YES; ONLY_ACTIVE_ARCH = YES; OTHER_LDFLAGS = ( "-lreadline", "-lz", ); PRODUCT_NAME = "$(TARGET_NAME)"; SDKROOT = macosx; USER_HEADER_SEARCH_PATHS = "${PROJECT_DIR}/gtest /usr/local/include"; }; name = Debug; }; 481FB51E1AC0A63E0076CFF3 /* Release */ = { isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = YES; CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; 
CLANG_ENABLE_OBJC_ARC = YES; CLANG_WARN_BOOL_CONVERSION = YES; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_EMPTY_BODY = NO; CLANG_WARN_ENUM_CONVERSION = NO; CLANG_WARN_INT_CONVERSION = NO; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; FRAMEWORK_SEARCH_PATHS = "${SRCROOT}/mothur_resources/libs"; GCC_C_LANGUAGE_STANDARD = "compiler-default"; GCC_OPTIMIZATION_LEVEL = 0; GCC_PREPROCESSOR_DEFINITIONS = ( "VERSION=\"\\\"1.40.5\\\"\"", "RELEASE_DATE=\"\\\"06/20/2018\\\"\"", ); GCC_VERSION = com.apple.compilers.llvm.clang.1_0; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = NO; LIBRARY_SEARCH_PATHS = "${SRCROOT}/mothur_resources/libs"; "LIBRARY_SEARCH_PATHS[arch=*]" = /usr/local/lib/; MACOSX_DEPLOYMENT_TARGET = 10.14; MTL_ENABLE_DEBUG_INFO = NO; ONLY_ACTIVE_ARCH = YES; OTHER_LDFLAGS = ( "-lreadline", "-lz", ); PRODUCT_NAME = "$(TARGET_NAME)"; SDKROOT = macosx; USER_HEADER_SEARCH_PATHS = "$(TARGET_BUILD_DIR)/gtest /usr/local/include"; }; name = Release; }; /* End XCBuildConfiguration section */ /* Begin XCConfigurationList section */ 1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "Mothur" */ = { isa = XCConfigurationList; buildConfigurations = ( 1DEB928608733DD80010E9CD /* Debug */, 1DEB928708733DD80010E9CD /* Release */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Debug; }; 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */ = { isa = XCConfigurationList; buildConfigurations = ( 1DEB928A08733DD80010E9CD /* Debug */, 1DEB928B08733DD80010E9CD /* Release */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Debug; }; 481FB51F1AC0A63E0076CFF3 /* Build configuration list for PBXNativeTarget "TestMothur" */ = { isa = XCConfigurationList; buildConfigurations = ( 481FB51D1AC0A63E0076CFF3 /* Debug */, 481FB51E1AC0A63E0076CFF3 /* Release */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Debug; }; /* End XCConfigurationList section */ }; rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; } mothur-1.48.0/Products/000077500000000000000000000000001424121717000147245ustar00rootroot00000000000000mothur-1.48.0/Products/Debug/000077500000000000000000000000001424121717000157525ustar00rootroot00000000000000mothur-1.48.0/Products/Debug/123.log000066400000000000000000000011001424121717000167520ustar00rootroot00000000000000 Setting logfile name to 123.log mothur > summary.seqs(fasta=final.fasta) Unable to open final.fasta. Trying default /Users/swestcott/Desktop/release/final.fasta. Using 16 processors. Start End NBases Ambigs Polymer NumSeqs Minimum: 1 375 249 0 3 1 2.5%-tile: 1 375 252 0 4 61 25%-tile: 1 375 252 0 4 607 Median: 1 375 253 0 4 1213 75%-tile: 1 375 253 0 5 1819 97.5%-tile: 1 375 254 0 6 2365 Maximum: 1 375 256 0 6 2425 Mean: 1 375 252 0 4 # of Seqs: 2425 It took 0 secs to summarize 2425 sequences. 
Output File Names: /Users/swestcott/Desktop/release/final.summary mothur-1.48.0/Products/Debug/mothur 2000077700000000000000000000000001424121717000415362/Users/swestcott/Desktop/mothur/Build/Intermediates.noindex/UninstalledProducts/macosx/mothurustar00rootroot00000000000000mothur-1.48.0/README.md000066400000000000000000000020751424121717000144040ustar00rootroot00000000000000[![Build Status](https://travis-ci.org/mothur/mothur.svg?branch=master)](https://travis-ci.org/mothur/mothur) # README Welcome to the mothur project, initiated by Dr. Patrick Schloss and his software development team in the Department of Microbiology & Immunology at The University of Michigan. This project seeks to develop a single piece of open-source, expandable software to fill the bioinformatics needs of the microbial ecology community. mothur is available under the GPL license. Useful links... * [The current release](https://github.com/mothur/mothur/releases/latest) * [Wiki documentation](http://www.mothur.org/wiki) * [User forum](http://www.mothur.org/forum) * [Blog](http://www.mothur.org/forum) SOPs... * [MiSeq](http://www.mothur.org/wiki/MiSeq_SOP) * [454](http://www.mothur.org/wiki/454_SOP) References... * [SILVA](http://www.mothur.org/wiki/Silva_reference_files) * [greengenes](http://www.mothur.org/wiki/Greengenes-formatted_databases) * [RDP](http://www.mothur.org/wiki/RDP_reference_files) See the [citation file](CITATION.md) for how to cite mothur.mothur-1.48.0/TestBatches/000077500000000000000000000000001424121717000153325ustar00rootroot00000000000000mothur-1.48.0/TestBatches/README000066400000000000000000000037631424121717000162230ustar00rootroot00000000000000Running Test Bathches: 1. Download TestReferences and TestFiles from XXX and put in same location as mothur executable 2. Download TestBatches and put in same location as mothur's executable 3. Download mothur's external programs: vsearch, uchime, prefetch and fasterq-dump. Available with mothur's executable versions 4. Run an individual batch test: sarahwestcott$ cd mothur_test_batches mothur_test_batches sarahwestcott$ ls TestBatches TestReferences fasterq-dump prefetch vsearch TestFiles mothur uchime mothur_test_batches sarahwestcott$ ./mothur "./TestBatches/align.seqs/batch" Output to screen will look like: mothur > set.dir(input=./TestFiles, output=./TestResults/align.seqs, tempdefault=./TestReferences) Mothur's directories: outputDir=/Users/sarahwestcott/Desktop/mothur_test_batches/align.seqs/ inputDir=/Users/sarahwestcott/Desktop/mothur_test_batches/TestFiles/ tempDefault=/Users/sarahwestcott/Desktop/mothur_test_batches/TestReferences/ mothur > set.logfile(name=alignseqs.logfile) ... 5. Run all test batches: sarahwestcott$ cd mothur_test_batches mothur_test_batches sarahwestcott$ ls TestBatches TestReferences fasterq-dump prefetch vsearch TestFiles mothur uchime mothur_test_batches sarahwestcott$ ./mothur "./TestBatches/master_batch" Output to screen will look like: ... Batch Mode /*****************************************************************************/ mothur > set.dir(input=./TestFiles, output=./TestResults/align.seqs, tempdefault=./TestReferences) Mothur's directories: outputDir=/Users/swestcott/Desktop/batchTest/TestResults/align.seqs/ inputDir=/Users/swestcott/Desktop/batchTest/TestFiles/ tempDefault=/Users/swestcott/Desktop/batchTest/TestReferences/ mothur > set.logfile(name=alignseqs.logfile) Setting logfile name to /Users/swestcott/Desktop/batchTest/TestFiles/alignseqs.logfile ... 
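Note: the repository also ships a shell wrapper, TestBatches/master_batch.sh, which finds every TestBatches/*/batch file and runs it through ./mothur; any batch named in its IGNORE_TESTS array (currently chimera.bellerophon, which takes roughly 2 hours) is reported as ignored and skipped. A sketch of invoking it from the directory that holds the mothur executable (the prompt and output below are illustrative, not captured):

mothur_test_batches sarahwestcott$ bash ./TestBatches/master_batch.sh
Processing TestBatches/align.seqs/batch
...
Ignoring TestBatches/chimera.bellerophon/batch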
mothur-1.48.0/TestBatches/align.seqs/000077500000000000000000000000001424121717000173765ustar00rootroot00000000000000mothur-1.48.0/TestBatches/align.seqs/batch000066400000000000000000000012071424121717000204020ustar00rootroot00000000000000#align.seqs # testAlign is stability.trim.contigs.good.unique.fasta from MiSeq_SOP # test different align method and seaches # NOTE: download TestReferences and TestFiles and put in same location as mothur executable set.dir(input=./TestFiles, output=./TestResults/align.seqs, tempdefault=./TestReferences) set.logfile(name=alignseqs.logfile) align.seqs(fasta=testAlign.fasta, reference=silva.v4.fasta) summary.seqs(fasta=current) align.seqs(fasta=testAlign.fasta, reference=silva.v4.fasta, align=needleman) summary.seqs(fasta=current) align.seqs(fasta=testAlign.fasta, reference=silva.v4.fasta, align=gotoh) summary.seqs(fasta=current) quit() mothur-1.48.0/TestBatches/amova/000077500000000000000000000000001424121717000164355ustar00rootroot00000000000000mothur-1.48.0/TestBatches/amova/batch000066400000000000000000000005551424121717000174460ustar00rootroot00000000000000#amova # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # testRarefied.dist is stability.opti_mcc.thetayc.0.03.lt.ave.dist from MiSeq_SOP set.dir(input=./TestFiles, output=./TestResults/amova, tempdefault=./TestReferences) set.logfile(name=amova.logfile) amova(phylip=testRarefied.dist, design=mouse.time.design) quit() mothur-1.48.0/TestBatches/anosim/000077500000000000000000000000001424121717000166205ustar00rootroot00000000000000mothur-1.48.0/TestBatches/anosim/batch000066400000000000000000000005601424121717000176250ustar00rootroot00000000000000#anosim # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # testRarefied.dist is stability.opti_mcc.thetayc.0.03.lt.ave.dist from MiSeq_SOP set.dir(input=./TestFiles, output=./TestResults/anosim, tempdefault=./TestReferences) set.logfile(name=anosim.logfile) anosim(phylip=testRarefied.dist, design=mouse.time.design) quit() mothur-1.48.0/TestBatches/binseqs/000077500000000000000000000000001424121717000167765ustar00rootroot00000000000000mothur-1.48.0/TestBatches/binseqs/batch000066400000000000000000000011401424121717000177760ustar00rootroot00000000000000#bin.seqs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable #Test files -> final.fasta, final.names, final.count_table, final.groups from MiSeq_SOP set.dir(input=./TestFiles, output=./binseqs, tempdefault=./TestReferences) set.logfile(name=binseqs.logfile) bin.seqs(fasta=final2.fasta, name=final.names, list=final2.opti_mcc.list) bin.seqs(fasta=final2.fasta, name=final.names, group=final.groups, list=final2.opti_mcc.list) bin.seqs(fasta=final.fasta, list=final.opti_mcc.list) bin.seqs(fasta=final.fasta, count=final.count_table, list=final.opti_mcc.list) quit() mothur-1.48.0/TestBatches/biom.info/000077500000000000000000000000001424121717000172125ustar00rootroot00000000000000mothur-1.48.0/TestBatches/biom.info/batch000066400000000000000000000010651424121717000202200ustar00rootroot00000000000000#biom.info # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # test different biom formats #Test files -> hdf5.min.biom & hdf5.rich.biom from https://github.com/biocore/biom-format/tree/master/examples #Test files -> final.biom created by mothur make.biom file with final.opti_mcc.shared, final.cons_taxonomy set.dir(input=./TestFiles, 
output=./TestResults/biom.info) set.logfile(name=biominfo.logfile) biom.info(biom=hdf5.min.biom) biom.info(biom=hdf5.rich.biom, label=0.03, format=hdf5) biom.info(biom=final.biom) quit() mothur-1.48.0/TestBatches/chimera.bellerophon/000077500000000000000000000000001424121717000212525ustar00rootroot00000000000000mothur-1.48.0/TestBatches/chimera.bellerophon/batch000066400000000000000000000012501424121717000222540ustar00rootroot00000000000000#chimera.bellerophon # chimera.bellerophon is very slow, so this batch test will take a very long time. (~7000 secs / ~2 hours) # chimera.fasta is stability.trim.contigs.good.unique.good.filter.unique.precluster.fasta from MISeq_SOP # chimera.count is stability.trim.contigs.good.unique.good.filter.unique.precluster.count_table from MISeq_SOP # NOTE: download TestReferences and TestFiles and put in same location as mothur executable set.dir(input=./TestFiles, output=./TestResults/chimera.bellerophon, tempdefault=./TestReferences) set.logfile(name=chimera.bellerophon.logfile) chimera.bellerophon(fasta=chimera.fasta) chimera.bellerophon(fasta=chimera.fasta, filter=t) quit() mothur-1.48.0/TestBatches/chimera.vsearch/000077500000000000000000000000001424121717000203745ustar00rootroot00000000000000mothur-1.48.0/TestBatches/chimera.vsearch/batch000066400000000000000000000017261424121717000214060ustar00rootroot00000000000000#chimera.vsearch # chimera.vsearch is a wrapper for the vsearch program written by https://github.com/torognes/vsearch # chimera.fasta is stability.trim.contigs.good.unique.good.filter.unique.precluster.fasta from MISeq_SOP # chimera.count is stability.trim.contigs.good.unique.good.filter.unique.precluster.count_table from MISeq_SOP # name and group parameters untested, because the name and group files are auto converted to a count file before running # this conversion is tested in the pre.cluster commands use of name and groups # NOTE: download TestReferences and TestFiles and put in same location as mothur executable set.dir(input=./TestFiles, output=./TestResults/chimera.vsearch, tempdefault=./TestReferences) set.logfile(name=chimera.vsearch.logfile) chimera.vsearch(fasta=chimera.fasta, reference=silva.gold.ng.fasta) chimera.vsearch(fasta=chimera.fasta, count=chimera.count_table) chimera.vsearch(fasta=chimera.fasta, count=chimera.count_table, dereplicate=t) quit() mothur-1.48.0/TestBatches/classify.seqs/000077500000000000000000000000001424121717000201215ustar00rootroot00000000000000mothur-1.48.0/TestBatches/classify.seqs/batch000066400000000000000000000022631424121717000211300ustar00rootroot00000000000000#classify.seqs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # testClassify.fasta is stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.fasta from MiSeq_SOP # testClassify.count is , count=stability.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.count_table from MiSeq_SOP # test different classification methods and seaches set.dir(input=./TestFiles, output=./TestResults/classify.seqs, tempdefault=./TestReferences) set.logfile(name=classifyseqs.logfile) classify.seqs(fasta=testClassify.fasta, reference=trainset18_062020.pds.fasta, taxonomy=trainset18_062020.pds.tax) classify.seqs(fasta=testClassify.fasta, reference=trainset18_062020.pds.fasta, taxonomy=trainset18_062020.pds.tax, relabund=t) classify.seqs(fasta=testClassify.fasta, reference=trainset18_062020.pds.fasta, taxonomy=trainset18_062020.pds.tax, probs=f) 
classify.seqs(fasta=testClassify.fasta, count=testClassify.count_table, reference=trainset18_062020.pds.fasta, taxonomy=trainset18_062020.pds.tax) # #knn classify.seqs(fasta=testClassify.fasta, template=trainset18_062020.pds.fasta, taxonomy=trainset18_062020.pds.tax, method=knn) quit() mothur-1.48.0/TestBatches/cluster.split/000077500000000000000000000000001424121717000201455ustar00rootroot00000000000000mothur-1.48.0/TestBatches/cluster.split/batch000066400000000000000000000026521424121717000211560ustar00rootroot00000000000000#cluster.split # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # testCluster.fasta is stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.fasta from MiSeq_SOP # testCluster.count is , count=stability.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.pick.count_table from MiSeq_SOP # testCluster.taxonomy is , taxonomy=stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pds.wang.pick.pick.taxonomy from MiSeq_SOP # test different cluster methods and seaches #set.dir(input=./TestFiles, output=./TestResults/cluster.split, tempdefault=./TestReferences) set.dir(input=/Users/swestcott/Desktop/release) set.logfile(name=clustersplit.logfile) cluster.split(fasta=testCluster.fasta, count=testCluster.count_table, taxonomy=testCluster.taxonomy, taxlevel=4, cutoff=0.03) cluster.split(fasta=testCluster.fasta, count=testCluster.count_table, taxonomy=testCluster.taxonomy, taxlevel=4, cutoff=0.03, method=agc) cluster.split(fasta=testCluster.fasta, count=testCluster.count_table, taxonomy=testCluster.taxonomy, taxlevel=4, cutoff=0.03, method=dgc) cluster.split(fasta=testCluster.fasta, count=testCluster.count_table, taxonomy=testCluster.taxonomy, taxlevel=4, cutoff=0.03, cluster=f) cluster.split(file=current) cluster.split(fasta=UNITEv6_sh_97.fasta, count=UNITEv6_sh_97.count_table, taxonomy=UNITEv6_sh_97.tax, cutoff=0.03) quit() mothur-1.48.0/TestBatches/dist.seqs/000077500000000000000000000000001424121717000172475ustar00rootroot00000000000000mothur-1.48.0/TestBatches/dist.seqs/batch000066400000000000000000000014121424121717000202510ustar00rootroot00000000000000#dist.seqs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # dist.fasta is stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.fasta from MiSeq_SOP # # test different classification methods and seaches #set.dir(input=./TestFiles, output=./TestResults/dist.seqs, tempdefault=./TestReferences) set.logfile(name=distseqs.logfile) dist.seqs(fasta=dist.fasta, calc=jtt, cutoff=0.05) dist.seqs(fasta=dist.fasta, calc=pmb, cutoff=0.03) dist.seqs(fasta=dist.fasta, calc=pam, cutoff=0.03) dist.seqs(fasta=dist.fasta, calc=kimura, cutoff=0.03) dist.seqs(fasta=dist.fasta, calc=onegap, cutoff=0.03) dist.seqs(fasta=dist.fasta, calc=nogaps, cutoff=0.10) dist.seqs(fasta=dist.fasta, calc=eachgap, cutoff=0.15) quit() mothur-1.48.0/TestBatches/make.biom/000077500000000000000000000000001424121717000171745ustar00rootroot00000000000000mothur-1.48.0/TestBatches/make.biom/batch000066400000000000000000000020711424121717000202000ustar00rootroot00000000000000#make.biom # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # test different biom formats #Test files -> makeBiom.cons.taxonomy is stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.opti_mcc.0.03.cons.taxonomy from the MiSeq_SOP #Test files -> 
makeBiom.gg.cons.taxonomy is stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.opti_mcc.0.03.cons.taxonomy from the MiSeq_SOP classified using the Green Genes Database #Test files -> makeBiom.shared is stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.opti_mcc.shared set.dir(input=./TestFiles, output=./TestResults/make.biom) set.logfile(name=make.biom.logfile) make.biom(shared=makeBiom.shared) make.biom(shared=makeBiom.shared, constaxonomy=makeBiom.cons.taxonomy) make.biom(shared=makeBiom.shared, constaxonomy=makeBiom.cons.taxonomy, output=simple) make.biom(shared=makeBiom.gg.shared, label=0.03, reftaxonomy=gg_13_5_99.gg.tax, constaxonomy=makeBiom.gg.cons.taxonomy, picrust=97_otu_map.txt) quit() mothur-1.48.0/TestBatches/make.contigs/000077500000000000000000000000001424121717000177145ustar00rootroot00000000000000mothur-1.48.0/TestBatches/make.contigs/batch000066400000000000000000000020201424121717000207120ustar00rootroot00000000000000#make.contigs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # stability.files & *.fastq *.fastq.gz from MISeq_SOP set.dir(input=./TestFiles, output=./TestResults/make.contigs, tempdefault=./TestReferences) set.logfile(name=makecontigs.logfile) make.contigs(file=stability.files, maxambig=0, maxlength=275) summary.seqs(count=current) make.contigs(file=stability.gz.files) summary.seqs(fasta=current) make.contigs(ffastq=F3D150_S216_L001_R1_001.fastq.gz, rfastq=F3D150_S216_L001_R2_001.fastq.gz) summary.seqs(fasta=current) make.contigs(ffastq=F3D150_S216_L001_R1_001.fastq, rfastq=F3D150_S216_L001_R2_001.fastq) summary.seqs(fasta=current) make.contigs(ffasta=F3D150_S216_L001_R1_001.fasta, rfasta=F3D150_S216_L001_R2_001.fasta, rqfile=F3D150_S216_L001_R1_001.qual, fqfile=F3D150_S216_L001_R2_001.qual) summary.seqs(fasta=current) #make.contigs(ffastq=small.forward.fastq, rfastq=small.reverse.fastq, oligos=qatar.oligos, pdiffs=2, bdiffs=1, checkorient=t) #summary.seqs(fasta=current) quit() mothur-1.48.0/TestBatches/make.file/000077500000000000000000000000001424121717000171655ustar00rootroot00000000000000mothur-1.48.0/TestBatches/make.file/batch000066400000000000000000000007371424121717000202000ustar00rootroot00000000000000#make.file # NOTE: download TestReferences and TestFiles and put in same location as mothur executable set.dir(input=./TestFiles, output=./TestResults/make.file, tempdefault=./TestReferences) set.logfile(name=makefile.logfile) make.file(inputdir=./TestFiles) make.file(inputdir=./TestFiles, numcols=2) make.file(inputdir=./TestFiles, type=gz) make.file(inputdir=./TestFiles, prefix=myReallyAwesomeData) make.file(inputdir=./TestFiles, type=gz, prefix=myReallyAwesomeData) quit() mothur-1.48.0/TestBatches/master_batch.sh000077500000000000000000000010021424121717000203160ustar00rootroot00000000000000#!/bin/bash IGNORE_TESTS=() IGNORE_TESTS+=(chimera.bellerophon) TEST_DIR=TestBatches if [ "x$1" != "x" ] ; then TEST_DIR=$1 fi MOTHUR_EXEC=mothur for TEST_FILE in `find $TEST_DIR -type f -name batch` ; do PROCESS=True for IGNORE_TEST in ${IGNORE_TESTS[@]}; do if [ `echo $TEST_FILE | grep -v /$IGNORE_TEST/batch | wc -l` == 0 ] ; then PROCESS=False fi done if [ ${PROCESS} == 'True' ] ; then echo "Processing $TEST_FILE" ./mothur "$TEST_FILE" else echo "Ignoring $TEST_FILE" fi done 
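# PROCESS stays True unless TEST_FILE matched an entry in IGNORE_TESTS above; matched batches are skipped below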
mothur-1.48.0/TestBatches/pairwise.seqs/000077500000000000000000000000001424121717000201275ustar00rootroot00000000000000mothur-1.48.0/TestBatches/pairwise.seqs/batch000066400000000000000000000012431424121717000211330ustar00rootroot00000000000000#pairwise.seqs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # pairwise.fasta is stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.fasta from MiSeq_SOP # # test different classification methods and seaches set.dir(input=./TestFiles, output=./TestResults/pairwise.seqs, tempdefault=./TestReferences) set.logfile(name=pairwiseseqs.logfile) pairwise.seqs(fasta=pairwise.fasta, kmercutoff=-0.5, cutoff=0.05) pairwise.seqs(fasta=pairwise.fasta, calc=onegap, cutoff=0.03) pairwise.seqs(fasta=pairwise.fasta, calc=nogaps, cutoff=0.10) pairwise.seqs(fasta=pairwise.fasta, calc=eachgap, cutoff=0.15) quit() mothur-1.48.0/TestBatches/pcr.seqs/000077500000000000000000000000001424121717000170705ustar00rootroot00000000000000mothur-1.48.0/TestBatches/pcr.seqs/batch000066400000000000000000000010131424121717000200670ustar00rootroot00000000000000#pcr.seqs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # silva.bacteria.fasta from MISeq_SOP set.dir(input=./TestFiles, output=./TestResults/pcr.seqs, tempdefault=./TestReferences) set.logfile(name=pcrseqs.logfile) #trim to v4 pcr.seqs(fasta=silva.bacteria.fasta, start=11894, end=25319, keepdots=F) pcr.seqs(fasta=silva.bacteria.fasta, oligos=pcr.oligos, pdiffs=2, rdiffs=2) pcr.seqs(fasta=silva.bacteria.fasta, oligos=pcr.oligos, pdiffs=2, rdiffs=1, checkorient=t) quit() mothur-1.48.0/TestBatches/pre.cluster/000077500000000000000000000000001424121717000176005ustar00rootroot00000000000000mothur-1.48.0/TestBatches/pre.cluster/batch000066400000000000000000000022141424121717000206030ustar00rootroot00000000000000#pre.cluster # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # fasta=stability.trim.contigs.good.unique.good.filter.unique.fasta, count=stability.trim.contigs.good.unique.good.filter.count_table,from MISeq_SOP set.dir(input=./TestFiles, output=./TestResults/pre.cluster, tempdefault=./TestReferences) set.logfile(name=precluster.logfile) pre.cluster(fasta=testPrecluster.fasta, count=testPrecluster.count_table, diffs=2) summary.seqs(count=current) pre.cluster(fasta=testPrecluster.fasta, count=testPrecluster.count_table, diffs=2, method=unoise) summary.seqs(count=current) pre.cluster(fasta=testPrecluster.fasta, count=testPrecluster.count_table, diffs=2, method=tree) summary.seqs(count=current) pre.cluster(fasta=testPrecluster.fasta, count=testPrecluster.count_table, diffs=2, method=deblur) summary.seqs(count=current) #test unaligned clustering pre.cluster(fasta=testPrecluster.ng.fasta, count=testPrecluster.count_table, diffs=2) summary.seqs(count=current) pre.cluster(fasta=testPrecluster.names_groups.fasta, name=testPrecluster.names, group=testPrecluster.groups, diffs=2) summary.seqs(count=current) quit() mothur-1.48.0/TestBatches/screen.seqs/000077500000000000000000000000001424121717000175635ustar00rootroot00000000000000mothur-1.48.0/TestBatches/screen.seqs/batch000066400000000000000000000037701424121717000205760ustar00rootroot00000000000000#screen.seqs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # screenSeqs.fasta is stability.trim.contigs.fasta from MISeq_SOP # screenSeqs.groups is stability.contigs.groups from MISeq_SOP 
# screenSeqs.summary is stability.trim.contigs.summary from MISeq_SOP # screenSeqs.contigs.report is stability.contigs.report from MISeq_SOP # screenSeqs2.align is stability.trim.contigs.good.unique.align from MISeq_SOP # screenSeqs2.count_table is stability.trim.contigs.good.count_table from MISeq_SOP # screenSeqs2.summary is stability.trim.contigs.good.unique.summary from MISeq_SOP set.dir(input=./TestFiles, output=./TestResults/screen.seqs, tempdefault=./TestReferences) set.logfile(name=screen.seqs.logfile) #screening after make.contigs screen.seqs(fasta=screenSeqs.fasta, group=screenSeqs.groups, maxambig=0, maxlength=275) screen.seqs(fasta=screenSeqs.fasta, group=screenSeqs.groups, summary=screenSeqs.summary, maxambig=0, maxlength=275) #screening after alignment screen.seqs(fasta=screenSeqs2.align, count=screenSeqs2.count_table, start=1968, end=11550, maxhomop=8) screen.seqs(fasta=screenSeqs2.align, count=screenSeqs2.count_table, summary=screenSeqs2.summary, start=1968, end=11550, maxhomop=8) #additional parameters testing screen.seqs(fasta=screenSeqs2.align, count=screenSeqs2.count_table, optimize=start-end-maxambig, criteria=90) screen.seqs(fasta=screenSeqs.fasta, group=screenSeqs.groups, contigsreport=screenSeqs.contigs.report, mismatches=5) screen.seqs(fasta=screenSeqs.fasta, group=screenSeqs.groups, contigsreport=screenSeqs.contigs.report, minoverlap=25) screen.seqs(fasta=screenSeqs.fasta, group=screenSeqs.groups, contigsreport=screenSeqs.contigs.report, optimize=ostart-oend, criteria=90) screen.seqs(fasta=screenSeqs2.align, count=screenSeqs2.count_table, alignreport=screenSeqs2.align.report, minscore=60, maxinsert=5, minsim=90) screen.seqs(fasta=screenSeqs2.align, count=screenSeqs2.count_table, summary=screenSeqs2.summary, start=1968, end=11550, maxhomop=8) quit() mothur-1.48.0/TestBatches/stability.batch000066400000000000000000000036401424121717000203440ustar00rootroot00000000000000set.logfile(name=/Users/swestcott/Desktop/release/mothur.logfile) set.dir(input=/Users/swestcott/Desktop/release/, output=/Users/swestcott/Desktop/MiSeq_SOP/1.48.0_output) pcr.seqs(fasta=silva.bacteria.fasta, start=11894, end=25319, keepdots=F) rename.file(fasta=current, new=/Users/swestcott/Desktop/release/silva.v4.fasta) summary.seqs(fasta=silva.v4.fasta) make.contigs(file=stability.files, maxambig=0, maxlength=275) summary.seqs(count=current) unique.seqs(count=current) summary.seqs(count=current) align.seqs(fasta=stability.trim.contigs.unique.fasta, reference=silva.v4.fasta) summary.seqs(fasta=current, count=stability.trim.contigs.count_table) screen.seqs(fasta=current, count=current, start=1969, end=11551, maxhomop=8) summary.seqs(count=current) filter.seqs(fasta=current, vertical=T, trump=.) 
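# filter.seqs(vertical=T, trump=.) trims the alignment to the region all reads cover; filtering can create new duplicates, so the next steps re-run unique.seqs, denoise with pre.cluster (diffs=2), and remove chimeras with chimera.vsearch (dereplicate=t)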
unique.seqs(fasta=current, count=current) pre.cluster(fasta=current, count=current, diffs=2) chimera.vsearch(fasta=current, count=current, dereplicate=t) summary.seqs(fasta=current, count=current) classify.seqs(fasta=current, count=current, reference=trainset9_032012.pds.fasta, taxonomy=trainset9_032012.pds.tax, cutoff=80) remove.lineage(fasta=current, count=current, taxonomy=current, taxon=Chloroplast-Mitochondria-unknown-Archaea-Eukaryota) remove.groups(count=current, fasta=current, taxonomy=current, groups=Mock) cluster.split(fasta=current, count=current, taxonomy=current, taxlevel=4, cutoff=0.03) dist.seqs(fasta=current, cutoff=0.03) sens.spec(list=current, count=current, column=current, cutoff=0.03) make.shared(list=current, count=current, label=0.03) classify.otu(list=current, count=current, taxonomy=current, label=0.03) rename.file(fasta=current, count=current, taxonomy=current, shared=current, list=current, constaxonomy=current, prefix=final) clearcut(fasta=current, DNA=t) phylotype(taxonomy=current) make.shared(list=current, count=current, label=1) classify.otu(list=current, count=current, taxonomy=current, label=1) mothur-1.48.0/TestBatches/stability.part2.batch000066400000000000000000000032471424121717000213760ustar00rootroot00000000000000set.dir(input=/Users/swestcott/Desktop/release/, output=/Users/swestcott/Desktop/MiSeq_SOP/1.48.0_output) set.current(fasta=final.fasta, list=final.opti_mcc.list, shared=final.opti_mcc.shared, taxonomy=final.taxonomy, constaxonomy=final.cons.taxonomy, count=final.count_table) count.groups(shared=final.opti_mcc.shared) sub.sample(shared=final.opti_mcc.shared, size=2403) rarefaction.single(shared=final.opti_mcc.shared, calc=sobs, freq=100) summary.single(shared=final.opti_mcc.shared, calc=nseqs-coverage-sobs-invsimpson, subsample=T) dist.shared(shared=final.opti_mcc.shared, calc=thetayc-jclass, subsample=t) pcoa(phylip=final.opti_mcc.thetayc.0.03.lt.ave.dist) nmds(phylip=final.opti_mcc.thetayc.0.03.lt.ave.dist) nmds(phylip=final.opti_mcc.thetayc.0.03.lt.ave.dist, mindim=3, maxdim=3) amova(phylip=final.opti_mcc.thetayc.0.03.lt.ave.dist, design=mouse.time.design) homova(phylip=final.opti_mcc.thetayc.0.03.lt.ave.dist, design=mouse.time.design) corr.axes(axes=final.opti_mcc.thetayc.0.03.lt.ave.pcoa.axes, shared=final.opti_mcc.0.03.subsample.shared, method=spearman, numaxes=3) corr.axes(axes=final.opti_mcc.thetayc.0.03.lt.ave.pcoa.axes, metadata=mouse.dpw.metadata, method=spearman, numaxes=3) get.communitytype(shared=final.opti_mcc.0.03.subsample.shared) metastats(shared=final.opti_mcc.0.03.subsample.shared, design=mouse.time.design) lefse(shared=final.opti_mcc.0.03.subsample.shared, design=mouse.time.design) phylo.diversity(tree=final.phylip.tre, count=final.count_table, rarefy=T) unifrac.unweighted(tree=final.phylip.tre, count=final.count_table, distance=lt,random=F, subsample=t) unifrac.weighted(tree=final.phylip.tre, count=final.count_table, distance=lt, random=F, subsample=t) mothur-1.48.0/TestBatches/summary.seqs/000077500000000000000000000000001424121717000200015ustar00rootroot00000000000000mothur-1.48.0/TestBatches/summary.seqs/batch000066400000000000000000000006661424121717000210150ustar00rootroot00000000000000#summary.seqs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # final.fasta, final.names, final.count from MISeq_SOP set.dir(input=./TestFiles, output=./TestResults/summary.seqs, tempdefault=./TestReferences) set.logfile(name=summaryseqs.logfile) summary.seqs(fasta=final.fasta) 
summary.seqs(fasta=final2.fasta, name=final.names) summary.seqs(fasta=final.fasta, count=final.count_table) quit() mothur-1.48.0/TestBatches/translate.seqs/000077500000000000000000000000001424121717000203015ustar00rootroot00000000000000mothur-1.48.0/TestBatches/translate.seqs/batch000066400000000000000000000007301424121717000213050ustar00rootroot00000000000000#translate.seqs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # final.fasta from MISeq_SOP set.dir(input=./TestFiles, output=./TestResults/translate.seqs, tempdefault=./TestReferences) set.logfile(name=translateseqs.logfile) translate.seqs(fasta=final.fasta) translate.seqs(fasta=final.fasta, stop=f) translate.seqs(fasta=final.fasta, frames=1|-1|2|-2|3|-3) #translate.seqs(fasta=unalignedDNA, amino=alignedAminoAcid) quit() mothur-1.48.0/TestBatches/unique.seqs/000077500000000000000000000000001424121717000176125ustar00rootroot00000000000000mothur-1.48.0/TestBatches/unique.seqs/batch000066400000000000000000000013521424121717000206170ustar00rootroot00000000000000#unique.seqs # NOTE: download TestReferences and TestFiles and put in same location as mothur executable # uniqueSeqs.fasta is stability.trim.contigs.good.fasta from MISeq_SOP # uniqueSeqs2.fasta is stability.trim.contigs.good.unique.good.filter.fasta from MISeq_SOP # uniqueSeqs.count_table is stability.trim.contigs.good.good.count_table from MISeq_SOP # uniqueSeqs.names is stability.trim.contigs.good.names from MISeq_SOP set.dir(input=./TestFiles, output=./TestResults/unique.seqs, tempdefault=./TestReferences) set.logfile(name=unique.seqs.logfile) unique.seqs(fasta=uniqueSeqs.fasta) unique.seqs(fasta=uniqueSeqs.unique.fasta, name=uniqueSeqs.names, format=count) unique.seqs(fasta=uniqueSeqs2.fasta, count=uniqueSeqs.count_table) quit() mothur-1.48.0/TestMothur/000077500000000000000000000000001424121717000152375ustar00rootroot00000000000000mothur-1.48.0/TestMothur/catch.hpp000066400000000000000000013503301424121717000170370ustar00rootroot00000000000000/* * Catch v1.6.0 * Generated: 2017-01-11 16:38:09.405017 * ---------------------------------------------------------- * This file has been merged from multiple headers. Please don't edit it directly * Copyright (c) 2012 Two Blue Cubes Ltd. All rights reserved. * * Distributed under the Boost Software License, Version 1.0. 
(See accompanying * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */ #ifndef TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED #define TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED #define TWOBLUECUBES_CATCH_HPP_INCLUDED #ifdef __clang__ # pragma clang system_header #elif defined __GNUC__ # pragma GCC system_header #endif // #included from: internal/catch_suppress_warnings.h #ifdef __clang__ # ifdef __ICC // icpc defines the __clang__ macro # pragma warning(push) # pragma warning(disable: 161 1682) # else // __ICC # pragma clang diagnostic ignored "-Wglobal-constructors" # pragma clang diagnostic ignored "-Wvariadic-macros" # pragma clang diagnostic ignored "-Wc99-extensions" # pragma clang diagnostic ignored "-Wunused-variable" # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wpadded" # pragma clang diagnostic ignored "-Wc++98-compat" # pragma clang diagnostic ignored "-Wc++98-compat-pedantic" # pragma clang diagnostic ignored "-Wswitch-enum" # pragma clang diagnostic ignored "-Wcovered-switch-default" # endif #elif defined __GNUC__ # pragma GCC diagnostic ignored "-Wvariadic-macros" # pragma GCC diagnostic ignored "-Wunused-variable" # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wpadded" #endif #if defined(CATCH_CONFIG_MAIN) || defined(CATCH_CONFIG_RUNNER) # define CATCH_IMPL #endif #ifdef CATCH_IMPL # ifndef CLARA_CONFIG_MAIN # define CLARA_CONFIG_MAIN_NOT_DEFINED # define CLARA_CONFIG_MAIN # endif #endif // #included from: internal/catch_notimplemented_exception.h #define TWOBLUECUBES_CATCH_NOTIMPLEMENTED_EXCEPTION_H_INCLUDED // #included from: catch_common.h #define TWOBLUECUBES_CATCH_COMMON_H_INCLUDED // #included from: catch_compiler_capabilities.h #define TWOBLUECUBES_CATCH_COMPILER_CAPABILITIES_HPP_INCLUDED // Detect a number of compiler features - mostly C++11/14 conformance - by compiler // The following features are defined: // // CATCH_CONFIG_CPP11_NULLPTR : is nullptr supported? // CATCH_CONFIG_CPP11_NOEXCEPT : is noexcept supported? // CATCH_CONFIG_CPP11_GENERATED_METHODS : The delete and default keywords for compiler generated methods // CATCH_CONFIG_CPP11_IS_ENUM : std::is_enum is supported? // CATCH_CONFIG_CPP11_TUPLE : std::tuple is supported // CATCH_CONFIG_CPP11_LONG_LONG : is long long supported? // CATCH_CONFIG_CPP11_OVERRIDE : is override supported? // CATCH_CONFIG_CPP11_UNIQUE_PTR : is unique_ptr supported (otherwise use auto_ptr) // CATCH_CONFIG_CPP11_OR_GREATER : Is C++11 supported? // CATCH_CONFIG_VARIADIC_MACROS : are variadic macros supported? // CATCH_CONFIG_COUNTER : is the __COUNTER__ macro supported? // **************** // Note to maintainers: if new toggles are added please document them // in configuration.md, too // **************** // In general each macro has a _NO_ form // (e.g. CATCH_CONFIG_CPP11_NO_NULLPTR) which disables the feature. // Many features, at point of detection, define an _INTERNAL_ macro, so they // can be combined, en-mass, with the _NO_ forms later. 
// All the C++11 features can be disabled with CATCH_CONFIG_NO_CPP11 //inline std::string toString( std::nullptr_t null ) { // return "nullptr"; //} #ifdef __cplusplus # if __cplusplus >= 201103L # define CATCH_CPP11_OR_GREATER # endif # if __cplusplus >= 201402L # define CATCH_CPP14_OR_GREATER # endif #endif #ifdef __clang__ # if __has_feature(cxx_nullptr) # define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR # endif # if __has_feature(cxx_noexcept) # define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT # endif # if defined(CATCH_CPP11_OR_GREATER) # define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS _Pragma( "clang diagnostic ignored \"-Wparentheses\"" ) # endif #endif // __clang__ //////////////////////////////////////////////////////////////////////////////// // Borland #ifdef __BORLANDC__ #endif // __BORLANDC__ //////////////////////////////////////////////////////////////////////////////// // EDG #ifdef __EDG_VERSION__ #endif // __EDG_VERSION__ //////////////////////////////////////////////////////////////////////////////// // Digital Mars #ifdef __DMC__ #endif // __DMC__ //////////////////////////////////////////////////////////////////////////////// // GCC #ifdef __GNUC__ # if __GNUC__ == 4 && __GNUC_MINOR__ >= 6 && defined(__GXX_EXPERIMENTAL_CXX0X__) # define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR # endif # if !defined(CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS) && defined(CATCH_CPP11_OR_GREATER) # define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS _Pragma( "GCC diagnostic ignored \"-Wparentheses\"" ) # endif // - otherwise more recent versions define __cplusplus >= 201103L // and will get picked up below #endif // __GNUC__ //////////////////////////////////////////////////////////////////////////////// // Visual C++ #ifdef _MSC_VER #if (_MSC_VER >= 1600) # define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR # define CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR #endif #if (_MSC_VER >= 1900 ) // (VC++ 13 (VS2015)) #define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT #define CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS #define CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE #endif #endif // _MSC_VER //////////////////////////////////////////////////////////////////////////////// // Use variadic macros if the compiler supports them #if ( defined _MSC_VER && _MSC_VER > 1400 && !defined __EDGE__) || \ ( defined __WAVE__ && __WAVE_HAS_VARIADICS ) || \ ( defined __GNUC__ && __GNUC__ >= 3 ) || \ ( !defined __cplusplus && __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L ) #define CATCH_INTERNAL_CONFIG_VARIADIC_MACROS #endif // Use __COUNTER__ if the compiler supports it #if ( defined _MSC_VER && _MSC_VER >= 1300 ) || \ ( defined __GNUC__ && __GNUC__ >= 4 && __GNUC_MINOR__ >= 3 ) || \ ( defined __clang__ && __clang_major__ >= 3 ) #define CATCH_INTERNAL_CONFIG_COUNTER #endif //////////////////////////////////////////////////////////////////////////////// // C++ language feature support // catch all support for C++11 #if defined(CATCH_CPP11_OR_GREATER) # if !defined(CATCH_INTERNAL_CONFIG_CPP11_NULLPTR) # define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR # endif # ifndef CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT # define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT # endif # ifndef CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS # define CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS # endif # ifndef CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM # define CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM # endif # ifndef CATCH_INTERNAL_CONFIG_CPP11_TUPLE # define CATCH_INTERNAL_CONFIG_CPP11_TUPLE # endif # ifndef CATCH_INTERNAL_CONFIG_VARIADIC_MACROS # define 
CATCH_INTERNAL_CONFIG_VARIADIC_MACROS # endif # if !defined(CATCH_INTERNAL_CONFIG_CPP11_LONG_LONG) # define CATCH_INTERNAL_CONFIG_CPP11_LONG_LONG # endif # if !defined(CATCH_INTERNAL_CONFIG_CPP11_OVERRIDE) # define CATCH_INTERNAL_CONFIG_CPP11_OVERRIDE # endif # if !defined(CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) # define CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR # endif # if !defined(CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE) # define CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE # endif #endif // __cplusplus >= 201103L // Now set the actual defines based on the above + anything the user has configured #if defined(CATCH_INTERNAL_CONFIG_CPP11_NULLPTR) && !defined(CATCH_CONFIG_CPP11_NO_NULLPTR) && !defined(CATCH_CONFIG_CPP11_NULLPTR) && !defined(CATCH_CONFIG_NO_CPP11) # define CATCH_CONFIG_CPP11_NULLPTR #endif #if defined(CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_CONFIG_CPP11_NO_NOEXCEPT) && !defined(CATCH_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_CONFIG_NO_CPP11) # define CATCH_CONFIG_CPP11_NOEXCEPT #endif #if defined(CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS) && !defined(CATCH_CONFIG_CPP11_NO_GENERATED_METHODS) && !defined(CATCH_CONFIG_CPP11_GENERATED_METHODS) && !defined(CATCH_CONFIG_NO_CPP11) # define CATCH_CONFIG_CPP11_GENERATED_METHODS #endif #if defined(CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM) && !defined(CATCH_CONFIG_CPP11_NO_IS_ENUM) && !defined(CATCH_CONFIG_CPP11_IS_ENUM) && !defined(CATCH_CONFIG_NO_CPP11) # define CATCH_CONFIG_CPP11_IS_ENUM #endif #if defined(CATCH_INTERNAL_CONFIG_CPP11_TUPLE) && !defined(CATCH_CONFIG_CPP11_NO_TUPLE) && !defined(CATCH_CONFIG_CPP11_TUPLE) && !defined(CATCH_CONFIG_NO_CPP11) # define CATCH_CONFIG_CPP11_TUPLE #endif #if defined(CATCH_INTERNAL_CONFIG_VARIADIC_MACROS) && !defined(CATCH_CONFIG_NO_VARIADIC_MACROS) && !defined(CATCH_CONFIG_VARIADIC_MACROS) # define CATCH_CONFIG_VARIADIC_MACROS #endif #if defined(CATCH_INTERNAL_CONFIG_CPP11_LONG_LONG) && !defined(CATCH_CONFIG_CPP11_NO_LONG_LONG) && !defined(CATCH_CONFIG_CPP11_LONG_LONG) && !defined(CATCH_CONFIG_NO_CPP11) # define CATCH_CONFIG_CPP11_LONG_LONG #endif #if defined(CATCH_INTERNAL_CONFIG_CPP11_OVERRIDE) && !defined(CATCH_CONFIG_CPP11_NO_OVERRIDE) && !defined(CATCH_CONFIG_CPP11_OVERRIDE) && !defined(CATCH_CONFIG_NO_CPP11) # define CATCH_CONFIG_CPP11_OVERRIDE #endif #if defined(CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) && !defined(CATCH_CONFIG_CPP11_NO_UNIQUE_PTR) && !defined(CATCH_CONFIG_CPP11_UNIQUE_PTR) && !defined(CATCH_CONFIG_NO_CPP11) # define CATCH_CONFIG_CPP11_UNIQUE_PTR #endif #if defined(CATCH_INTERNAL_CONFIG_COUNTER) && !defined(CATCH_CONFIG_NO_COUNTER) && !defined(CATCH_CONFIG_COUNTER) # define CATCH_CONFIG_COUNTER #endif #if defined(CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE) && !defined(CATCH_CONFIG_CPP11_NO_SHUFFLE) && !defined(CATCH_CONFIG_CPP11_SHUFFLE) && !defined(CATCH_CONFIG_NO_CPP11) # define CATCH_CONFIG_CPP11_SHUFFLE #endif #if !defined(CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS) # define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS #endif // noexcept support: #if defined(CATCH_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_NOEXCEPT) # define CATCH_NOEXCEPT noexcept # define CATCH_NOEXCEPT_IS(x) noexcept(x) #else # define CATCH_NOEXCEPT throw() # define CATCH_NOEXCEPT_IS(x) #endif // nullptr support #ifdef CATCH_CONFIG_CPP11_NULLPTR # define CATCH_NULL nullptr #else # define CATCH_NULL NULL #endif // override support #ifdef CATCH_CONFIG_CPP11_OVERRIDE # define CATCH_OVERRIDE override #else # define CATCH_OVERRIDE #endif // unique_ptr support #ifdef CATCH_CONFIG_CPP11_UNIQUE_PTR # define 
CATCH_AUTO_PTR( T ) std::unique_ptr #else # define CATCH_AUTO_PTR( T ) std::auto_ptr #endif #define INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) name##line #define INTERNAL_CATCH_UNIQUE_NAME_LINE( name, line ) INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) #ifdef CATCH_CONFIG_COUNTER # define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, __COUNTER__ ) #else # define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, __LINE__ ) #endif #define INTERNAL_CATCH_STRINGIFY2( expr ) #expr #define INTERNAL_CATCH_STRINGIFY( expr ) INTERNAL_CATCH_STRINGIFY2( expr ) #include #include #include namespace Catch { struct IConfig; struct CaseSensitive { enum Choice { Yes, No }; }; class NonCopyable { #ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS NonCopyable( NonCopyable const& ) = delete; NonCopyable( NonCopyable && ) = delete; NonCopyable& operator = ( NonCopyable const& ) = delete; NonCopyable& operator = ( NonCopyable && ) = delete; #else NonCopyable( NonCopyable const& info ); NonCopyable& operator = ( NonCopyable const& ); #endif protected: NonCopyable() {} virtual ~NonCopyable(); }; class SafeBool { public: typedef void (SafeBool::*type)() const; static type makeSafe( bool value ) { return value ? &SafeBool::trueValue : 0; } private: void trueValue() const {} }; template inline void deleteAll( ContainerT& container ) { typename ContainerT::const_iterator it = container.begin(); typename ContainerT::const_iterator itEnd = container.end(); for(; it != itEnd; ++it ) delete *it; } template inline void deleteAllValues( AssociativeContainerT& container ) { typename AssociativeContainerT::const_iterator it = container.begin(); typename AssociativeContainerT::const_iterator itEnd = container.end(); for(; it != itEnd; ++it ) delete it->second; } bool startsWith( std::string const& s, std::string const& prefix ); bool endsWith( std::string const& s, std::string const& suffix ); bool contains( std::string const& s, std::string const& infix ); void toLowerInPlace( std::string& s ); std::string toLower( std::string const& s ); std::string trim( std::string const& str ); bool replaceInPlace( std::string& str, std::string const& replaceThis, std::string const& withThis ); struct pluralise { pluralise( std::size_t count, std::string const& label ); friend std::ostream& operator << ( std::ostream& os, pluralise const& pluraliser ); std::size_t m_count; std::string m_label; }; struct SourceLineInfo { SourceLineInfo(); SourceLineInfo( char const* _file, std::size_t _line ); SourceLineInfo( SourceLineInfo const& other ); # ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS SourceLineInfo( SourceLineInfo && ) = default; SourceLineInfo& operator = ( SourceLineInfo const& ) = default; SourceLineInfo& operator = ( SourceLineInfo && ) = default; # endif bool empty() const; bool operator == ( SourceLineInfo const& other ) const; bool operator < ( SourceLineInfo const& other ) const; std::string file; std::size_t line; }; std::ostream& operator << ( std::ostream& os, SourceLineInfo const& info ); // This is just here to avoid compiler warnings with macro constants and boolean literals inline bool isTrue( bool value ){ return value; } inline bool alwaysTrue() { return true; } inline bool alwaysFalse() { return false; } void throwLogicError( std::string const& message, SourceLineInfo const& locationInfo ); void seedRng( IConfig const& config ); unsigned int rngSeed(); // Use this in variadic streaming macros to allow // >> +StreamEndStop // as well as // >> stuff +StreamEndStop struct 
StreamEndStop { std::string operator+() { return std::string(); } }; template T const& operator + ( T const& value, StreamEndStop ) { return value; } } #define CATCH_INTERNAL_LINEINFO ::Catch::SourceLineInfo( __FILE__, static_cast( __LINE__ ) ) #define CATCH_INTERNAL_ERROR( msg ) ::Catch::throwLogicError( msg, CATCH_INTERNAL_LINEINFO ); #include namespace Catch { class NotImplementedException : public std::exception { public: NotImplementedException( SourceLineInfo const& lineInfo ); NotImplementedException( NotImplementedException const& ) {} virtual ~NotImplementedException() CATCH_NOEXCEPT {} virtual const char* what() const CATCH_NOEXCEPT; private: std::string m_what; SourceLineInfo m_lineInfo; }; } // end namespace Catch /////////////////////////////////////////////////////////////////////////////// #define CATCH_NOT_IMPLEMENTED throw Catch::NotImplementedException( CATCH_INTERNAL_LINEINFO ) // #included from: internal/catch_context.h #define TWOBLUECUBES_CATCH_CONTEXT_H_INCLUDED // #included from: catch_interfaces_generators.h #define TWOBLUECUBES_CATCH_INTERFACES_GENERATORS_H_INCLUDED #include namespace Catch { struct IGeneratorInfo { virtual ~IGeneratorInfo(); virtual bool moveNext() = 0; virtual std::size_t getCurrentIndex() const = 0; }; struct IGeneratorsForTest { virtual ~IGeneratorsForTest(); virtual IGeneratorInfo& getGeneratorInfo( std::string const& fileInfo, std::size_t size ) = 0; virtual bool moveNext() = 0; }; IGeneratorsForTest* createGeneratorsForTest(); } // end namespace Catch // #included from: catch_ptr.hpp #define TWOBLUECUBES_CATCH_PTR_HPP_INCLUDED #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpadded" #endif namespace Catch { // An intrusive reference counting smart pointer. // T must implement addRef() and release() methods // typically implementing the IShared interface template class Ptr { public: Ptr() : m_p( CATCH_NULL ){} Ptr( T* p ) : m_p( p ){ if( m_p ) m_p->addRef(); } Ptr( Ptr const& other ) : m_p( other.m_p ){ if( m_p ) m_p->addRef(); } ~Ptr(){ if( m_p ) m_p->release(); } void reset() { if( m_p ) m_p->release(); m_p = CATCH_NULL; } Ptr& operator = ( T* p ){ Ptr temp( p ); swap( temp ); return *this; } Ptr& operator = ( Ptr const& other ){ Ptr temp( other ); swap( temp ); return *this; } void swap( Ptr& other ) { std::swap( m_p, other.m_p ); } T* get() const{ return m_p; } T& operator*() const { return *m_p; } T* operator->() const { return m_p; } bool operator !() const { return m_p == CATCH_NULL; } operator SafeBool::type() const { return SafeBool::makeSafe( m_p != CATCH_NULL ); } private: T* m_p; }; struct IShared : NonCopyable { virtual ~IShared(); virtual void addRef() const = 0; virtual void release() const = 0; }; template struct SharedImpl : T { SharedImpl() : m_rc( 0 ){} virtual void addRef() const { ++m_rc; } virtual void release() const { if( --m_rc == 0 ) delete this; } mutable unsigned int m_rc; }; } // end namespace Catch #ifdef __clang__ #pragma clang diagnostic pop #endif #include #include #include namespace Catch { class TestCase; class Stream; struct IResultCapture; struct IRunner; struct IGeneratorsForTest; struct IConfig; struct IContext { virtual ~IContext(); virtual IResultCapture* getResultCapture() = 0; virtual IRunner* getRunner() = 0; virtual size_t getGeneratorIndex( std::string const& fileInfo, size_t totalSize ) = 0; virtual bool advanceGeneratorsForCurrentTest() = 0; virtual Ptr getConfig() const = 0; }; struct IMutableContext : IContext { virtual ~IMutableContext(); virtual void 
setResultCapture( IResultCapture* resultCapture ) = 0; virtual void setRunner( IRunner* runner ) = 0; virtual void setConfig( Ptr const& config ) = 0; }; IContext& getCurrentContext(); IMutableContext& getCurrentMutableContext(); void cleanUpContext(); Stream createStream( std::string const& streamName ); } // #included from: internal/catch_test_registry.hpp #define TWOBLUECUBES_CATCH_TEST_REGISTRY_HPP_INCLUDED // #included from: catch_interfaces_testcase.h #define TWOBLUECUBES_CATCH_INTERFACES_TESTCASE_H_INCLUDED #include namespace Catch { class TestSpec; struct ITestCase : IShared { virtual void invoke () const = 0; protected: virtual ~ITestCase(); }; class TestCase; struct IConfig; struct ITestCaseRegistry { virtual ~ITestCaseRegistry(); virtual std::vector const& getAllTests() const = 0; virtual std::vector const& getAllTestsSorted( IConfig const& config ) const = 0; }; bool matchTest( TestCase const& testCase, TestSpec const& testSpec, IConfig const& config ); std::vector filterTests( std::vector const& testCases, TestSpec const& testSpec, IConfig const& config ); std::vector const& getAllTestCasesSorted( IConfig const& config ); } namespace Catch { template class MethodTestCase : public SharedImpl { public: MethodTestCase( void (C::*method)() ) : m_method( method ) {} virtual void invoke() const { C obj; (obj.*m_method)(); } private: virtual ~MethodTestCase() {} void (C::*m_method)(); }; typedef void(*TestFunction)(); struct NameAndDesc { NameAndDesc( const char* _name = "", const char* _description= "" ) : name( _name ), description( _description ) {} const char* name; const char* description; }; void registerTestCase ( ITestCase* testCase, char const* className, NameAndDesc const& nameAndDesc, SourceLineInfo const& lineInfo ); struct AutoReg { AutoReg ( TestFunction function, SourceLineInfo const& lineInfo, NameAndDesc const& nameAndDesc ); template AutoReg ( void (C::*method)(), char const* className, NameAndDesc const& nameAndDesc, SourceLineInfo const& lineInfo ) { registerTestCase ( new MethodTestCase( method ), className, nameAndDesc, lineInfo ); } ~AutoReg(); private: AutoReg( AutoReg const& ); void operator= ( AutoReg const& ); }; void registerTestCaseFunction ( TestFunction function, SourceLineInfo const& lineInfo, NameAndDesc const& nameAndDesc ); } // end namespace Catch #ifdef CATCH_CONFIG_VARIADIC_MACROS /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_TESTCASE2( TestName, ... ) \ static void TestName(); \ namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &TestName, CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( __VA_ARGS__ ) ); }\ static void TestName() #define INTERNAL_CATCH_TESTCASE( ... ) \ INTERNAL_CATCH_TESTCASE2( INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ), __VA_ARGS__ ) /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_METHOD_AS_TEST_CASE( QualifiedMethod, ... ) \ namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &QualifiedMethod, "&" #QualifiedMethod, Catch::NameAndDesc( __VA_ARGS__ ), CATCH_INTERNAL_LINEINFO ); } /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_TEST_CASE_METHOD2( TestName, ClassName, ... 
)\ namespace{ \ struct TestName : ClassName{ \ void test(); \ }; \ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar ) ( &TestName::test, #ClassName, Catch::NameAndDesc( __VA_ARGS__ ), CATCH_INTERNAL_LINEINFO ); \ } \ void TestName::test() #define INTERNAL_CATCH_TEST_CASE_METHOD( ClassName, ... ) \ INTERNAL_CATCH_TEST_CASE_METHOD2( INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ), ClassName, __VA_ARGS__ ) /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_REGISTER_TESTCASE( Function, ... ) \ Catch::AutoReg( Function, CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( __VA_ARGS__ ) ); #else /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_TESTCASE2( TestName, Name, Desc ) \ static void TestName(); \ namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &TestName, CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( Name, Desc ) ); }\ static void TestName() #define INTERNAL_CATCH_TESTCASE( Name, Desc ) \ INTERNAL_CATCH_TESTCASE2( INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ), Name, Desc ) /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_METHOD_AS_TEST_CASE( QualifiedMethod, Name, Desc ) \ namespace{ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar )( &QualifiedMethod, "&" #QualifiedMethod, Catch::NameAndDesc( Name, Desc ), CATCH_INTERNAL_LINEINFO ); } /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_TEST_CASE_METHOD2( TestCaseName, ClassName, TestName, Desc )\ namespace{ \ struct TestCaseName : ClassName{ \ void test(); \ }; \ Catch::AutoReg INTERNAL_CATCH_UNIQUE_NAME( autoRegistrar ) ( &TestCaseName::test, #ClassName, Catch::NameAndDesc( TestName, Desc ), CATCH_INTERNAL_LINEINFO ); \ } \ void TestCaseName::test() #define INTERNAL_CATCH_TEST_CASE_METHOD( ClassName, TestName, Desc )\ INTERNAL_CATCH_TEST_CASE_METHOD2( INTERNAL_CATCH_UNIQUE_NAME( ____C_A_T_C_H____T_E_S_T____ ), ClassName, TestName, Desc ) /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_REGISTER_TESTCASE( Function, Name, Desc ) \ Catch::AutoReg( Function, CATCH_INTERNAL_LINEINFO, Catch::NameAndDesc( Name, Desc ) ); #endif // #included from: internal/catch_capture.hpp #define TWOBLUECUBES_CATCH_CAPTURE_HPP_INCLUDED // #included from: catch_result_builder.h #define TWOBLUECUBES_CATCH_RESULT_BUILDER_H_INCLUDED // #included from: catch_result_type.h #define TWOBLUECUBES_CATCH_RESULT_TYPE_H_INCLUDED namespace Catch { // ResultWas::OfType enum struct ResultWas { enum OfType { Unknown = -1, Ok = 0, Info = 1, Warning = 2, FailureBit = 0x10, ExpressionFailed = FailureBit | 1, ExplicitFailure = FailureBit | 2, Exception = 0x100 | FailureBit, ThrewException = Exception | 1, DidntThrowException = Exception | 2, FatalErrorCondition = 0x200 | FailureBit }; }; inline bool isOk( ResultWas::OfType resultType ) { return ( resultType & ResultWas::FailureBit ) == 0; } inline bool isJustInfo( int flags ) { return flags == ResultWas::Info; } // ResultDisposition::Flags enum struct ResultDisposition { enum Flags { Normal = 0x01, ContinueOnFailure = 0x02, // Failures fail test, but execution continues FalseTest = 0x04, // Prefix expression with ! 
SuppressFail = 0x08 // Failures are reported but do not fail the test }; }; inline ResultDisposition::Flags operator | ( ResultDisposition::Flags lhs, ResultDisposition::Flags rhs ) { return static_cast( static_cast( lhs ) | static_cast( rhs ) ); } inline bool shouldContinueOnFailure( int flags ) { return ( flags & ResultDisposition::ContinueOnFailure ) != 0; } inline bool isFalseTest( int flags ) { return ( flags & ResultDisposition::FalseTest ) != 0; } inline bool shouldSuppressFailure( int flags ) { return ( flags & ResultDisposition::SuppressFail ) != 0; } } // end namespace Catch // #included from: catch_assertionresult.h #define TWOBLUECUBES_CATCH_ASSERTIONRESULT_H_INCLUDED #include namespace Catch { struct AssertionInfo { AssertionInfo() {} AssertionInfo( std::string const& _macroName, SourceLineInfo const& _lineInfo, std::string const& _capturedExpression, ResultDisposition::Flags _resultDisposition ); std::string macroName; SourceLineInfo lineInfo; std::string capturedExpression; ResultDisposition::Flags resultDisposition; }; struct AssertionResultData { AssertionResultData() : resultType( ResultWas::Unknown ) {} std::string reconstructedExpression; std::string message; ResultWas::OfType resultType; }; class AssertionResult { public: AssertionResult(); AssertionResult( AssertionInfo const& info, AssertionResultData const& data ); ~AssertionResult(); # ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS AssertionResult( AssertionResult const& ) = default; AssertionResult( AssertionResult && ) = default; AssertionResult& operator = ( AssertionResult const& ) = default; AssertionResult& operator = ( AssertionResult && ) = default; # endif bool isOk() const; bool succeeded() const; ResultWas::OfType getResultType() const; bool hasExpression() const; bool hasMessage() const; std::string getExpression() const; std::string getExpressionInMacro() const; bool hasExpandedExpression() const; std::string getExpandedExpression() const; std::string getMessage() const; SourceLineInfo getSourceInfo() const; std::string getTestMacroName() const; protected: AssertionInfo m_info; AssertionResultData m_resultData; }; } // end namespace Catch // #included from: catch_matchers.hpp #define TWOBLUECUBES_CATCH_MATCHERS_HPP_INCLUDED namespace Catch { namespace Matchers { namespace Impl { namespace Generic { template class AllOf; template class AnyOf; template class Not; } template struct Matcher : SharedImpl { typedef ExpressionT ExpressionType; virtual ~Matcher() {} virtual Ptr clone() const = 0; virtual bool match( ExpressionT const& expr ) const = 0; virtual std::string toString() const = 0; Generic::AllOf operator && ( Matcher const& other ) const; Generic::AnyOf operator || ( Matcher const& other ) const; Generic::Not operator ! 
() const; }; template struct MatcherImpl : Matcher { virtual Ptr > clone() const { return Ptr >( new DerivedT( static_cast( *this ) ) ); } }; namespace Generic { template class Not : public MatcherImpl, ExpressionT> { public: explicit Not( Matcher const& matcher ) : m_matcher(matcher.clone()) {} Not( Not const& other ) : m_matcher( other.m_matcher ) {} virtual bool match( ExpressionT const& expr ) const CATCH_OVERRIDE { return !m_matcher->match( expr ); } virtual std::string toString() const CATCH_OVERRIDE { return "not " + m_matcher->toString(); } private: Ptr< Matcher > m_matcher; }; template class AllOf : public MatcherImpl, ExpressionT> { public: AllOf() {} AllOf( AllOf const& other ) : m_matchers( other.m_matchers ) {} AllOf& add( Matcher const& matcher ) { m_matchers.push_back( matcher.clone() ); return *this; } virtual bool match( ExpressionT const& expr ) const { for( std::size_t i = 0; i < m_matchers.size(); ++i ) if( !m_matchers[i]->match( expr ) ) return false; return true; } virtual std::string toString() const { std::ostringstream oss; oss << "( "; for( std::size_t i = 0; i < m_matchers.size(); ++i ) { if( i != 0 ) oss << " and "; oss << m_matchers[i]->toString(); } oss << " )"; return oss.str(); } AllOf operator && ( Matcher const& other ) const { AllOf allOfExpr( *this ); allOfExpr.add( other ); return allOfExpr; } private: std::vector > > m_matchers; }; template class AnyOf : public MatcherImpl, ExpressionT> { public: AnyOf() {} AnyOf( AnyOf const& other ) : m_matchers( other.m_matchers ) {} AnyOf& add( Matcher const& matcher ) { m_matchers.push_back( matcher.clone() ); return *this; } virtual bool match( ExpressionT const& expr ) const { for( std::size_t i = 0; i < m_matchers.size(); ++i ) if( m_matchers[i]->match( expr ) ) return true; return false; } virtual std::string toString() const { std::ostringstream oss; oss << "( "; for( std::size_t i = 0; i < m_matchers.size(); ++i ) { if( i != 0 ) oss << " or "; oss << m_matchers[i]->toString(); } oss << " )"; return oss.str(); } AnyOf operator || ( Matcher const& other ) const { AnyOf anyOfExpr( *this ); anyOfExpr.add( other ); return anyOfExpr; } private: std::vector > > m_matchers; }; } // namespace Generic template Generic::AllOf Matcher::operator && ( Matcher const& other ) const { Generic::AllOf allOfExpr; allOfExpr.add( *this ); allOfExpr.add( other ); return allOfExpr; } template Generic::AnyOf Matcher::operator || ( Matcher const& other ) const { Generic::AnyOf anyOfExpr; anyOfExpr.add( *this ); anyOfExpr.add( other ); return anyOfExpr; } template Generic::Not Matcher::operator ! () const { return Generic::Not( *this ); } namespace StdString { inline std::string makeString( std::string const& str ) { return str; } inline std::string makeString( const char* str ) { return str ? std::string( str ) : std::string(); } struct CasedString { CasedString( std::string const& str, CaseSensitive::Choice caseSensitivity ) : m_caseSensitivity( caseSensitivity ), m_str( adjustString( str ) ) {} std::string adjustString( std::string const& str ) const { return m_caseSensitivity == CaseSensitive::No ? toLower( str ) : str; } std::string toStringSuffix() const { return m_caseSensitivity == CaseSensitive::No ? 
" (case insensitive)" : ""; } CaseSensitive::Choice m_caseSensitivity; std::string m_str; }; struct Equals : MatcherImpl { Equals( std::string const& str, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ) : m_data( str, caseSensitivity ) {} Equals( Equals const& other ) : m_data( other.m_data ){} virtual ~Equals(); virtual bool match( std::string const& expr ) const { return m_data.m_str == m_data.adjustString( expr );; } virtual std::string toString() const { return "equals: \"" + m_data.m_str + "\"" + m_data.toStringSuffix(); } CasedString m_data; }; struct Contains : MatcherImpl { Contains( std::string const& substr, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ) : m_data( substr, caseSensitivity ){} Contains( Contains const& other ) : m_data( other.m_data ){} virtual ~Contains(); virtual bool match( std::string const& expr ) const { return m_data.adjustString( expr ).find( m_data.m_str ) != std::string::npos; } virtual std::string toString() const { return "contains: \"" + m_data.m_str + "\"" + m_data.toStringSuffix(); } CasedString m_data; }; struct StartsWith : MatcherImpl { StartsWith( std::string const& substr, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ) : m_data( substr, caseSensitivity ){} StartsWith( StartsWith const& other ) : m_data( other.m_data ){} virtual ~StartsWith(); virtual bool match( std::string const& expr ) const { return startsWith( m_data.adjustString( expr ), m_data.m_str ); } virtual std::string toString() const { return "starts with: \"" + m_data.m_str + "\"" + m_data.toStringSuffix(); } CasedString m_data; }; struct EndsWith : MatcherImpl { EndsWith( std::string const& substr, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ) : m_data( substr, caseSensitivity ){} EndsWith( EndsWith const& other ) : m_data( other.m_data ){} virtual ~EndsWith(); virtual bool match( std::string const& expr ) const { return endsWith( m_data.adjustString( expr ), m_data.m_str ); } virtual std::string toString() const { return "ends with: \"" + m_data.m_str + "\"" + m_data.toStringSuffix(); } CasedString m_data; }; } // namespace StdString } // namespace Impl // The following functions create the actual matcher objects. 
// This allows the types to be inferred template inline Impl::Generic::Not Not( Impl::Matcher const& m ) { return Impl::Generic::Not( m ); } template inline Impl::Generic::AllOf AllOf( Impl::Matcher const& m1, Impl::Matcher const& m2 ) { return Impl::Generic::AllOf().add( m1 ).add( m2 ); } template inline Impl::Generic::AllOf AllOf( Impl::Matcher const& m1, Impl::Matcher const& m2, Impl::Matcher const& m3 ) { return Impl::Generic::AllOf().add( m1 ).add( m2 ).add( m3 ); } template inline Impl::Generic::AnyOf AnyOf( Impl::Matcher const& m1, Impl::Matcher const& m2 ) { return Impl::Generic::AnyOf().add( m1 ).add( m2 ); } template inline Impl::Generic::AnyOf AnyOf( Impl::Matcher const& m1, Impl::Matcher const& m2, Impl::Matcher const& m3 ) { return Impl::Generic::AnyOf().add( m1 ).add( m2 ).add( m3 ); } inline Impl::StdString::Equals Equals( std::string const& str, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ) { return Impl::StdString::Equals( str, caseSensitivity ); } inline Impl::StdString::Equals Equals( const char* str, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ) { return Impl::StdString::Equals( Impl::StdString::makeString( str ), caseSensitivity ); } inline Impl::StdString::Contains Contains( std::string const& substr, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ) { return Impl::StdString::Contains( substr, caseSensitivity ); } inline Impl::StdString::Contains Contains( const char* substr, CaseSensitive::Choice caseSensitivity = CaseSensitive::Yes ) { return Impl::StdString::Contains( Impl::StdString::makeString( substr ), caseSensitivity ); } inline Impl::StdString::StartsWith StartsWith( std::string const& substr ) { return Impl::StdString::StartsWith( substr ); } inline Impl::StdString::StartsWith StartsWith( const char* substr ) { return Impl::StdString::StartsWith( Impl::StdString::makeString( substr ) ); } inline Impl::StdString::EndsWith EndsWith( std::string const& substr ) { return Impl::StdString::EndsWith( substr ); } inline Impl::StdString::EndsWith EndsWith( const char* substr ) { return Impl::StdString::EndsWith( Impl::StdString::makeString( substr ) ); } } // namespace Matchers using namespace Matchers; } // namespace Catch namespace Catch { struct TestFailureException{}; template class ExpressionLhs; struct STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison; struct CopyableStream { CopyableStream() {} CopyableStream( CopyableStream const& other ) { oss << other.oss.str(); } CopyableStream& operator=( CopyableStream const& other ) { oss.str(""); oss << other.oss.str(); return *this; } std::ostringstream oss; }; class ResultBuilder { public: ResultBuilder( char const* macroName, SourceLineInfo const& lineInfo, char const* capturedExpression, ResultDisposition::Flags resultDisposition, char const* secondArg = "" ); template ExpressionLhs operator <= ( T const& operand ); ExpressionLhs operator <= ( bool value ); template ResultBuilder& operator << ( T const& value ) { m_stream.oss << value; return *this; } template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator && ( RhsT const& ); template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator || ( RhsT const& ); ResultBuilder& setResultType( ResultWas::OfType result ); ResultBuilder& setResultType( bool result ); ResultBuilder& setLhs( std::string const& lhs ); ResultBuilder& setRhs( std::string const& rhs ); ResultBuilder& setOp( std::string const& op ); void endExpression(); std::string 
reconstructExpression() const; AssertionResult build() const; void useActiveException( ResultDisposition::Flags resultDisposition = ResultDisposition::Normal ); void captureResult( ResultWas::OfType resultType ); void captureExpression(); void captureExpectedException( std::string const& expectedMessage ); void captureExpectedException( Matchers::Impl::Matcher const& matcher ); void handleResult( AssertionResult const& result ); void react(); bool shouldDebugBreak() const; bool allowThrows() const; private: AssertionInfo m_assertionInfo; AssertionResultData m_data; struct ExprComponents { ExprComponents() : testFalse( false ) {} bool testFalse; std::string lhs, rhs, op; } m_exprComponents; CopyableStream m_stream; bool m_shouldDebugBreak; bool m_shouldThrow; }; } // namespace Catch // Include after due to circular dependency: // #included from: catch_expression_lhs.hpp #define TWOBLUECUBES_CATCH_EXPRESSION_LHS_HPP_INCLUDED // #included from: catch_evaluate.hpp #define TWOBLUECUBES_CATCH_EVALUATE_HPP_INCLUDED #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable:4389) // '==' : signed/unsigned mismatch #endif #include namespace Catch { namespace Internal { enum Operator { IsEqualTo, IsNotEqualTo, IsLessThan, IsGreaterThan, IsLessThanOrEqualTo, IsGreaterThanOrEqualTo }; template struct OperatorTraits { static const char* getName(){ return "*error*"; } }; template<> struct OperatorTraits { static const char* getName(){ return "=="; } }; template<> struct OperatorTraits { static const char* getName(){ return "!="; } }; template<> struct OperatorTraits { static const char* getName(){ return "<"; } }; template<> struct OperatorTraits { static const char* getName(){ return ">"; } }; template<> struct OperatorTraits { static const char* getName(){ return "<="; } }; template<> struct OperatorTraits{ static const char* getName(){ return ">="; } }; template inline T& opCast(T const& t) { return const_cast(t); } // nullptr_t support based on pull request #154 from Konstantin Baumann #ifdef CATCH_CONFIG_CPP11_NULLPTR inline std::nullptr_t opCast(std::nullptr_t) { return nullptr; } #endif // CATCH_CONFIG_CPP11_NULLPTR // So the compare overloads can be operator agnostic we convey the operator as a template // enum, which is used to specialise an Evaluator for doing the comparison. 
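//
// ---------------------------------------------------------------------------
// Editor's illustrative aside (not part of the upstream Catch header): a
// standalone miniature of the pattern described above -- the comparison
// operator travels as a non-type template parameter, and each operator gets
// its own Evaluator specialisation, so a single compare() front end stays
// operator-agnostic. All "Mini*" names are hypothetical.
#if 0
enum MiniOperator { MiniIsEqualTo, MiniIsLessThan };

template<MiniOperator Op> struct MiniEvaluator;

template<> struct MiniEvaluator<MiniIsEqualTo> {
    template<typename T1, typename T2>
    static bool evaluate( T1 const& lhs, T2 const& rhs ) { return lhs == rhs; }
};
template<> struct MiniEvaluator<MiniIsLessThan> {
    template<typename T1, typename T2>
    static bool evaluate( T1 const& lhs, T2 const& rhs ) { return lhs < rhs; }
};

template<MiniOperator Op, typename T1, typename T2>
bool miniCompare( T1 const& lhs, T2 const& rhs ) {
    return MiniEvaluator<Op>::evaluate( lhs, rhs );
}

// usage: miniCompare<MiniIsLessThan>( 3, 7 ) evaluates 3 < 7
#endif
// ---------------------------------------------------------------------------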
template class Evaluator{}; template struct Evaluator { static bool evaluate( T1 const& lhs, T2 const& rhs) { return bool( opCast( lhs ) == opCast( rhs ) ); } }; template struct Evaluator { static bool evaluate( T1 const& lhs, T2 const& rhs ) { return bool( opCast( lhs ) != opCast( rhs ) ); } }; template struct Evaluator { static bool evaluate( T1 const& lhs, T2 const& rhs ) { return bool( opCast( lhs ) < opCast( rhs ) ); } }; template struct Evaluator { static bool evaluate( T1 const& lhs, T2 const& rhs ) { return bool( opCast( lhs ) > opCast( rhs ) ); } }; template struct Evaluator { static bool evaluate( T1 const& lhs, T2 const& rhs ) { return bool( opCast( lhs ) >= opCast( rhs ) ); } }; template struct Evaluator { static bool evaluate( T1 const& lhs, T2 const& rhs ) { return bool( opCast( lhs ) <= opCast( rhs ) ); } }; template bool applyEvaluator( T1 const& lhs, T2 const& rhs ) { return Evaluator::evaluate( lhs, rhs ); } // This level of indirection allows us to specialise for integer types // to avoid signed/ unsigned warnings // "base" overload template bool compare( T1 const& lhs, T2 const& rhs ) { return Evaluator::evaluate( lhs, rhs ); } // unsigned X to int template bool compare( unsigned int lhs, int rhs ) { return applyEvaluator( lhs, static_cast( rhs ) ); } template bool compare( unsigned long lhs, int rhs ) { return applyEvaluator( lhs, static_cast( rhs ) ); } template bool compare( unsigned char lhs, int rhs ) { return applyEvaluator( lhs, static_cast( rhs ) ); } // unsigned X to long template bool compare( unsigned int lhs, long rhs ) { return applyEvaluator( lhs, static_cast( rhs ) ); } template bool compare( unsigned long lhs, long rhs ) { return applyEvaluator( lhs, static_cast( rhs ) ); } template bool compare( unsigned char lhs, long rhs ) { return applyEvaluator( lhs, static_cast( rhs ) ); } // int to unsigned X template bool compare( int lhs, unsigned int rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( int lhs, unsigned long rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( int lhs, unsigned char rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } // long to unsigned X template bool compare( long lhs, unsigned int rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( long lhs, unsigned long rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( long lhs, unsigned char rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } // pointer to long (when comparing against NULL) template bool compare( long lhs, T* rhs ) { return Evaluator::evaluate( reinterpret_cast( lhs ), rhs ); } template bool compare( T* lhs, long rhs ) { return Evaluator::evaluate( lhs, reinterpret_cast( rhs ) ); } // pointer to int (when comparing against NULL) template bool compare( int lhs, T* rhs ) { return Evaluator::evaluate( reinterpret_cast( lhs ), rhs ); } template bool compare( T* lhs, int rhs ) { return Evaluator::evaluate( lhs, reinterpret_cast( rhs ) ); } #ifdef CATCH_CONFIG_CPP11_LONG_LONG // long long to unsigned X template bool compare( long long lhs, unsigned int rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( long long lhs, unsigned long rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( long long lhs, unsigned long long rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( long long lhs, unsigned char rhs ) { return applyEvaluator( static_cast( lhs ), rhs 
); } // unsigned long long to X template bool compare( unsigned long long lhs, int rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( unsigned long long lhs, long rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( unsigned long long lhs, long long rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } template bool compare( unsigned long long lhs, char rhs ) { return applyEvaluator( static_cast( lhs ), rhs ); } // pointer to long long (when comparing against NULL) template bool compare( long long lhs, T* rhs ) { return Evaluator::evaluate( reinterpret_cast( lhs ), rhs ); } template bool compare( T* lhs, long long rhs ) { return Evaluator::evaluate( lhs, reinterpret_cast( rhs ) ); } #endif // CATCH_CONFIG_CPP11_LONG_LONG #ifdef CATCH_CONFIG_CPP11_NULLPTR // pointer to nullptr_t (when comparing against nullptr) template bool compare( std::nullptr_t, T* rhs ) { return Evaluator::evaluate( nullptr, rhs ); } template bool compare( T* lhs, std::nullptr_t ) { return Evaluator::evaluate( lhs, nullptr ); } #endif // CATCH_CONFIG_CPP11_NULLPTR } // end of namespace Internal } // end of namespace Catch #ifdef _MSC_VER #pragma warning(pop) #endif // #included from: catch_tostring.h #define TWOBLUECUBES_CATCH_TOSTRING_H_INCLUDED #include #include #include #include #include #ifdef __OBJC__ // #included from: catch_objc_arc.hpp #define TWOBLUECUBES_CATCH_OBJC_ARC_HPP_INCLUDED #import #ifdef __has_feature #define CATCH_ARC_ENABLED __has_feature(objc_arc) #else #define CATCH_ARC_ENABLED 0 #endif void arcSafeRelease( NSObject* obj ); id performOptionalSelector( id obj, SEL sel ); #if !CATCH_ARC_ENABLED inline void arcSafeRelease( NSObject* obj ) { [obj release]; } inline id performOptionalSelector( id obj, SEL sel ) { if( [obj respondsToSelector: sel] ) return [obj performSelector: sel]; return nil; } #define CATCH_UNSAFE_UNRETAINED #define CATCH_ARC_STRONG #else inline void arcSafeRelease( NSObject* ){} inline id performOptionalSelector( id obj, SEL sel ) { #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Warc-performSelector-leaks" #endif if( [obj respondsToSelector: sel] ) return [obj performSelector: sel]; #ifdef __clang__ #pragma clang diagnostic pop #endif return nil; } #define CATCH_UNSAFE_UNRETAINED __unsafe_unretained #define CATCH_ARC_STRONG __strong #endif #endif #ifdef CATCH_CONFIG_CPP11_TUPLE #include #endif #ifdef CATCH_CONFIG_CPP11_IS_ENUM #include #endif namespace Catch { // Why we're here. 
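//
// ---------------------------------------------------------------------------
// Editor's illustrative aside (not part of the upstream Catch header): the
// section below declares Catch::toString()/StringMaker, the hook that turns
// assertion operands into readable text. A user-defined type can either
// provide an ostream operator<< or overload Catch::toString directly, as
// sketched here in a hypothetical test source file; 'Frequency' and its
// field are made up for illustration.
#if 0
// (in a test source file, after including catch.hpp)
#include <sstream>
#include <string>

struct Frequency { int count; };

namespace Catch {
    inline std::string toString( Frequency const& f ) {
        std::ostringstream oss;
        oss << "Frequency(" << f.count << ")";
        return oss.str();
    }
}
#endif
// ---------------------------------------------------------------------------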
template std::string toString( T const& value ); // Built in overloads std::string toString( std::string const& value ); std::string toString( std::wstring const& value ); std::string toString( const char* const value ); std::string toString( char* const value ); std::string toString( const wchar_t* const value ); std::string toString( wchar_t* const value ); std::string toString( int value ); std::string toString( unsigned long value ); std::string toString( unsigned int value ); std::string toString( const double value ); std::string toString( const float value ); std::string toString( bool value ); std::string toString( char value ); std::string toString( signed char value ); std::string toString( unsigned char value ); #ifdef CATCH_CONFIG_CPP11_LONG_LONG std::string toString( long long value ); std::string toString( unsigned long long value ); #endif #ifdef CATCH_CONFIG_CPP11_NULLPTR std::string toString( std::nullptr_t ); #endif #ifdef __OBJC__ std::string toString( NSString const * const& nsstring ); std::string toString( NSString * CATCH_ARC_STRONG const& nsstring ); std::string toString( NSObject* const& nsObject ); #endif namespace Detail { extern const std::string unprintableString; struct BorgType { template BorgType( T const& ); }; struct TrueType { char sizer[1]; }; struct FalseType { char sizer[2]; }; TrueType& testStreamable( std::ostream& ); FalseType testStreamable( FalseType ); FalseType operator<<( std::ostream const&, BorgType const& ); template struct IsStreamInsertable { static std::ostream &s; static T const&t; enum { value = sizeof( testStreamable(s << t) ) == sizeof( TrueType ) }; }; #if defined(CATCH_CONFIG_CPP11_IS_ENUM) template::value > struct EnumStringMaker { static std::string convert( T const& ) { return unprintableString; } }; template struct EnumStringMaker { static std::string convert( T const& v ) { return ::Catch::toString( static_cast::type>(v) ); } }; #endif template struct StringMakerBase { #if defined(CATCH_CONFIG_CPP11_IS_ENUM) template static std::string convert( T const& v ) { return EnumStringMaker::convert( v ); } #else template static std::string convert( T const& ) { return unprintableString; } #endif }; template<> struct StringMakerBase { template static std::string convert( T const& _value ) { std::ostringstream oss; oss << _value; return oss.str(); } }; std::string rawMemoryToString( const void *object, std::size_t size ); template inline std::string rawMemoryToString( const T& object ) { return rawMemoryToString( &object, sizeof(object) ); } } // end namespace Detail template struct StringMaker : Detail::StringMakerBase::value> {}; template struct StringMaker { template static std::string convert( U* p ) { if( !p ) return "NULL"; else return Detail::rawMemoryToString( p ); } }; template struct StringMaker { static std::string convert( R C::* p ) { if( !p ) return "NULL"; else return Detail::rawMemoryToString( p ); } }; namespace Detail { template std::string rangeToString( InputIterator first, InputIterator last ); } //template //struct StringMaker > { // static std::string convert( std::vector const& v ) { // return Detail::rangeToString( v.begin(), v.end() ); // } //}; template std::string toString( std::vector const& v ) { return Detail::rangeToString( v.begin(), v.end() ); } #ifdef CATCH_CONFIG_CPP11_TUPLE // toString for tuples namespace TupleDetail { template< typename Tuple, std::size_t N = 0, bool = (N < std::tuple_size::value) > struct ElementPrinter { static void print( const Tuple& tuple, std::ostream& os ) { os << ( N ? 
", " : " " ) << Catch::toString(std::get(tuple)); ElementPrinter::print(tuple,os); } }; template< typename Tuple, std::size_t N > struct ElementPrinter { static void print( const Tuple&, std::ostream& ) {} }; } template struct StringMaker> { static std::string convert( const std::tuple& tuple ) { std::ostringstream os; os << '{'; TupleDetail::ElementPrinter>::print( tuple, os ); os << " }"; return os.str(); } }; #endif // CATCH_CONFIG_CPP11_TUPLE namespace Detail { template std::string makeString( T const& value ) { return StringMaker::convert( value ); } } // end namespace Detail /// \brief converts any type to a string /// /// The default template forwards on to ostringstream - except when an /// ostringstream overload does not exist - in which case it attempts to detect /// that and writes {?}. /// Overload (not specialise) this template for custom typs that you don't want /// to provide an ostream overload for. template std::string toString( T const& value ) { return StringMaker::convert( value ); } namespace Detail { template std::string rangeToString( InputIterator first, InputIterator last ) { std::ostringstream oss; oss << "{ "; if( first != last ) { oss << Catch::toString( *first ); for( ++first ; first != last ; ++first ) oss << ", " << Catch::toString( *first ); } oss << " }"; return oss.str(); } } } // end namespace Catch namespace Catch { // Wraps the LHS of an expression and captures the operator and RHS (if any) - // wrapping them all in a ResultBuilder object template class ExpressionLhs { ExpressionLhs& operator = ( ExpressionLhs const& ); # ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS ExpressionLhs& operator = ( ExpressionLhs && ) = delete; # endif public: ExpressionLhs( ResultBuilder& rb, T lhs ) : m_rb( rb ), m_lhs( lhs ) {} # ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS ExpressionLhs( ExpressionLhs const& ) = default; ExpressionLhs( ExpressionLhs && ) = default; # endif template ResultBuilder& operator == ( RhsT const& rhs ) { return captureExpression( rhs ); } template ResultBuilder& operator != ( RhsT const& rhs ) { return captureExpression( rhs ); } template ResultBuilder& operator < ( RhsT const& rhs ) { return captureExpression( rhs ); } template ResultBuilder& operator > ( RhsT const& rhs ) { return captureExpression( rhs ); } template ResultBuilder& operator <= ( RhsT const& rhs ) { return captureExpression( rhs ); } template ResultBuilder& operator >= ( RhsT const& rhs ) { return captureExpression( rhs ); } ResultBuilder& operator == ( bool rhs ) { return captureExpression( rhs ); } ResultBuilder& operator != ( bool rhs ) { return captureExpression( rhs ); } void endExpression() { bool value = m_lhs ? true : false; m_rb .setLhs( Catch::toString( value ) ) .setResultType( value ) .endExpression(); } // Only simple binary expressions are allowed on the LHS. 
// If more complex compositions are required then place the sub expression in parentheses template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator + ( RhsT const& ); template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator - ( RhsT const& ); template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator / ( RhsT const& ); template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator * ( RhsT const& ); template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator && ( RhsT const& ); template STATIC_ASSERT_Expression_Too_Complex_Please_Rewrite_As_Binary_Comparison& operator || ( RhsT const& ); private: template ResultBuilder& captureExpression( RhsT const& rhs ) { return m_rb .setResultType( Internal::compare( m_lhs, rhs ) ) .setLhs( Catch::toString( m_lhs ) ) .setRhs( Catch::toString( rhs ) ) .setOp( Internal::OperatorTraits::getName() ); } private: ResultBuilder& m_rb; T m_lhs; }; } // end namespace Catch namespace Catch { template inline ExpressionLhs ResultBuilder::operator <= ( T const& operand ) { return ExpressionLhs( *this, operand ); } inline ExpressionLhs ResultBuilder::operator <= ( bool value ) { return ExpressionLhs( *this, value ); } } // namespace Catch // #included from: catch_message.h #define TWOBLUECUBES_CATCH_MESSAGE_H_INCLUDED #include namespace Catch { struct MessageInfo { MessageInfo( std::string const& _macroName, SourceLineInfo const& _lineInfo, ResultWas::OfType _type ); std::string macroName; SourceLineInfo lineInfo; ResultWas::OfType type; std::string message; unsigned int sequence; bool operator == ( MessageInfo const& other ) const { return sequence == other.sequence; } bool operator < ( MessageInfo const& other ) const { return sequence < other.sequence; } private: static unsigned int globalCount; }; struct MessageBuilder { MessageBuilder( std::string const& macroName, SourceLineInfo const& lineInfo, ResultWas::OfType type ) : m_info( macroName, lineInfo, type ) {} template MessageBuilder& operator << ( T const& value ) { m_stream << value; return *this; } MessageInfo m_info; std::ostringstream m_stream; }; class ScopedMessage { public: ScopedMessage( MessageBuilder const& builder ); ScopedMessage( ScopedMessage const& other ); ~ScopedMessage(); MessageInfo m_info; }; } // end namespace Catch // #included from: catch_interfaces_capture.h #define TWOBLUECUBES_CATCH_INTERFACES_CAPTURE_H_INCLUDED #include namespace Catch { class TestCase; class AssertionResult; struct AssertionInfo; struct SectionInfo; struct SectionEndInfo; struct MessageInfo; class ScopedMessageBuilder; struct Counts; struct IResultCapture { virtual ~IResultCapture(); virtual void assertionEnded( AssertionResult const& result ) = 0; virtual bool sectionStarted( SectionInfo const& sectionInfo, Counts& assertions ) = 0; virtual void sectionEnded( SectionEndInfo const& endInfo ) = 0; virtual void sectionEndedEarly( SectionEndInfo const& endInfo ) = 0; virtual void pushScopedMessage( MessageInfo const& message ) = 0; virtual void popScopedMessage( MessageInfo const& message ) = 0; virtual std::string getCurrentTestName() const = 0; virtual const AssertionResult* getLastResult() const = 0; virtual void handleFatalErrorCondition( std::string const& message ) = 0; }; IResultCapture& getResultCapture(); } // #included from: catch_debugger.h #define TWOBLUECUBES_CATCH_DEBUGGER_H_INCLUDED // #included from: catch_platform.h #define 
TWOBLUECUBES_CATCH_PLATFORM_H_INCLUDED #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) #define CATCH_PLATFORM_MAC #elif defined(__IPHONE_OS_VERSION_MIN_REQUIRED) #define CATCH_PLATFORM_IPHONE #elif defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) #define CATCH_PLATFORM_WINDOWS #endif #include namespace Catch{ bool isDebuggerActive(); void writeToDebugConsole( std::string const& text ); } #ifdef CATCH_PLATFORM_MAC // The following code snippet based on: // http://cocoawithlove.com/2008/03/break-into-debugger.html #ifdef DEBUG #if defined(__ppc64__) || defined(__ppc__) #define CATCH_BREAK_INTO_DEBUGGER() \ if( Catch::isDebuggerActive() ) { \ __asm__("li r0, 20\nsc\nnop\nli r0, 37\nli r4, 2\nsc\nnop\n" \ : : : "memory","r0","r3","r4" ); \ } #else #define CATCH_BREAK_INTO_DEBUGGER() if( Catch::isDebuggerActive() ) {__asm__("int $3\n" : : );} #endif #endif #elif defined(_MSC_VER) #define CATCH_BREAK_INTO_DEBUGGER() if( Catch::isDebuggerActive() ) { __debugbreak(); } #elif defined(__MINGW32__) extern "C" __declspec(dllimport) void __stdcall DebugBreak(); #define CATCH_BREAK_INTO_DEBUGGER() if( Catch::isDebuggerActive() ) { DebugBreak(); } #endif #ifndef CATCH_BREAK_INTO_DEBUGGER #define CATCH_BREAK_INTO_DEBUGGER() Catch::alwaysTrue(); #endif // #included from: catch_interfaces_runner.h #define TWOBLUECUBES_CATCH_INTERFACES_RUNNER_H_INCLUDED namespace Catch { class TestCase; struct IRunner { virtual ~IRunner(); virtual bool aborting() const = 0; }; } /////////////////////////////////////////////////////////////////////////////// // In the event of a failure works out if the debugger needs to be invoked // and/or an exception thrown and takes appropriate action. // This needs to be done as a macro so the debugger will stop in the user // source code rather than in Catch library code #define INTERNAL_CATCH_REACT( resultBuilder ) \ if( resultBuilder.shouldDebugBreak() ) CATCH_BREAK_INTO_DEBUGGER(); \ resultBuilder.react(); /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_TEST( expr, resultDisposition, macroName ) \ do { \ Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #expr, resultDisposition ); \ try { \ CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS \ ( __catchResult <= expr ).endExpression(); \ } \ catch( ... ) { \ __catchResult.useActiveException( Catch::ResultDisposition::Normal ); \ } \ INTERNAL_CATCH_REACT( __catchResult ) \ } while( Catch::isTrue( false && !!(expr) ) ) // expr here is never evaluated at runtime but it forces the compiler to give it a look /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_IF( expr, resultDisposition, macroName ) \ INTERNAL_CATCH_TEST( expr, resultDisposition, macroName ); \ if( Catch::getResultCapture().getLastResult()->succeeded() ) /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_ELSE( expr, resultDisposition, macroName ) \ INTERNAL_CATCH_TEST( expr, resultDisposition, macroName ); \ if( !Catch::getResultCapture().getLastResult()->succeeded() ) /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_NO_THROW( expr, resultDisposition, macroName ) \ do { \ Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #expr, resultDisposition ); \ try { \ expr; \ __catchResult.captureResult( Catch::ResultWas::Ok ); \ } \ catch( ... 
) { \ __catchResult.useActiveException( resultDisposition ); \ } \ INTERNAL_CATCH_REACT( __catchResult ) \ } while( Catch::alwaysFalse() ) /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_THROWS( expr, resultDisposition, matcher, macroName ) \ do { \ Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #expr, resultDisposition, #matcher ); \ if( __catchResult.allowThrows() ) \ try { \ expr; \ __catchResult.captureResult( Catch::ResultWas::DidntThrowException ); \ } \ catch( ... ) { \ __catchResult.captureExpectedException( matcher ); \ } \ else \ __catchResult.captureResult( Catch::ResultWas::Ok ); \ INTERNAL_CATCH_REACT( __catchResult ) \ } while( Catch::alwaysFalse() ) /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_THROWS_AS( expr, exceptionType, resultDisposition, macroName ) \ do { \ Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #expr, resultDisposition ); \ if( __catchResult.allowThrows() ) \ try { \ expr; \ __catchResult.captureResult( Catch::ResultWas::DidntThrowException ); \ } \ catch( exceptionType ) { \ __catchResult.captureResult( Catch::ResultWas::Ok ); \ } \ catch( ... ) { \ __catchResult.useActiveException( resultDisposition ); \ } \ else \ __catchResult.captureResult( Catch::ResultWas::Ok ); \ INTERNAL_CATCH_REACT( __catchResult ) \ } while( Catch::alwaysFalse() ) /////////////////////////////////////////////////////////////////////////////// #ifdef CATCH_CONFIG_VARIADIC_MACROS #define INTERNAL_CATCH_MSG( messageType, resultDisposition, macroName, ... ) \ do { \ Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, "", resultDisposition ); \ __catchResult << __VA_ARGS__ + ::Catch::StreamEndStop(); \ __catchResult.captureResult( messageType ); \ INTERNAL_CATCH_REACT( __catchResult ) \ } while( Catch::alwaysFalse() ) #else #define INTERNAL_CATCH_MSG( messageType, resultDisposition, macroName, log ) \ do { \ Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, "", resultDisposition ); \ __catchResult << log + ::Catch::StreamEndStop(); \ __catchResult.captureResult( messageType ); \ INTERNAL_CATCH_REACT( __catchResult ) \ } while( Catch::alwaysFalse() ) #endif /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_INFO( log, macroName ) \ Catch::ScopedMessage INTERNAL_CATCH_UNIQUE_NAME( scopedMessage ) = Catch::MessageBuilder( macroName, CATCH_INTERNAL_LINEINFO, Catch::ResultWas::Info ) << log; /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CHECK_THAT( arg, matcher, resultDisposition, macroName ) \ do { \ Catch::ResultBuilder __catchResult( macroName, CATCH_INTERNAL_LINEINFO, #arg ", " #matcher, resultDisposition ); \ try { \ std::string matcherAsString = (matcher).toString(); \ __catchResult \ .setLhs( Catch::toString( arg ) ) \ .setRhs( matcherAsString == Catch::Detail::unprintableString ? #matcher : matcherAsString ) \ .setOp( "matches" ) \ .setResultType( (matcher).match( arg ) ); \ __catchResult.captureExpression(); \ } catch( ... 
) { \ __catchResult.useActiveException( resultDisposition | Catch::ResultDisposition::ContinueOnFailure ); \ } \ INTERNAL_CATCH_REACT( __catchResult ) \ } while( Catch::alwaysFalse() ) // #included from: internal/catch_section.h #define TWOBLUECUBES_CATCH_SECTION_H_INCLUDED // #included from: catch_section_info.h #define TWOBLUECUBES_CATCH_SECTION_INFO_H_INCLUDED // #included from: catch_totals.hpp #define TWOBLUECUBES_CATCH_TOTALS_HPP_INCLUDED #include namespace Catch { struct Counts { Counts() : passed( 0 ), failed( 0 ), failedButOk( 0 ) {} Counts operator - ( Counts const& other ) const { Counts diff; diff.passed = passed - other.passed; diff.failed = failed - other.failed; diff.failedButOk = failedButOk - other.failedButOk; return diff; } Counts& operator += ( Counts const& other ) { passed += other.passed; failed += other.failed; failedButOk += other.failedButOk; return *this; } std::size_t total() const { return passed + failed + failedButOk; } bool allPassed() const { return failed == 0 && failedButOk == 0; } bool allOk() const { return failed == 0; } std::size_t passed; std::size_t failed; std::size_t failedButOk; }; struct Totals { Totals operator - ( Totals const& other ) const { Totals diff; diff.assertions = assertions - other.assertions; diff.testCases = testCases - other.testCases; return diff; } Totals delta( Totals const& prevTotals ) const { Totals diff = *this - prevTotals; if( diff.assertions.failed > 0 ) ++diff.testCases.failed; else if( diff.assertions.failedButOk > 0 ) ++diff.testCases.failedButOk; else ++diff.testCases.passed; return diff; } Totals& operator += ( Totals const& other ) { assertions += other.assertions; testCases += other.testCases; return *this; } Counts assertions; Counts testCases; }; } namespace Catch { struct SectionInfo { SectionInfo ( SourceLineInfo const& _lineInfo, std::string const& _name, std::string const& _description = std::string() ); std::string name; std::string description; SourceLineInfo lineInfo; }; struct SectionEndInfo { SectionEndInfo( SectionInfo const& _sectionInfo, Counts const& _prevAssertions, double _durationInSeconds ) : sectionInfo( _sectionInfo ), prevAssertions( _prevAssertions ), durationInSeconds( _durationInSeconds ) {} SectionInfo sectionInfo; Counts prevAssertions; double durationInSeconds; }; } // end namespace Catch // #included from: catch_timer.h #define TWOBLUECUBES_CATCH_TIMER_H_INCLUDED #ifdef CATCH_PLATFORM_WINDOWS typedef unsigned long long uint64_t; #else #include #endif namespace Catch { class Timer { public: Timer() : m_ticks( 0 ) {} void start(); unsigned int getElapsedMicroseconds() const; unsigned int getElapsedMilliseconds() const; double getElapsedSeconds() const; private: uint64_t m_ticks; }; } // namespace Catch #include namespace Catch { class Section : NonCopyable { public: Section( SectionInfo const& info ); ~Section(); // This indicates whether the section should be executed or not operator bool() const; private: SectionInfo m_info; std::string m_name; Counts m_assertions; bool m_sectionIncluded; Timer m_timer; }; } // end namespace Catch #ifdef CATCH_CONFIG_VARIADIC_MACROS #define INTERNAL_CATCH_SECTION( ... 
) \ if( Catch::Section const& INTERNAL_CATCH_UNIQUE_NAME( catch_internal_Section ) = Catch::SectionInfo( CATCH_INTERNAL_LINEINFO, __VA_ARGS__ ) ) #else #define INTERNAL_CATCH_SECTION( name, desc ) \ if( Catch::Section const& INTERNAL_CATCH_UNIQUE_NAME( catch_internal_Section ) = Catch::SectionInfo( CATCH_INTERNAL_LINEINFO, name, desc ) ) #endif // #included from: internal/catch_generators.hpp #define TWOBLUECUBES_CATCH_GENERATORS_HPP_INCLUDED #include #include #include #include namespace Catch { template struct IGenerator { virtual ~IGenerator() {} virtual T getValue( std::size_t index ) const = 0; virtual std::size_t size () const = 0; }; template class BetweenGenerator : public IGenerator { public: BetweenGenerator( T from, T to ) : m_from( from ), m_to( to ){} virtual T getValue( std::size_t index ) const { return m_from+static_cast( index ); } virtual std::size_t size() const { return static_cast( 1+m_to-m_from ); } private: T m_from; T m_to; }; template class ValuesGenerator : public IGenerator { public: ValuesGenerator(){} void add( T value ) { m_values.push_back( value ); } virtual T getValue( std::size_t index ) const { return m_values[index]; } virtual std::size_t size() const { return m_values.size(); } private: std::vector m_values; }; template class CompositeGenerator { public: CompositeGenerator() : m_totalSize( 0 ) {} // *** Move semantics, similar to auto_ptr *** CompositeGenerator( CompositeGenerator& other ) : m_fileInfo( other.m_fileInfo ), m_totalSize( 0 ) { move( other ); } CompositeGenerator& setFileInfo( const char* fileInfo ) { m_fileInfo = fileInfo; return *this; } ~CompositeGenerator() { deleteAll( m_composed ); } operator T () const { size_t overallIndex = getCurrentContext().getGeneratorIndex( m_fileInfo, m_totalSize ); typename std::vector*>::const_iterator it = m_composed.begin(); typename std::vector*>::const_iterator itEnd = m_composed.end(); for( size_t index = 0; it != itEnd; ++it ) { const IGenerator* generator = *it; if( overallIndex >= index && overallIndex < index + generator->size() ) { return generator->getValue( overallIndex-index ); } index += generator->size(); } CATCH_INTERNAL_ERROR( "Indexed past end of generated range" ); return T(); // Suppress spurious "not all control paths return a value" warning in Visual Studio - if you know how to fix this please do so } void add( const IGenerator* generator ) { m_totalSize += generator->size(); m_composed.push_back( generator ); } CompositeGenerator& then( CompositeGenerator& other ) { move( other ); return *this; } CompositeGenerator& then( T value ) { ValuesGenerator* valuesGen = new ValuesGenerator(); valuesGen->add( value ); add( valuesGen ); return *this; } private: void move( CompositeGenerator& other ) { std::copy( other.m_composed.begin(), other.m_composed.end(), std::back_inserter( m_composed ) ); m_totalSize += other.m_totalSize; other.m_composed.clear(); } std::vector*> m_composed; std::string m_fileInfo; size_t m_totalSize; }; namespace Generators { template CompositeGenerator between( T from, T to ) { CompositeGenerator generators; generators.add( new BetweenGenerator( from, to ) ); return generators; } template CompositeGenerator values( T val1, T val2 ) { CompositeGenerator generators; ValuesGenerator* valuesGen = new ValuesGenerator(); valuesGen->add( val1 ); valuesGen->add( val2 ); generators.add( valuesGen ); return generators; } template CompositeGenerator values( T val1, T val2, T val3 ){ CompositeGenerator generators; ValuesGenerator* valuesGen = new ValuesGenerator(); 
valuesGen->add( val1 ); valuesGen->add( val2 ); valuesGen->add( val3 ); generators.add( valuesGen ); return generators; } template CompositeGenerator values( T val1, T val2, T val3, T val4 ) { CompositeGenerator generators; ValuesGenerator* valuesGen = new ValuesGenerator(); valuesGen->add( val1 ); valuesGen->add( val2 ); valuesGen->add( val3 ); valuesGen->add( val4 ); generators.add( valuesGen ); return generators; } } // end namespace Generators using namespace Generators; } // end namespace Catch #define INTERNAL_CATCH_LINESTR2( line ) #line #define INTERNAL_CATCH_LINESTR( line ) INTERNAL_CATCH_LINESTR2( line ) #define INTERNAL_CATCH_GENERATE( expr ) expr.setFileInfo( __FILE__ "(" INTERNAL_CATCH_LINESTR( __LINE__ ) ")" ) // #included from: internal/catch_interfaces_exception.h #define TWOBLUECUBES_CATCH_INTERFACES_EXCEPTION_H_INCLUDED #include #include // #included from: catch_interfaces_registry_hub.h #define TWOBLUECUBES_CATCH_INTERFACES_REGISTRY_HUB_H_INCLUDED #include namespace Catch { class TestCase; struct ITestCaseRegistry; struct IExceptionTranslatorRegistry; struct IExceptionTranslator; struct IReporterRegistry; struct IReporterFactory; struct IRegistryHub { virtual ~IRegistryHub(); virtual IReporterRegistry const& getReporterRegistry() const = 0; virtual ITestCaseRegistry const& getTestCaseRegistry() const = 0; virtual IExceptionTranslatorRegistry& getExceptionTranslatorRegistry() = 0; }; struct IMutableRegistryHub { virtual ~IMutableRegistryHub(); virtual void registerReporter( std::string const& name, Ptr const& factory ) = 0; virtual void registerListener( Ptr const& factory ) = 0; virtual void registerTest( TestCase const& testInfo ) = 0; virtual void registerTranslator( const IExceptionTranslator* translator ) = 0; }; IRegistryHub& getRegistryHub(); IMutableRegistryHub& getMutableRegistryHub(); void cleanUp(); std::string translateActiveException(); } namespace Catch { typedef std::string(*exceptionTranslateFunction)(); struct IExceptionTranslator; typedef std::vector ExceptionTranslators; struct IExceptionTranslator { virtual ~IExceptionTranslator(); virtual std::string translate( ExceptionTranslators::const_iterator it, ExceptionTranslators::const_iterator itEnd ) const = 0; }; struct IExceptionTranslatorRegistry { virtual ~IExceptionTranslatorRegistry(); virtual std::string translateActiveException() const = 0; }; class ExceptionTranslatorRegistrar { template class ExceptionTranslator : public IExceptionTranslator { public: ExceptionTranslator( std::string(*translateFunction)( T& ) ) : m_translateFunction( translateFunction ) {} virtual std::string translate( ExceptionTranslators::const_iterator it, ExceptionTranslators::const_iterator itEnd ) const CATCH_OVERRIDE { try { if( it == itEnd ) throw; else return (*it)->translate( it+1, itEnd ); } catch( T& ex ) { return m_translateFunction( ex ); } } protected: std::string(*m_translateFunction)( T& ); }; public: template ExceptionTranslatorRegistrar( std::string(*translateFunction)( T& ) ) { getMutableRegistryHub().registerTranslator ( new ExceptionTranslator( translateFunction ) ); } }; } /////////////////////////////////////////////////////////////////////////////// #define INTERNAL_CATCH_TRANSLATE_EXCEPTION2( translatorName, signature ) \ static std::string translatorName( signature ); \ namespace{ Catch::ExceptionTranslatorRegistrar INTERNAL_CATCH_UNIQUE_NAME( catch_internal_ExceptionRegistrar )( &translatorName ); }\ static std::string translatorName( signature ) #define INTERNAL_CATCH_TRANSLATE_EXCEPTION( signature ) 
INTERNAL_CATCH_TRANSLATE_EXCEPTION2( INTERNAL_CATCH_UNIQUE_NAME( catch_internal_ExceptionTranslator ), signature ) // #included from: internal/catch_approx.hpp #define TWOBLUECUBES_CATCH_APPROX_HPP_INCLUDED #include #include namespace Catch { namespace Detail { class Approx { public: explicit Approx ( double value ) : m_epsilon( std::numeric_limits::epsilon()*100 ), m_scale( 1.0 ), m_value( value ) {} Approx( Approx const& other ) : m_epsilon( other.m_epsilon ), m_scale( other.m_scale ), m_value( other.m_value ) {} static Approx custom() { return Approx( 0 ); } Approx operator()( double value ) { Approx approx( value ); approx.epsilon( m_epsilon ); approx.scale( m_scale ); return approx; } friend bool operator == ( double lhs, Approx const& rhs ) { // Thanks to Richard Harris for his help refining this formula return fabs( lhs - rhs.m_value ) < rhs.m_epsilon * (rhs.m_scale + (std::max)( fabs(lhs), fabs(rhs.m_value) ) ); } friend bool operator == ( Approx const& lhs, double rhs ) { return operator==( rhs, lhs ); } friend bool operator != ( double lhs, Approx const& rhs ) { return !operator==( lhs, rhs ); } friend bool operator != ( Approx const& lhs, double rhs ) { return !operator==( rhs, lhs ); } friend bool operator <= ( double lhs, Approx const& rhs ) { return lhs < rhs.m_value || lhs == rhs; } friend bool operator <= ( Approx const& lhs, double rhs ) { return lhs.m_value < rhs || lhs == rhs; } friend bool operator >= ( double lhs, Approx const& rhs ) { return lhs > rhs.m_value || lhs == rhs; } friend bool operator >= ( Approx const& lhs, double rhs ) { return lhs.m_value > rhs || lhs == rhs; } Approx& epsilon( double newEpsilon ) { m_epsilon = newEpsilon; return *this; } Approx& scale( double newScale ) { m_scale = newScale; return *this; } std::string toString() const { std::ostringstream oss; oss << "Approx( " << Catch::toString( m_value ) << " )"; return oss.str(); } private: double m_epsilon; double m_scale; double m_value; }; } template<> inline std::string toString( Detail::Approx const& value ) { return value.toString(); } } // end namespace Catch // #included from: internal/catch_interfaces_tag_alias_registry.h #define TWOBLUECUBES_CATCH_INTERFACES_TAG_ALIAS_REGISTRY_H_INCLUDED // #included from: catch_tag_alias.h #define TWOBLUECUBES_CATCH_TAG_ALIAS_H_INCLUDED #include namespace Catch { struct TagAlias { TagAlias( std::string _tag, SourceLineInfo _lineInfo ) : tag( _tag ), lineInfo( _lineInfo ) {} std::string tag; SourceLineInfo lineInfo; }; struct RegistrarForTagAliases { RegistrarForTagAliases( char const* alias, char const* tag, SourceLineInfo const& lineInfo ); }; } // end namespace Catch #define CATCH_REGISTER_TAG_ALIAS( alias, spec ) namespace{ Catch::RegistrarForTagAliases INTERNAL_CATCH_UNIQUE_NAME( AutoRegisterTagAlias )( alias, spec, CATCH_INTERNAL_LINEINFO ); } // #included from: catch_option.hpp #define TWOBLUECUBES_CATCH_OPTION_HPP_INCLUDED namespace Catch { // An optional type template class Option { public: Option() : nullableValue( CATCH_NULL ) {} Option( T const& _value ) : nullableValue( new( storage ) T( _value ) ) {} Option( Option const& _other ) : nullableValue( _other ? 
new( storage ) T( *_other ) : CATCH_NULL ) {} ~Option() { reset(); } Option& operator= ( Option const& _other ) { if( &_other != this ) { reset(); if( _other ) nullableValue = new( storage ) T( *_other ); } return *this; } Option& operator = ( T const& _value ) { reset(); nullableValue = new( storage ) T( _value ); return *this; } void reset() { if( nullableValue ) nullableValue->~T(); nullableValue = CATCH_NULL; } T& operator*() { return *nullableValue; } T const& operator*() const { return *nullableValue; } T* operator->() { return nullableValue; } const T* operator->() const { return nullableValue; } T valueOr( T const& defaultValue ) const { return nullableValue ? *nullableValue : defaultValue; } bool some() const { return nullableValue != CATCH_NULL; } bool none() const { return nullableValue == CATCH_NULL; } bool operator !() const { return nullableValue == CATCH_NULL; } operator SafeBool::type() const { return SafeBool::makeSafe( some() ); } private: T* nullableValue; char storage[sizeof(T)]; }; } // end namespace Catch namespace Catch { struct ITagAliasRegistry { virtual ~ITagAliasRegistry(); virtual Option find( std::string const& alias ) const = 0; virtual std::string expandAliases( std::string const& unexpandedTestSpec ) const = 0; static ITagAliasRegistry const& get(); }; } // end namespace Catch // These files are included here so the single_include script doesn't put them // in the conditionally compiled sections // #included from: internal/catch_test_case_info.h #define TWOBLUECUBES_CATCH_TEST_CASE_INFO_H_INCLUDED #include #include #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpadded" #endif namespace Catch { struct ITestCase; struct TestCaseInfo { enum SpecialProperties{ None = 0, IsHidden = 1 << 1, ShouldFail = 1 << 2, MayFail = 1 << 3, Throws = 1 << 4 }; TestCaseInfo( std::string const& _name, std::string const& _className, std::string const& _description, std::set const& _tags, SourceLineInfo const& _lineInfo ); TestCaseInfo( TestCaseInfo const& other ); friend void setTags( TestCaseInfo& testCaseInfo, std::set const& tags ); bool isHidden() const; bool throws() const; bool okToFail() const; bool expectedToFail() const; std::string name; std::string className; std::string description; std::set tags; std::set lcaseTags; std::string tagsAsString; SourceLineInfo lineInfo; SpecialProperties properties; }; class TestCase : public TestCaseInfo { public: TestCase( ITestCase* testCase, TestCaseInfo const& info ); TestCase( TestCase const& other ); TestCase withName( std::string const& _newName ) const; void invoke() const; TestCaseInfo const& getTestCaseInfo() const; void swap( TestCase& other ); bool operator == ( TestCase const& other ) const; bool operator < ( TestCase const& other ) const; TestCase& operator = ( TestCase const& other ); private: Ptr test; }; TestCase makeTestCase( ITestCase* testCase, std::string const& className, std::string const& name, std::string const& description, SourceLineInfo const& lineInfo ); } #ifdef __clang__ #pragma clang diagnostic pop #endif #ifdef __OBJC__ // #included from: internal/catch_objc.hpp #define TWOBLUECUBES_CATCH_OBJC_HPP_INCLUDED #import #include // NB. Any general catch headers included here must be included // in catch.hpp first to make sure they are included by the single // header for non obj-usage /////////////////////////////////////////////////////////////////////////////// // This protocol is really only here for (self) documenting purposes, since // all its methods are optional. 
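// ----------------------------------------------------------------------------
// [Editor's note - not part of the original Catch header] A minimal usage
// sketch of the facilities declared above (assertion macros, SECTION, Approx,
// string matchers and exception translators), written against the Catch 1.x
// API this header provides. Names such as parseAge(), parseGreeting() and
// ConfigError are hypothetical placeholders, not symbols from this file.
//
//   // test_example.cpp
//   #define CATCH_CONFIG_MAIN      // let Catch supply main() (in one TU only)
//   #include "catch.hpp"
//
//   CATCH_TRANSLATE_EXCEPTION( ConfigError& ex ) { return ex.message(); }
//
//   TEST_CASE( "parseAge handles edge cases", "[parser]" ) {
//       SECTION( "floating point comparisons use Approx" ) {
//           REQUIRE( 22.0 / 7.0 == Approx( 3.1428 ).epsilon( 0.001 ) );
//       }
//       SECTION( "invalid input throws" ) {
//           REQUIRE_THROWS_AS( parseAge( "not a number" ), std::invalid_argument );
//       }
//       CHECK_THAT( parseGreeting(), Catch::EndsWith( "world" ) );
//   }
// ----------------------------------------------------------------------------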
@protocol OcFixture @optional -(void) setUp; -(void) tearDown; @end namespace Catch { class OcMethod : public SharedImpl { public: OcMethod( Class cls, SEL sel ) : m_cls( cls ), m_sel( sel ) {} virtual void invoke() const { id obj = [[m_cls alloc] init]; performOptionalSelector( obj, @selector(setUp) ); performOptionalSelector( obj, m_sel ); performOptionalSelector( obj, @selector(tearDown) ); arcSafeRelease( obj ); } private: virtual ~OcMethod() {} Class m_cls; SEL m_sel; }; namespace Detail{ inline std::string getAnnotation( Class cls, std::string const& annotationName, std::string const& testCaseName ) { NSString* selStr = [[NSString alloc] initWithFormat:@"Catch_%s_%s", annotationName.c_str(), testCaseName.c_str()]; SEL sel = NSSelectorFromString( selStr ); arcSafeRelease( selStr ); id value = performOptionalSelector( cls, sel ); if( value ) return [(NSString*)value UTF8String]; return ""; } } inline size_t registerTestMethods() { size_t noTestMethods = 0; int noClasses = objc_getClassList( CATCH_NULL, 0 ); Class* classes = (CATCH_UNSAFE_UNRETAINED Class *)malloc( sizeof(Class) * noClasses); objc_getClassList( classes, noClasses ); for( int c = 0; c < noClasses; c++ ) { Class cls = classes[c]; { u_int count; Method* methods = class_copyMethodList( cls, &count ); for( u_int m = 0; m < count ; m++ ) { SEL selector = method_getName(methods[m]); std::string methodName = sel_getName(selector); if( startsWith( methodName, "Catch_TestCase_" ) ) { std::string testCaseName = methodName.substr( 15 ); std::string name = Detail::getAnnotation( cls, "Name", testCaseName ); std::string desc = Detail::getAnnotation( cls, "Description", testCaseName ); const char* className = class_getName( cls ); getMutableRegistryHub().registerTest( makeTestCase( new OcMethod( cls, selector ), className, name.c_str(), desc.c_str(), SourceLineInfo() ) ); noTestMethods++; } } free(methods); } } return noTestMethods; } namespace Matchers { namespace Impl { namespace NSStringMatchers { template struct StringHolder : MatcherImpl{ StringHolder( NSString* substr ) : m_substr( [substr copy] ){} StringHolder( StringHolder const& other ) : m_substr( [other.m_substr copy] ){} StringHolder() { arcSafeRelease( m_substr ); } NSString* m_substr; }; struct Equals : StringHolder { Equals( NSString* substr ) : StringHolder( substr ){} virtual bool match( ExpressionType const& str ) const { return (str != nil || m_substr == nil ) && [str isEqualToString:m_substr]; } virtual std::string toString() const { return "equals string: " + Catch::toString( m_substr ); } }; struct Contains : StringHolder { Contains( NSString* substr ) : StringHolder( substr ){} virtual bool match( ExpressionType const& str ) const { return (str != nil || m_substr == nil ) && [str rangeOfString:m_substr].location != NSNotFound; } virtual std::string toString() const { return "contains string: " + Catch::toString( m_substr ); } }; struct StartsWith : StringHolder { StartsWith( NSString* substr ) : StringHolder( substr ){} virtual bool match( ExpressionType const& str ) const { return (str != nil || m_substr == nil ) && [str rangeOfString:m_substr].location == 0; } virtual std::string toString() const { return "starts with: " + Catch::toString( m_substr ); } }; struct EndsWith : StringHolder { EndsWith( NSString* substr ) : StringHolder( substr ){} virtual bool match( ExpressionType const& str ) const { return (str != nil || m_substr == nil ) && [str rangeOfString:m_substr].location == [str length] - [m_substr length]; } virtual std::string toString() const { 
return "ends with: " + Catch::toString( m_substr ); } }; } // namespace NSStringMatchers } // namespace Impl inline Impl::NSStringMatchers::Equals Equals( NSString* substr ){ return Impl::NSStringMatchers::Equals( substr ); } inline Impl::NSStringMatchers::Contains Contains( NSString* substr ){ return Impl::NSStringMatchers::Contains( substr ); } inline Impl::NSStringMatchers::StartsWith StartsWith( NSString* substr ){ return Impl::NSStringMatchers::StartsWith( substr ); } inline Impl::NSStringMatchers::EndsWith EndsWith( NSString* substr ){ return Impl::NSStringMatchers::EndsWith( substr ); } } // namespace Matchers using namespace Matchers; } // namespace Catch /////////////////////////////////////////////////////////////////////////////// #define OC_TEST_CASE( name, desc )\ +(NSString*) INTERNAL_CATCH_UNIQUE_NAME( Catch_Name_test ) \ {\ return @ name; \ }\ +(NSString*) INTERNAL_CATCH_UNIQUE_NAME( Catch_Description_test ) \ { \ return @ desc; \ } \ -(void) INTERNAL_CATCH_UNIQUE_NAME( Catch_TestCase_test ) #endif #ifdef CATCH_IMPL // #included from: internal/catch_impl.hpp #define TWOBLUECUBES_CATCH_IMPL_HPP_INCLUDED // Collect all the implementation files together here // These are the equivalent of what would usually be cpp files #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wweak-vtables" #endif // #included from: ../catch_session.hpp #define TWOBLUECUBES_CATCH_RUNNER_HPP_INCLUDED // #included from: internal/catch_commandline.hpp #define TWOBLUECUBES_CATCH_COMMANDLINE_HPP_INCLUDED // #included from: catch_config.hpp #define TWOBLUECUBES_CATCH_CONFIG_HPP_INCLUDED // #included from: catch_test_spec_parser.hpp #define TWOBLUECUBES_CATCH_TEST_SPEC_PARSER_HPP_INCLUDED #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpadded" #endif // #included from: catch_test_spec.hpp #define TWOBLUECUBES_CATCH_TEST_SPEC_HPP_INCLUDED #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpadded" #endif // #included from: catch_wildcard_pattern.hpp #define TWOBLUECUBES_CATCH_WILDCARD_PATTERN_HPP_INCLUDED namespace Catch { class WildcardPattern { enum WildcardPosition { NoWildcard = 0, WildcardAtStart = 1, WildcardAtEnd = 2, WildcardAtBothEnds = WildcardAtStart | WildcardAtEnd }; public: WildcardPattern( std::string const& pattern, CaseSensitive::Choice caseSensitivity ) : m_caseSensitivity( caseSensitivity ), m_wildcard( NoWildcard ), m_pattern( adjustCase( pattern ) ) { if( startsWith( m_pattern, "*" ) ) { m_pattern = m_pattern.substr( 1 ); m_wildcard = WildcardAtStart; } if( endsWith( m_pattern, "*" ) ) { m_pattern = m_pattern.substr( 0, m_pattern.size()-1 ); m_wildcard = static_cast( m_wildcard | WildcardAtEnd ); } } virtual ~WildcardPattern(); virtual bool matches( std::string const& str ) const { switch( m_wildcard ) { case NoWildcard: return m_pattern == adjustCase( str ); case WildcardAtStart: return endsWith( adjustCase( str ), m_pattern ); case WildcardAtEnd: return startsWith( adjustCase( str ), m_pattern ); case WildcardAtBothEnds: return contains( adjustCase( str ), m_pattern ); } #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunreachable-code" #endif throw std::logic_error( "Unknown enum" ); #ifdef __clang__ #pragma clang diagnostic pop #endif } private: std::string adjustCase( std::string const& str ) const { return m_caseSensitivity == CaseSensitive::No ? 
toLower( str ) : str; } CaseSensitive::Choice m_caseSensitivity; WildcardPosition m_wildcard; std::string m_pattern; }; } #include #include namespace Catch { class TestSpec { struct Pattern : SharedImpl<> { virtual ~Pattern(); virtual bool matches( TestCaseInfo const& testCase ) const = 0; }; class NamePattern : public Pattern { public: NamePattern( std::string const& name ) : m_wildcardPattern( toLower( name ), CaseSensitive::No ) {} virtual ~NamePattern(); virtual bool matches( TestCaseInfo const& testCase ) const { return m_wildcardPattern.matches( toLower( testCase.name ) ); } private: WildcardPattern m_wildcardPattern; }; class TagPattern : public Pattern { public: TagPattern( std::string const& tag ) : m_tag( toLower( tag ) ) {} virtual ~TagPattern(); virtual bool matches( TestCaseInfo const& testCase ) const { return testCase.lcaseTags.find( m_tag ) != testCase.lcaseTags.end(); } private: std::string m_tag; }; class ExcludedPattern : public Pattern { public: ExcludedPattern( Ptr const& underlyingPattern ) : m_underlyingPattern( underlyingPattern ) {} virtual ~ExcludedPattern(); virtual bool matches( TestCaseInfo const& testCase ) const { return !m_underlyingPattern->matches( testCase ); } private: Ptr m_underlyingPattern; }; struct Filter { std::vector > m_patterns; bool matches( TestCaseInfo const& testCase ) const { // All patterns in a filter must match for the filter to be a match for( std::vector >::const_iterator it = m_patterns.begin(), itEnd = m_patterns.end(); it != itEnd; ++it ) { if( !(*it)->matches( testCase ) ) return false; } return true; } }; public: bool hasFilters() const { return !m_filters.empty(); } bool matches( TestCaseInfo const& testCase ) const { // A TestSpec matches if any filter matches for( std::vector::const_iterator it = m_filters.begin(), itEnd = m_filters.end(); it != itEnd; ++it ) if( it->matches( testCase ) ) return true; return false; } private: std::vector m_filters; friend class TestSpecParser; }; } #ifdef __clang__ #pragma clang diagnostic pop #endif namespace Catch { class TestSpecParser { enum Mode{ None, Name, QuotedName, Tag, EscapedName }; Mode m_mode; bool m_exclusion; std::size_t m_start, m_pos; std::string m_arg; std::vector m_escapeChars; TestSpec::Filter m_currentFilter; TestSpec m_testSpec; ITagAliasRegistry const* m_tagAliases; public: TestSpecParser( ITagAliasRegistry const& tagAliases ) : m_tagAliases( &tagAliases ) {} TestSpecParser& parse( std::string const& arg ) { m_mode = None; m_exclusion = false; m_start = std::string::npos; m_arg = m_tagAliases->expandAliases( arg ); m_escapeChars.clear(); for( m_pos = 0; m_pos < m_arg.size(); ++m_pos ) visitChar( m_arg[m_pos] ); if( m_mode == Name ) addPattern(); return *this; } TestSpec testSpec() { addFilter(); return m_testSpec; } private: void visitChar( char c ) { if( m_mode == None ) { switch( c ) { case ' ': return; case '~': m_exclusion = true; return; case '[': return startNewMode( Tag, ++m_pos ); case '"': return startNewMode( QuotedName, ++m_pos ); case '\\': return escape(); default: startNewMode( Name, m_pos ); break; } } if( m_mode == Name ) { if( c == ',' ) { addPattern(); addFilter(); } else if( c == '[' ) { if( subString() == "exclude:" ) m_exclusion = true; else addPattern(); startNewMode( Tag, ++m_pos ); } else if( c == '\\' ) escape(); } else if( m_mode == EscapedName ) m_mode = Name; else if( m_mode == QuotedName && c == '"' ) addPattern(); else if( m_mode == Tag && c == ']' ) addPattern(); } void startNewMode( Mode mode, std::size_t start ) { m_mode = mode; m_start = 
start; } void escape() { m_mode = EscapedName; m_escapeChars.push_back( m_pos ); } std::string subString() const { return m_arg.substr( m_start, m_pos - m_start ); } template void addPattern() { std::string token = subString(); for( size_t i = 0; i < m_escapeChars.size(); ++i ) token = token.substr( 0, m_escapeChars[i] ) + token.substr( m_escapeChars[i]+1 ); m_escapeChars.clear(); if( startsWith( token, "exclude:" ) ) { m_exclusion = true; token = token.substr( 8 ); } if( !token.empty() ) { Ptr pattern = new T( token ); if( m_exclusion ) pattern = new TestSpec::ExcludedPattern( pattern ); m_currentFilter.m_patterns.push_back( pattern ); } m_exclusion = false; m_mode = None; } void addFilter() { if( !m_currentFilter.m_patterns.empty() ) { m_testSpec.m_filters.push_back( m_currentFilter ); m_currentFilter = TestSpec::Filter(); } } }; inline TestSpec parseTestSpec( std::string const& arg ) { return TestSpecParser( ITagAliasRegistry::get() ).parse( arg ).testSpec(); } } // namespace Catch #ifdef __clang__ #pragma clang diagnostic pop #endif // #included from: catch_interfaces_config.h #define TWOBLUECUBES_CATCH_INTERFACES_CONFIG_H_INCLUDED #include #include #include namespace Catch { struct Verbosity { enum Level { NoOutput = 0, Quiet, Normal }; }; struct WarnAbout { enum What { Nothing = 0x00, NoAssertions = 0x01 }; }; struct ShowDurations { enum OrNot { DefaultForReporter, Always, Never }; }; struct RunTests { enum InWhatOrder { InDeclarationOrder, InLexicographicalOrder, InRandomOrder }; }; struct UseColour { enum YesOrNo { Auto, Yes, No }; }; class TestSpec; struct IConfig : IShared { virtual ~IConfig(); virtual bool allowThrows() const = 0; virtual std::ostream& stream() const = 0; virtual std::string name() const = 0; virtual bool includeSuccessfulResults() const = 0; virtual bool shouldDebugBreak() const = 0; virtual bool warnAboutMissingAssertions() const = 0; virtual int abortAfter() const = 0; virtual bool showInvisibles() const = 0; virtual ShowDurations::OrNot showDurations() const = 0; virtual TestSpec const& testSpec() const = 0; virtual RunTests::InWhatOrder runOrder() const = 0; virtual unsigned int rngSeed() const = 0; virtual UseColour::YesOrNo useColour() const = 0; }; } // #included from: catch_stream.h #define TWOBLUECUBES_CATCH_STREAM_H_INCLUDED // #included from: catch_streambuf.h #define TWOBLUECUBES_CATCH_STREAMBUF_H_INCLUDED #include namespace Catch { class StreamBufBase : public std::streambuf { public: virtual ~StreamBufBase() CATCH_NOEXCEPT; }; } #include #include #include #include namespace Catch { std::ostream& cout(); std::ostream& cerr(); struct IStream { virtual ~IStream() CATCH_NOEXCEPT; virtual std::ostream& stream() const = 0; }; class FileStream : public IStream { mutable std::ofstream m_ofs; public: FileStream( std::string const& filename ); virtual ~FileStream() CATCH_NOEXCEPT; public: // IStream virtual std::ostream& stream() const CATCH_OVERRIDE; }; class CoutStream : public IStream { mutable std::ostream m_os; public: CoutStream(); virtual ~CoutStream() CATCH_NOEXCEPT; public: // IStream virtual std::ostream& stream() const CATCH_OVERRIDE; }; class DebugOutStream : public IStream { CATCH_AUTO_PTR( StreamBufBase ) m_streamBuf; mutable std::ostream m_os; public: DebugOutStream(); virtual ~DebugOutStream() CATCH_NOEXCEPT; public: // IStream virtual std::ostream& stream() const CATCH_OVERRIDE; }; } #include #include #include #include #include #ifndef CATCH_CONFIG_CONSOLE_WIDTH #define CATCH_CONFIG_CONSOLE_WIDTH 80 #endif namespace Catch { struct 
ConfigData { ConfigData() : listTests( false ), listTags( false ), listReporters( false ), listTestNamesOnly( false ), showSuccessfulTests( false ), shouldDebugBreak( false ), noThrow( false ), showHelp( false ), showInvisibles( false ), filenamesAsTags( false ), abortAfter( -1 ), rngSeed( 0 ), verbosity( Verbosity::Normal ), warnings( WarnAbout::Nothing ), showDurations( ShowDurations::DefaultForReporter ), runOrder( RunTests::InDeclarationOrder ), useColour( UseColour::Auto ) {} bool listTests; bool listTags; bool listReporters; bool listTestNamesOnly; bool showSuccessfulTests; bool shouldDebugBreak; bool noThrow; bool showHelp; bool showInvisibles; bool filenamesAsTags; int abortAfter; unsigned int rngSeed; Verbosity::Level verbosity; WarnAbout::What warnings; ShowDurations::OrNot showDurations; RunTests::InWhatOrder runOrder; UseColour::YesOrNo useColour; std::string outputFilename; std::string name; std::string processName; std::vector reporterNames; std::vector testsOrTags; }; class Config : public SharedImpl { private: Config( Config const& other ); Config& operator = ( Config const& other ); virtual void dummy(); public: Config() {} Config( ConfigData const& data ) : m_data( data ), m_stream( openStream() ) { if( !data.testsOrTags.empty() ) { TestSpecParser parser( ITagAliasRegistry::get() ); for( std::size_t i = 0; i < data.testsOrTags.size(); ++i ) parser.parse( data.testsOrTags[i] ); m_testSpec = parser.testSpec(); } } virtual ~Config() { } std::string const& getFilename() const { return m_data.outputFilename ; } bool listTests() const { return m_data.listTests; } bool listTestNamesOnly() const { return m_data.listTestNamesOnly; } bool listTags() const { return m_data.listTags; } bool listReporters() const { return m_data.listReporters; } std::string getProcessName() const { return m_data.processName; } bool shouldDebugBreak() const { return m_data.shouldDebugBreak; } std::vector getReporterNames() const { return m_data.reporterNames; } int abortAfter() const { return m_data.abortAfter; } TestSpec const& testSpec() const { return m_testSpec; } bool showHelp() const { return m_data.showHelp; } bool showInvisibles() const { return m_data.showInvisibles; } // IConfig interface virtual bool allowThrows() const { return !m_data.noThrow; } virtual std::ostream& stream() const { return m_stream->stream(); } virtual std::string name() const { return m_data.name.empty() ? 
m_data.processName : m_data.name; } virtual bool includeSuccessfulResults() const { return m_data.showSuccessfulTests; } virtual bool warnAboutMissingAssertions() const { return m_data.warnings & WarnAbout::NoAssertions; } virtual ShowDurations::OrNot showDurations() const { return m_data.showDurations; } virtual RunTests::InWhatOrder runOrder() const { return m_data.runOrder; } virtual unsigned int rngSeed() const { return m_data.rngSeed; } virtual UseColour::YesOrNo useColour() const { return m_data.useColour; } private: IStream const* openStream() { if( m_data.outputFilename.empty() ) return new CoutStream(); else if( m_data.outputFilename[0] == '%' ) { if( m_data.outputFilename == "%debug" ) return new DebugOutStream(); else throw std::domain_error( "Unrecognised stream: " + m_data.outputFilename ); } else return new FileStream( m_data.outputFilename ); } ConfigData m_data; CATCH_AUTO_PTR( IStream const ) m_stream; TestSpec m_testSpec; }; } // end namespace Catch // #included from: catch_clara.h #define TWOBLUECUBES_CATCH_CLARA_H_INCLUDED // Use Catch's value for console width (store Clara's off to the side, if present) #ifdef CLARA_CONFIG_CONSOLE_WIDTH #define CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH CLARA_CONFIG_CONSOLE_WIDTH #undef CLARA_CONFIG_CONSOLE_WIDTH #endif #define CLARA_CONFIG_CONSOLE_WIDTH CATCH_CONFIG_CONSOLE_WIDTH // Declare Clara inside the Catch namespace #define STITCH_CLARA_OPEN_NAMESPACE namespace Catch { // #included from: ../external/clara.h // Version 0.0.2.4 // Only use header guard if we are not using an outer namespace #if !defined(TWOBLUECUBES_CLARA_H_INCLUDED) || defined(STITCH_CLARA_OPEN_NAMESPACE) #ifndef STITCH_CLARA_OPEN_NAMESPACE #define TWOBLUECUBES_CLARA_H_INCLUDED #define STITCH_CLARA_OPEN_NAMESPACE #define STITCH_CLARA_CLOSE_NAMESPACE #else #define STITCH_CLARA_CLOSE_NAMESPACE } #endif #define STITCH_TBC_TEXT_FORMAT_OPEN_NAMESPACE STITCH_CLARA_OPEN_NAMESPACE // ----------- #included from tbc_text_format.h ----------- // Only use header guard if we are not using an outer namespace #if !defined(TBC_TEXT_FORMAT_H_INCLUDED) || defined(STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE) #ifndef STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE #define TBC_TEXT_FORMAT_H_INCLUDED #endif #include #include #include #include // Use optional outer namespace #ifdef STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE namespace STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE { #endif namespace Tbc { #ifdef TBC_TEXT_FORMAT_CONSOLE_WIDTH const unsigned int consoleWidth = TBC_TEXT_FORMAT_CONSOLE_WIDTH; #else const unsigned int consoleWidth = 80; #endif struct TextAttributes { TextAttributes() : initialIndent( std::string::npos ), indent( 0 ), width( consoleWidth-1 ), tabChar( '\t' ) {} TextAttributes& setInitialIndent( std::size_t _value ) { initialIndent = _value; return *this; } TextAttributes& setIndent( std::size_t _value ) { indent = _value; return *this; } TextAttributes& setWidth( std::size_t _value ) { width = _value; return *this; } TextAttributes& setTabChar( char _value ) { tabChar = _value; return *this; } std::size_t initialIndent; // indent of first line, or npos std::size_t indent; // indent of subsequent lines, or all if initialIndent is npos std::size_t width; // maximum width of text, including indent. 
Longer text will wrap char tabChar; // If this char is seen the indent is changed to current pos }; class Text { public: Text( std::string const& _str, TextAttributes const& _attr = TextAttributes() ) : attr( _attr ) { std::string wrappableChars = " [({.,/|\\-"; std::size_t indent = _attr.initialIndent != std::string::npos ? _attr.initialIndent : _attr.indent; std::string remainder = _str; while( !remainder.empty() ) { if( lines.size() >= 1000 ) { lines.push_back( "... message truncated due to excessive size" ); return; } std::size_t tabPos = std::string::npos; std::size_t width = (std::min)( remainder.size(), _attr.width - indent ); std::size_t pos = remainder.find_first_of( '\n' ); if( pos <= width ) { width = pos; } pos = remainder.find_last_of( _attr.tabChar, width ); if( pos != std::string::npos ) { tabPos = pos; if( remainder[width] == '\n' ) width--; remainder = remainder.substr( 0, tabPos ) + remainder.substr( tabPos+1 ); } if( width == remainder.size() ) { spliceLine( indent, remainder, width ); } else if( remainder[width] == '\n' ) { spliceLine( indent, remainder, width ); if( width <= 1 || remainder.size() != 1 ) remainder = remainder.substr( 1 ); indent = _attr.indent; } else { pos = remainder.find_last_of( wrappableChars, width ); if( pos != std::string::npos && pos > 0 ) { spliceLine( indent, remainder, pos ); if( remainder[0] == ' ' ) remainder = remainder.substr( 1 ); } else { spliceLine( indent, remainder, width-1 ); lines.back() += "-"; } if( lines.size() == 1 ) indent = _attr.indent; if( tabPos != std::string::npos ) indent += tabPos; } } } void spliceLine( std::size_t _indent, std::string& _remainder, std::size_t _pos ) { lines.push_back( std::string( _indent, ' ' ) + _remainder.substr( 0, _pos ) ); _remainder = _remainder.substr( _pos ); } typedef std::vector::const_iterator const_iterator; const_iterator begin() const { return lines.begin(); } const_iterator end() const { return lines.end(); } std::string const& last() const { return lines.back(); } std::size_t size() const { return lines.size(); } std::string const& operator[]( std::size_t _index ) const { return lines[_index]; } std::string toString() const { std::ostringstream oss; oss << *this; return oss.str(); } inline friend std::ostream& operator << ( std::ostream& _stream, Text const& _text ) { for( Text::const_iterator it = _text.begin(), itEnd = _text.end(); it != itEnd; ++it ) { if( it != _text.begin() ) _stream << "\n"; _stream << *it; } return _stream; } private: std::string str; TextAttributes attr; std::vector lines; }; } // end namespace Tbc #ifdef STITCH_TBC_TEXT_FORMAT_OUTER_NAMESPACE } // end outer namespace #endif #endif // TBC_TEXT_FORMAT_H_INCLUDED // ----------- end of #include from tbc_text_format.h ----------- // ........... back in clara.h #undef STITCH_TBC_TEXT_FORMAT_OPEN_NAMESPACE // ----------- #included from clara_compilers.h ----------- #ifndef TWOBLUECUBES_CLARA_COMPILERS_H_INCLUDED #define TWOBLUECUBES_CLARA_COMPILERS_H_INCLUDED // Detect a number of compiler features - mostly C++11/14 conformance - by compiler // The following features are defined: // // CLARA_CONFIG_CPP11_NULLPTR : is nullptr supported? // CLARA_CONFIG_CPP11_NOEXCEPT : is noexcept supported? // CLARA_CONFIG_CPP11_GENERATED_METHODS : The delete and default keywords for compiler generated methods // CLARA_CONFIG_CPP11_OVERRIDE : is override supported? // CLARA_CONFIG_CPP11_UNIQUE_PTR : is unique_ptr supported (otherwise use auto_ptr) // CLARA_CONFIG_CPP11_OR_GREATER : Is C++11 supported? 
// CLARA_CONFIG_VARIADIC_MACROS : are variadic macros supported? // In general each macro has a _NO_ form // (e.g. CLARA_CONFIG_CPP11_NO_NULLPTR) which disables the feature. // Many features, at point of detection, define an _INTERNAL_ macro, so they // can be combined, en-mass, with the _NO_ forms later. // All the C++11 features can be disabled with CLARA_CONFIG_NO_CPP11 #ifdef __clang__ #if __has_feature(cxx_nullptr) #define CLARA_INTERNAL_CONFIG_CPP11_NULLPTR #endif #if __has_feature(cxx_noexcept) #define CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT #endif #endif // __clang__ //////////////////////////////////////////////////////////////////////////////// // GCC #ifdef __GNUC__ #if __GNUC__ == 4 && __GNUC_MINOR__ >= 6 && defined(__GXX_EXPERIMENTAL_CXX0X__) #define CLARA_INTERNAL_CONFIG_CPP11_NULLPTR #endif // - otherwise more recent versions define __cplusplus >= 201103L // and will get picked up below #endif // __GNUC__ //////////////////////////////////////////////////////////////////////////////// // Visual C++ #ifdef _MSC_VER #if (_MSC_VER >= 1600) #define CLARA_INTERNAL_CONFIG_CPP11_NULLPTR #define CLARA_INTERNAL_CONFIG_CPP11_UNIQUE_PTR #endif #if (_MSC_VER >= 1900 ) // (VC++ 13 (VS2015)) #define CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT #define CLARA_INTERNAL_CONFIG_CPP11_GENERATED_METHODS #endif #endif // _MSC_VER //////////////////////////////////////////////////////////////////////////////// // C++ language feature support // catch all support for C++11 #if defined(__cplusplus) && __cplusplus >= 201103L #define CLARA_CPP11_OR_GREATER #if !defined(CLARA_INTERNAL_CONFIG_CPP11_NULLPTR) #define CLARA_INTERNAL_CONFIG_CPP11_NULLPTR #endif #ifndef CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT #define CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT #endif #ifndef CLARA_INTERNAL_CONFIG_CPP11_GENERATED_METHODS #define CLARA_INTERNAL_CONFIG_CPP11_GENERATED_METHODS #endif #if !defined(CLARA_INTERNAL_CONFIG_CPP11_OVERRIDE) #define CLARA_INTERNAL_CONFIG_CPP11_OVERRIDE #endif #if !defined(CLARA_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) #define CLARA_INTERNAL_CONFIG_CPP11_UNIQUE_PTR #endif #endif // __cplusplus >= 201103L // Now set the actual defines based on the above + anything the user has configured #if defined(CLARA_INTERNAL_CONFIG_CPP11_NULLPTR) && !defined(CLARA_CONFIG_CPP11_NO_NULLPTR) && !defined(CLARA_CONFIG_CPP11_NULLPTR) && !defined(CLARA_CONFIG_NO_CPP11) #define CLARA_CONFIG_CPP11_NULLPTR #endif #if defined(CLARA_INTERNAL_CONFIG_CPP11_NOEXCEPT) && !defined(CLARA_CONFIG_CPP11_NO_NOEXCEPT) && !defined(CLARA_CONFIG_CPP11_NOEXCEPT) && !defined(CLARA_CONFIG_NO_CPP11) #define CLARA_CONFIG_CPP11_NOEXCEPT #endif #if defined(CLARA_INTERNAL_CONFIG_CPP11_GENERATED_METHODS) && !defined(CLARA_CONFIG_CPP11_NO_GENERATED_METHODS) && !defined(CLARA_CONFIG_CPP11_GENERATED_METHODS) && !defined(CLARA_CONFIG_NO_CPP11) #define CLARA_CONFIG_CPP11_GENERATED_METHODS #endif #if defined(CLARA_INTERNAL_CONFIG_CPP11_OVERRIDE) && !defined(CLARA_CONFIG_NO_OVERRIDE) && !defined(CLARA_CONFIG_CPP11_OVERRIDE) && !defined(CLARA_CONFIG_NO_CPP11) #define CLARA_CONFIG_CPP11_OVERRIDE #endif #if defined(CLARA_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) && !defined(CLARA_CONFIG_NO_UNIQUE_PTR) && !defined(CLARA_CONFIG_CPP11_UNIQUE_PTR) && !defined(CLARA_CONFIG_NO_CPP11) #define CLARA_CONFIG_CPP11_UNIQUE_PTR #endif // noexcept support: #if defined(CLARA_CONFIG_CPP11_NOEXCEPT) && !defined(CLARA_NOEXCEPT) #define CLARA_NOEXCEPT noexcept # define CLARA_NOEXCEPT_IS(x) noexcept(x) #else #define CLARA_NOEXCEPT throw() # define CLARA_NOEXCEPT_IS(x) #endif // nullptr support 
#ifdef CLARA_CONFIG_CPP11_NULLPTR #define CLARA_NULL nullptr #else #define CLARA_NULL NULL #endif // override support #ifdef CLARA_CONFIG_CPP11_OVERRIDE #define CLARA_OVERRIDE override #else #define CLARA_OVERRIDE #endif // unique_ptr support #ifdef CLARA_CONFIG_CPP11_UNIQUE_PTR # define CLARA_AUTO_PTR( T ) std::unique_ptr #else # define CLARA_AUTO_PTR( T ) std::auto_ptr #endif #endif // TWOBLUECUBES_CLARA_COMPILERS_H_INCLUDED // ----------- end of #include from clara_compilers.h ----------- // ........... back in clara.h #include #include #include #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) #define CLARA_PLATFORM_WINDOWS #endif // Use optional outer namespace #ifdef STITCH_CLARA_OPEN_NAMESPACE STITCH_CLARA_OPEN_NAMESPACE #endif namespace Clara { struct UnpositionalTag {}; extern UnpositionalTag _; #ifdef CLARA_CONFIG_MAIN UnpositionalTag _; #endif namespace Detail { #ifdef CLARA_CONSOLE_WIDTH const unsigned int consoleWidth = CLARA_CONFIG_CONSOLE_WIDTH; #else const unsigned int consoleWidth = 80; #endif using namespace Tbc; inline bool startsWith( std::string const& str, std::string const& prefix ) { return str.size() >= prefix.size() && str.substr( 0, prefix.size() ) == prefix; } template struct RemoveConstRef{ typedef T type; }; template struct RemoveConstRef{ typedef T type; }; template struct RemoveConstRef{ typedef T type; }; template struct RemoveConstRef{ typedef T type; }; template struct IsBool { static const bool value = false; }; template<> struct IsBool { static const bool value = true; }; template void convertInto( std::string const& _source, T& _dest ) { std::stringstream ss; ss << _source; ss >> _dest; if( ss.fail() ) throw std::runtime_error( "Unable to convert " + _source + " to destination type" ); } inline void convertInto( std::string const& _source, std::string& _dest ) { _dest = _source; } char toLowerCh(char c) { return static_cast( ::tolower( c ) ); } inline void convertInto( std::string const& _source, bool& _dest ) { std::string sourceLC = _source; std::transform( sourceLC.begin(), sourceLC.end(), sourceLC.begin(), toLowerCh ); if( sourceLC == "y" || sourceLC == "1" || sourceLC == "true" || sourceLC == "yes" || sourceLC == "on" ) _dest = true; else if( sourceLC == "n" || sourceLC == "0" || sourceLC == "false" || sourceLC == "no" || sourceLC == "off" ) _dest = false; else throw std::runtime_error( "Expected a boolean value but did not recognise:\n '" + _source + "'" ); } template struct IArgFunction { virtual ~IArgFunction() {} #ifdef CLARA_CONFIG_CPP11_GENERATED_METHODS IArgFunction() = default; IArgFunction( IArgFunction const& ) = default; #endif virtual void set( ConfigT& config, std::string const& value ) const = 0; virtual bool takesArg() const = 0; virtual IArgFunction* clone() const = 0; }; template class BoundArgFunction { public: BoundArgFunction() : functionObj( CLARA_NULL ) {} BoundArgFunction( IArgFunction* _functionObj ) : functionObj( _functionObj ) {} BoundArgFunction( BoundArgFunction const& other ) : functionObj( other.functionObj ? other.functionObj->clone() : CLARA_NULL ) {} BoundArgFunction& operator = ( BoundArgFunction const& other ) { IArgFunction* newFunctionObj = other.functionObj ? 
other.functionObj->clone() : CLARA_NULL; delete functionObj; functionObj = newFunctionObj; return *this; } ~BoundArgFunction() { delete functionObj; } void set( ConfigT& config, std::string const& value ) const { functionObj->set( config, value ); } bool takesArg() const { return functionObj->takesArg(); } bool isSet() const { return functionObj != CLARA_NULL; } private: IArgFunction* functionObj; }; template struct NullBinder : IArgFunction{ virtual void set( C&, std::string const& ) const {} virtual bool takesArg() const { return true; } virtual IArgFunction* clone() const { return new NullBinder( *this ); } }; template struct BoundDataMember : IArgFunction{ BoundDataMember( M C::* _member ) : member( _member ) {} virtual void set( C& p, std::string const& stringValue ) const { convertInto( stringValue, p.*member ); } virtual bool takesArg() const { return !IsBool::value; } virtual IArgFunction* clone() const { return new BoundDataMember( *this ); } M C::* member; }; template struct BoundUnaryMethod : IArgFunction{ BoundUnaryMethod( void (C::*_member)( M ) ) : member( _member ) {} virtual void set( C& p, std::string const& stringValue ) const { typename RemoveConstRef::type value; convertInto( stringValue, value ); (p.*member)( value ); } virtual bool takesArg() const { return !IsBool::value; } virtual IArgFunction* clone() const { return new BoundUnaryMethod( *this ); } void (C::*member)( M ); }; template struct BoundNullaryMethod : IArgFunction{ BoundNullaryMethod( void (C::*_member)() ) : member( _member ) {} virtual void set( C& p, std::string const& stringValue ) const { bool value; convertInto( stringValue, value ); if( value ) (p.*member)(); } virtual bool takesArg() const { return false; } virtual IArgFunction* clone() const { return new BoundNullaryMethod( *this ); } void (C::*member)(); }; template struct BoundUnaryFunction : IArgFunction{ BoundUnaryFunction( void (*_function)( C& ) ) : function( _function ) {} virtual void set( C& obj, std::string const& stringValue ) const { bool value; convertInto( stringValue, value ); if( value ) function( obj ); } virtual bool takesArg() const { return false; } virtual IArgFunction* clone() const { return new BoundUnaryFunction( *this ); } void (*function)( C& ); }; template struct BoundBinaryFunction : IArgFunction{ BoundBinaryFunction( void (*_function)( C&, T ) ) : function( _function ) {} virtual void set( C& obj, std::string const& stringValue ) const { typename RemoveConstRef::type value; convertInto( stringValue, value ); function( obj, value ); } virtual bool takesArg() const { return !IsBool::value; } virtual IArgFunction* clone() const { return new BoundBinaryFunction( *this ); } void (*function)( C&, T ); }; } // namespace Detail inline std::vector argsToVector( int argc, char const* const* const argv ) { std::vector args( static_cast( argc ) ); for( std::size_t i = 0; i < static_cast( argc ); ++i ) args[i] = argv[i]; return args; } class Parser { enum Mode { None, MaybeShortOpt, SlashOpt, ShortOpt, LongOpt, Positional }; Mode mode; std::size_t from; bool inQuotes; public: struct Token { enum Type { Positional, ShortOpt, LongOpt }; Token( Type _type, std::string const& _data ) : type( _type ), data( _data ) {} Type type; std::string data; }; Parser() : mode( None ), from( 0 ), inQuotes( false ){} void parseIntoTokens( std::vector const& args, std::vector& tokens ) { const std::string doubleDash = "--"; for( std::size_t i = 1; i < args.size() && args[i] != doubleDash; ++i ) parseIntoTokens( args[i], tokens); } void 
parseIntoTokens( std::string const& arg, std::vector& tokens ) { for( std::size_t i = 0; i <= arg.size(); ++i ) { char c = arg[i]; if( c == '"' ) inQuotes = !inQuotes; mode = handleMode( i, c, arg, tokens ); } } Mode handleMode( std::size_t i, char c, std::string const& arg, std::vector& tokens ) { switch( mode ) { case None: return handleNone( i, c ); case MaybeShortOpt: return handleMaybeShortOpt( i, c ); case ShortOpt: case LongOpt: case SlashOpt: return handleOpt( i, c, arg, tokens ); case Positional: return handlePositional( i, c, arg, tokens ); default: throw std::logic_error( "Unknown mode" ); } } Mode handleNone( std::size_t i, char c ) { if( inQuotes ) { from = i; return Positional; } switch( c ) { case '-': return MaybeShortOpt; #ifdef CLARA_PLATFORM_WINDOWS case '/': from = i+1; return SlashOpt; #endif default: from = i; return Positional; } } Mode handleMaybeShortOpt( std::size_t i, char c ) { switch( c ) { case '-': from = i+1; return LongOpt; default: from = i; return ShortOpt; } } Mode handleOpt( std::size_t i, char c, std::string const& arg, std::vector& tokens ) { if( std::string( ":=\0", 3 ).find( c ) == std::string::npos ) return mode; std::string optName = arg.substr( from, i-from ); if( mode == ShortOpt ) for( std::size_t j = 0; j < optName.size(); ++j ) tokens.push_back( Token( Token::ShortOpt, optName.substr( j, 1 ) ) ); else if( mode == SlashOpt && optName.size() == 1 ) tokens.push_back( Token( Token::ShortOpt, optName ) ); else tokens.push_back( Token( Token::LongOpt, optName ) ); return None; } Mode handlePositional( std::size_t i, char c, std::string const& arg, std::vector& tokens ) { if( inQuotes || std::string( "\0", 1 ).find( c ) == std::string::npos ) return mode; std::string data = arg.substr( from, i-from ); tokens.push_back( Token( Token::Positional, data ) ); return None; } }; template struct CommonArgProperties { CommonArgProperties() {} CommonArgProperties( Detail::BoundArgFunction const& _boundField ) : boundField( _boundField ) {} Detail::BoundArgFunction boundField; std::string description; std::string detail; std::string placeholder; // Only value if boundField takes an arg bool takesArg() const { return !placeholder.empty(); } void validate() const { if( !boundField.isSet() ) throw std::logic_error( "option not bound" ); } }; struct OptionArgProperties { std::vector shortNames; std::string longName; bool hasShortName( std::string const& shortName ) const { return std::find( shortNames.begin(), shortNames.end(), shortName ) != shortNames.end(); } bool hasLongName( std::string const& _longName ) const { return _longName == longName; } }; struct PositionalArgProperties { PositionalArgProperties() : position( -1 ) {} int position; // -1 means non-positional (floating) bool isFixedPositional() const { return position != -1; } }; template class CommandLine { struct Arg : CommonArgProperties, OptionArgProperties, PositionalArgProperties { Arg() {} Arg( Detail::BoundArgFunction const& _boundField ) : CommonArgProperties( _boundField ) {} using CommonArgProperties::placeholder; // !TBD std::string dbgName() const { if( !longName.empty() ) return "--" + longName; if( !shortNames.empty() ) return "-" + shortNames[0]; return "positional args"; } std::string commands() const { std::ostringstream oss; bool first = true; std::vector::const_iterator it = shortNames.begin(), itEnd = shortNames.end(); for(; it != itEnd; ++it ) { if( first ) first = false; else oss << ", "; oss << "-" << *it; } if( !longName.empty() ) { if( !first ) oss << ", "; oss << "--" << 
longName; } if( !placeholder.empty() ) oss << " <" << placeholder << ">"; return oss.str(); } }; typedef CLARA_AUTO_PTR( Arg ) ArgAutoPtr; friend void addOptName( Arg& arg, std::string const& optName ) { if( optName.empty() ) return; if( Detail::startsWith( optName, "--" ) ) { if( !arg.longName.empty() ) throw std::logic_error( "Only one long opt may be specified. '" + arg.longName + "' already specified, now attempting to add '" + optName + "'" ); arg.longName = optName.substr( 2 ); } else if( Detail::startsWith( optName, "-" ) ) arg.shortNames.push_back( optName.substr( 1 ) ); else throw std::logic_error( "option must begin with - or --. Option was: '" + optName + "'" ); } friend void setPositionalArg( Arg& arg, int position ) { arg.position = position; } class ArgBuilder { public: ArgBuilder( Arg* arg ) : m_arg( arg ) {} // Bind a non-boolean data member (requires placeholder string) template void bind( M C::* field, std::string const& placeholder ) { m_arg->boundField = new Detail::BoundDataMember( field ); m_arg->placeholder = placeholder; } // Bind a boolean data member (no placeholder required) template void bind( bool C::* field ) { m_arg->boundField = new Detail::BoundDataMember( field ); } // Bind a method taking a single, non-boolean argument (requires a placeholder string) template void bind( void (C::* unaryMethod)( M ), std::string const& placeholder ) { m_arg->boundField = new Detail::BoundUnaryMethod( unaryMethod ); m_arg->placeholder = placeholder; } // Bind a method taking a single, boolean argument (no placeholder string required) template void bind( void (C::* unaryMethod)( bool ) ) { m_arg->boundField = new Detail::BoundUnaryMethod( unaryMethod ); } // Bind a method that takes no arguments (will be called if opt is present) template void bind( void (C::* nullaryMethod)() ) { m_arg->boundField = new Detail::BoundNullaryMethod( nullaryMethod ); } // Bind a free function taking a single argument - the object to operate on (no placeholder string required) template void bind( void (* unaryFunction)( C& ) ) { m_arg->boundField = new Detail::BoundUnaryFunction( unaryFunction ); } // Bind a free function taking a single argument - the object to operate on (requires a placeholder string) template void bind( void (* binaryFunction)( C&, T ), std::string const& placeholder ) { m_arg->boundField = new Detail::BoundBinaryFunction( binaryFunction ); m_arg->placeholder = placeholder; } ArgBuilder& describe( std::string const& description ) { m_arg->description = description; return *this; } ArgBuilder& detail( std::string const& detail ) { m_arg->detail = detail; return *this; } protected: Arg* m_arg; }; class OptBuilder : public ArgBuilder { public: OptBuilder( Arg* arg ) : ArgBuilder( arg ) {} OptBuilder( OptBuilder& other ) : ArgBuilder( other ) {} OptBuilder& operator[]( std::string const& optName ) { addOptName( *ArgBuilder::m_arg, optName ); return *this; } }; public: CommandLine() : m_boundProcessName( new Detail::NullBinder() ), m_highestSpecifiedArgPosition( 0 ), m_throwOnUnrecognisedTokens( false ) {} CommandLine( CommandLine const& other ) : m_boundProcessName( other.m_boundProcessName ), m_options ( other.m_options ), m_positionalArgs( other.m_positionalArgs ), m_highestSpecifiedArgPosition( other.m_highestSpecifiedArgPosition ), m_throwOnUnrecognisedTokens( other.m_throwOnUnrecognisedTokens ) { if( other.m_floatingArg.get() ) m_floatingArg.reset( new Arg( *other.m_floatingArg ) ); } CommandLine& setThrowOnUnrecognisedTokens( bool shouldThrow = true ) { 
m_throwOnUnrecognisedTokens = shouldThrow; return *this; } OptBuilder operator[]( std::string const& optName ) { m_options.push_back( Arg() ); addOptName( m_options.back(), optName ); OptBuilder builder( &m_options.back() ); return builder; } ArgBuilder operator[]( int position ) { m_positionalArgs.insert( std::make_pair( position, Arg() ) ); if( position > m_highestSpecifiedArgPosition ) m_highestSpecifiedArgPosition = position; setPositionalArg( m_positionalArgs[position], position ); ArgBuilder builder( &m_positionalArgs[position] ); return builder; } // Invoke this with the _ instance ArgBuilder operator[]( UnpositionalTag ) { if( m_floatingArg.get() ) throw std::logic_error( "Only one unpositional argument can be added" ); m_floatingArg.reset( new Arg() ); ArgBuilder builder( m_floatingArg.get() ); return builder; } template void bindProcessName( M C::* field ) { m_boundProcessName = new Detail::BoundDataMember( field ); } template void bindProcessName( void (C::*_unaryMethod)( M ) ) { m_boundProcessName = new Detail::BoundUnaryMethod( _unaryMethod ); } void optUsage( std::ostream& os, std::size_t indent = 0, std::size_t width = Detail::consoleWidth ) const { typename std::vector::const_iterator itBegin = m_options.begin(), itEnd = m_options.end(), it; std::size_t maxWidth = 0; for( it = itBegin; it != itEnd; ++it ) maxWidth = (std::max)( maxWidth, it->commands().size() ); for( it = itBegin; it != itEnd; ++it ) { Detail::Text usage( it->commands(), Detail::TextAttributes() .setWidth( maxWidth+indent ) .setIndent( indent ) ); Detail::Text desc( it->description, Detail::TextAttributes() .setWidth( width - maxWidth - 3 ) ); for( std::size_t i = 0; i < (std::max)( usage.size(), desc.size() ); ++i ) { std::string usageCol = i < usage.size() ? usage[i] : ""; os << usageCol; if( i < desc.size() && !desc[i].empty() ) os << std::string( indent + 2 + maxWidth - usageCol.size(), ' ' ) << desc[i]; os << "\n"; } } } std::string optUsage() const { std::ostringstream oss; optUsage( oss ); return oss.str(); } void argSynopsis( std::ostream& os ) const { for( int i = 1; i <= m_highestSpecifiedArgPosition; ++i ) { if( i > 1 ) os << " "; typename std::map::const_iterator it = m_positionalArgs.find( i ); if( it != m_positionalArgs.end() ) os << "<" << it->second.placeholder << ">"; else if( m_floatingArg.get() ) os << "<" << m_floatingArg->placeholder << ">"; else throw std::logic_error( "non consecutive positional arguments with no floating args" ); } // !TBD No indication of mandatory args if( m_floatingArg.get() ) { if( m_highestSpecifiedArgPosition > 1 ) os << " "; os << "[<" << m_floatingArg->placeholder << "> ...]"; } } std::string argSynopsis() const { std::ostringstream oss; argSynopsis( oss ); return oss.str(); } void usage( std::ostream& os, std::string const& procName ) const { validate(); os << "usage:\n " << procName << " "; argSynopsis( os ); if( !m_options.empty() ) { os << " [options]\n\nwhere options are: \n"; optUsage( os, 2 ); } os << "\n"; } std::string usage( std::string const& procName ) const { std::ostringstream oss; usage( oss, procName ); return oss.str(); } ConfigT parse( std::vector const& args ) const { ConfigT config; parseInto( args, config ); return config; } std::vector parseInto( std::vector const& args, ConfigT& config ) const { std::string processName = args[0]; std::size_t lastSlash = processName.find_last_of( "/\\" ); if( lastSlash != std::string::npos ) processName = processName.substr( lastSlash+1 ); m_boundProcessName.set( config, processName ); std::vector 
tokens; Parser parser; parser.parseIntoTokens( args, tokens ); return populate( tokens, config ); } std::vector populate( std::vector const& tokens, ConfigT& config ) const { validate(); std::vector unusedTokens = populateOptions( tokens, config ); unusedTokens = populateFixedArgs( unusedTokens, config ); unusedTokens = populateFloatingArgs( unusedTokens, config ); return unusedTokens; } std::vector populateOptions( std::vector const& tokens, ConfigT& config ) const { std::vector unusedTokens; std::vector errors; for( std::size_t i = 0; i < tokens.size(); ++i ) { Parser::Token const& token = tokens[i]; typename std::vector::const_iterator it = m_options.begin(), itEnd = m_options.end(); for(; it != itEnd; ++it ) { Arg const& arg = *it; try { if( ( token.type == Parser::Token::ShortOpt && arg.hasShortName( token.data ) ) || ( token.type == Parser::Token::LongOpt && arg.hasLongName( token.data ) ) ) { if( arg.takesArg() ) { if( i == tokens.size()-1 || tokens[i+1].type != Parser::Token::Positional ) errors.push_back( "Expected argument to option: " + token.data ); else arg.boundField.set( config, tokens[++i].data ); } else { arg.boundField.set( config, "true" ); } break; } } catch( std::exception& ex ) { errors.push_back( std::string( ex.what() ) + "\n- while parsing: (" + arg.commands() + ")" ); } } if( it == itEnd ) { if( token.type == Parser::Token::Positional || !m_throwOnUnrecognisedTokens ) unusedTokens.push_back( token ); else if( errors.empty() && m_throwOnUnrecognisedTokens ) errors.push_back( "unrecognised option: " + token.data ); } } if( !errors.empty() ) { std::ostringstream oss; for( std::vector::const_iterator it = errors.begin(), itEnd = errors.end(); it != itEnd; ++it ) { if( it != errors.begin() ) oss << "\n"; oss << *it; } throw std::runtime_error( oss.str() ); } return unusedTokens; } std::vector populateFixedArgs( std::vector const& tokens, ConfigT& config ) const { std::vector unusedTokens; int position = 1; for( std::size_t i = 0; i < tokens.size(); ++i ) { Parser::Token const& token = tokens[i]; typename std::map::const_iterator it = m_positionalArgs.find( position ); if( it != m_positionalArgs.end() ) it->second.boundField.set( config, token.data ); else unusedTokens.push_back( token ); if( token.type == Parser::Token::Positional ) position++; } return unusedTokens; } std::vector populateFloatingArgs( std::vector const& tokens, ConfigT& config ) const { if( !m_floatingArg.get() ) return tokens; std::vector unusedTokens; for( std::size_t i = 0; i < tokens.size(); ++i ) { Parser::Token const& token = tokens[i]; if( token.type == Parser::Token::Positional ) m_floatingArg->boundField.set( config, token.data ); else unusedTokens.push_back( token ); } return unusedTokens; } void validate() const { if( m_options.empty() && m_positionalArgs.empty() && !m_floatingArg.get() ) throw std::logic_error( "No options or arguments specified" ); for( typename std::vector::const_iterator it = m_options.begin(), itEnd = m_options.end(); it != itEnd; ++it ) it->validate(); } private: Detail::BoundArgFunction m_boundProcessName; std::vector m_options; std::map m_positionalArgs; ArgAutoPtr m_floatingArg; int m_highestSpecifiedArgPosition; bool m_throwOnUnrecognisedTokens; }; } // end namespace Clara STITCH_CLARA_CLOSE_NAMESPACE #undef STITCH_CLARA_OPEN_NAMESPACE #undef STITCH_CLARA_CLOSE_NAMESPACE #endif // TWOBLUECUBES_CLARA_H_INCLUDED #undef STITCH_CLARA_OPEN_NAMESPACE // Restore Clara's value for console width, if present #ifdef CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH #define 
CLARA_CONFIG_CONSOLE_WIDTH CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH #undef CATCH_TEMP_CLARA_CONFIG_CONSOLE_WIDTH #endif #include namespace Catch { inline void abortAfterFirst( ConfigData& config ) { config.abortAfter = 1; } inline void abortAfterX( ConfigData& config, int x ) { if( x < 1 ) throw std::runtime_error( "Value after -x or --abortAfter must be greater than zero" ); config.abortAfter = x; } inline void addTestOrTags( ConfigData& config, std::string const& _testSpec ) { config.testsOrTags.push_back( _testSpec ); } inline void addReporterName( ConfigData& config, std::string const& _reporterName ) { config.reporterNames.push_back( _reporterName ); } inline void addWarning( ConfigData& config, std::string const& _warning ) { if( _warning == "NoAssertions" ) config.warnings = static_cast( config.warnings | WarnAbout::NoAssertions ); else throw std::runtime_error( "Unrecognised warning: '" + _warning + "'" ); } inline void setOrder( ConfigData& config, std::string const& order ) { if( startsWith( "declared", order ) ) config.runOrder = RunTests::InDeclarationOrder; else if( startsWith( "lexical", order ) ) config.runOrder = RunTests::InLexicographicalOrder; else if( startsWith( "random", order ) ) config.runOrder = RunTests::InRandomOrder; else throw std::runtime_error( "Unrecognised ordering: '" + order + "'" ); } inline void setRngSeed( ConfigData& config, std::string const& seed ) { if( seed == "time" ) { config.rngSeed = static_cast( std::time(0) ); } else { std::stringstream ss; ss << seed; ss >> config.rngSeed; if( ss.fail() ) throw std::runtime_error( "Argument to --rng-seed should be the word 'time' or a number" ); } } inline void setVerbosity( ConfigData& config, int level ) { // !TBD: accept strings? config.verbosity = static_cast( level ); } inline void setShowDurations( ConfigData& config, bool _showDurations ) { config.showDurations = _showDurations ?
ShowDurations::Always : ShowDurations::Never; } inline void setUseColour( ConfigData& config, std::string const& value ) { std::string mode = toLower( value ); if( mode == "yes" ) config.useColour = UseColour::Yes; else if( mode == "no" ) config.useColour = UseColour::No; else if( mode == "auto" ) config.useColour = UseColour::Auto; else throw std::runtime_error( "colour mode must be one of: auto, yes or no" ); } inline void forceColour( ConfigData& config ) { config.useColour = UseColour::Yes; } inline void loadTestNamesFromFile( ConfigData& config, std::string const& _filename ) { std::ifstream f( _filename.c_str() ); if( !f.is_open() ) throw std::domain_error( "Unable to load input file: " + _filename ); std::string line; while( std::getline( f, line ) ) { line = trim(line); if( !line.empty() && !startsWith( line, "#" ) ) { if( !startsWith( line, "\"" ) ) line = "\"" + line + "\""; addTestOrTags( config, line + "," ); } } } inline Clara::CommandLine makeCommandLineParser() { using namespace Clara; CommandLine cli; cli.bindProcessName( &ConfigData::processName ); cli["-?"]["-h"]["--help"] .describe( "display usage information" ) .bind( &ConfigData::showHelp ); cli["-l"]["--list-tests"] .describe( "list all/matching test cases" ) .bind( &ConfigData::listTests ); cli["-t"]["--list-tags"] .describe( "list all/matching tags" ) .bind( &ConfigData::listTags ); cli["-s"]["--success"] .describe( "include successful tests in output" ) .bind( &ConfigData::showSuccessfulTests ); cli["-b"]["--break"] .describe( "break into debugger on failure" ) .bind( &ConfigData::shouldDebugBreak ); cli["-e"]["--nothrow"] .describe( "skip exception tests" ) .bind( &ConfigData::noThrow ); cli["-i"]["--invisibles"] .describe( "show invisibles (tabs, newlines)" ) .bind( &ConfigData::showInvisibles ); cli["-o"]["--out"] .describe( "output filename" ) .bind( &ConfigData::outputFilename, "filename" ); cli["-r"]["--reporter"] // .placeholder( "name[:filename]" ) .describe( "reporter to use (defaults to console)" ) .bind( &addReporterName, "name" ); cli["-n"]["--name"] .describe( "suite name" ) .bind( &ConfigData::name, "name" ); cli["-a"]["--abort"] .describe( "abort at first failure" ) .bind( &abortAfterFirst ); cli["-x"]["--abortx"] .describe( "abort after x failures" ) .bind( &abortAfterX, "no. 
failures" ); cli["-w"]["--warn"] .describe( "enable warnings" ) .bind( &addWarning, "warning name" ); // - needs updating if reinstated // cli.into( &setVerbosity ) // .describe( "level of verbosity (0=no output)" ) // .shortOpt( "v") // .longOpt( "verbosity" ) // .placeholder( "level" ); cli[_] .describe( "which test or tests to use" ) .bind( &addTestOrTags, "test name, pattern or tags" ); cli["-d"]["--durations"] .describe( "show test durations" ) .bind( &setShowDurations, "yes|no" ); cli["-f"]["--input-file"] .describe( "load test names to run from a file" ) .bind( &loadTestNamesFromFile, "filename" ); cli["-#"]["--filenames-as-tags"] .describe( "adds a tag for the filename" ) .bind( &ConfigData::filenamesAsTags ); // Less common commands which don't have a short form cli["--list-test-names-only"] .describe( "list all/matching test cases names only" ) .bind( &ConfigData::listTestNamesOnly ); cli["--list-reporters"] .describe( "list all reporters" ) .bind( &ConfigData::listReporters ); cli["--order"] .describe( "test case order (defaults to decl)" ) .bind( &setOrder, "decl|lex|rand" ); cli["--rng-seed"] .describe( "set a specific seed for random numbers" ) .bind( &setRngSeed, "'time'|number" ); cli["--force-colour"] .describe( "force colourised output (deprecated)" ) .bind( &forceColour ); cli["--use-colour"] .describe( "should output be colourised" ) .bind( &setUseColour, "yes|no" ); return cli; } } // end namespace Catch // #included from: internal/catch_list.hpp #define TWOBLUECUBES_CATCH_LIST_HPP_INCLUDED // #included from: catch_text.h #define TWOBLUECUBES_CATCH_TEXT_H_INCLUDED #define TBC_TEXT_FORMAT_CONSOLE_WIDTH CATCH_CONFIG_CONSOLE_WIDTH #define CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE Catch // #included from: ../external/tbc_text_format.h // Only use header guard if we are not using an outer namespace #ifndef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE # ifdef TWOBLUECUBES_TEXT_FORMAT_H_INCLUDED # ifndef TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED # define TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED # endif # else # define TWOBLUECUBES_TEXT_FORMAT_H_INCLUDED # endif #endif #ifndef TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED #include #include #include // Use optional outer namespace #ifdef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE namespace CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE { #endif namespace Tbc { #ifdef TBC_TEXT_FORMAT_CONSOLE_WIDTH const unsigned int consoleWidth = TBC_TEXT_FORMAT_CONSOLE_WIDTH; #else const unsigned int consoleWidth = 80; #endif struct TextAttributes { TextAttributes() : initialIndent( std::string::npos ), indent( 0 ), width( consoleWidth-1 ), tabChar( '\t' ) {} TextAttributes& setInitialIndent( std::size_t _value ) { initialIndent = _value; return *this; } TextAttributes& setIndent( std::size_t _value ) { indent = _value; return *this; } TextAttributes& setWidth( std::size_t _value ) { width = _value; return *this; } TextAttributes& setTabChar( char _value ) { tabChar = _value; return *this; } std::size_t initialIndent; // indent of first line, or npos std::size_t indent; // indent of subsequent lines, or all if initialIndent is npos std::size_t width; // maximum width of text, including indent. Longer text will wrap char tabChar; // If this char is seen the indent is changed to current pos }; class Text { public: Text( std::string const& _str, TextAttributes const& _attr = TextAttributes() ) : attr( _attr ) { std::string wrappableChars = " [({.,/|\\-"; std::size_t indent = _attr.initialIndent != std::string::npos ? 
_attr.initialIndent : _attr.indent; std::string remainder = _str; while( !remainder.empty() ) { if( lines.size() >= 1000 ) { lines.push_back( "... message truncated due to excessive size" ); return; } std::size_t tabPos = std::string::npos; std::size_t width = (std::min)( remainder.size(), _attr.width - indent ); std::size_t pos = remainder.find_first_of( '\n' ); if( pos <= width ) { width = pos; } pos = remainder.find_last_of( _attr.tabChar, width ); if( pos != std::string::npos ) { tabPos = pos; if( remainder[width] == '\n' ) width--; remainder = remainder.substr( 0, tabPos ) + remainder.substr( tabPos+1 ); } if( width == remainder.size() ) { spliceLine( indent, remainder, width ); } else if( remainder[width] == '\n' ) { spliceLine( indent, remainder, width ); if( width <= 1 || remainder.size() != 1 ) remainder = remainder.substr( 1 ); indent = _attr.indent; } else { pos = remainder.find_last_of( wrappableChars, width ); if( pos != std::string::npos && pos > 0 ) { spliceLine( indent, remainder, pos ); if( remainder[0] == ' ' ) remainder = remainder.substr( 1 ); } else { spliceLine( indent, remainder, width-1 ); lines.back() += "-"; } if( lines.size() == 1 ) indent = _attr.indent; if( tabPos != std::string::npos ) indent += tabPos; } } } void spliceLine( std::size_t _indent, std::string& _remainder, std::size_t _pos ) { lines.push_back( std::string( _indent, ' ' ) + _remainder.substr( 0, _pos ) ); _remainder = _remainder.substr( _pos ); } typedef std::vector::const_iterator const_iterator; const_iterator begin() const { return lines.begin(); } const_iterator end() const { return lines.end(); } std::string const& last() const { return lines.back(); } std::size_t size() const { return lines.size(); } std::string const& operator[]( std::size_t _index ) const { return lines[_index]; } std::string toString() const { std::ostringstream oss; oss << *this; return oss.str(); } inline friend std::ostream& operator << ( std::ostream& _stream, Text const& _text ) { for( Text::const_iterator it = _text.begin(), itEnd = _text.end(); it != itEnd; ++it ) { if( it != _text.begin() ) _stream << "\n"; _stream << *it; } return _stream; } private: std::string str; TextAttributes attr; std::vector lines; }; } // end namespace Tbc #ifdef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE } // end outer namespace #endif #endif // TWOBLUECUBES_TEXT_FORMAT_H_ALREADY_INCLUDED #undef CLICHE_TBC_TEXT_FORMAT_OUTER_NAMESPACE namespace Catch { using Tbc::Text; using Tbc::TextAttributes; } // #included from: catch_console_colour.hpp #define TWOBLUECUBES_CATCH_CONSOLE_COLOUR_HPP_INCLUDED namespace Catch { struct Colour { enum Code { None = 0, White, Red, Green, Blue, Cyan, Yellow, Grey, Bright = 0x10, BrightRed = Bright | Red, BrightGreen = Bright | Green, LightGrey = Bright | Grey, BrightWhite = Bright | White, // By intention FileName = LightGrey, Warning = Yellow, ResultError = BrightRed, ResultSuccess = BrightGreen, ResultExpectedFailure = Warning, Error = BrightRed, Success = Green, OriginalExpression = Cyan, ReconstructedExpression = Yellow, SecondaryText = LightGrey, Headers = White }; // Use constructed object for RAII guard Colour( Code _colourCode ); Colour( Colour const& other ); ~Colour(); // Use static method for one-shot changes static void use( Code _colourCode ); private: bool m_moved; }; inline std::ostream& operator << ( std::ostream& os, Colour const& ) { return os; } } // end namespace Catch // #included from: catch_interfaces_reporter.h #define TWOBLUECUBES_CATCH_INTERFACES_REPORTER_H_INCLUDED #include #include 
#include #include namespace Catch { struct ReporterConfig { explicit ReporterConfig( Ptr const& _fullConfig ) : m_stream( &_fullConfig->stream() ), m_fullConfig( _fullConfig ) {} ReporterConfig( Ptr const& _fullConfig, std::ostream& _stream ) : m_stream( &_stream ), m_fullConfig( _fullConfig ) {} std::ostream& stream() const { return *m_stream; } Ptr fullConfig() const { return m_fullConfig; } private: std::ostream* m_stream; Ptr m_fullConfig; }; struct ReporterPreferences { ReporterPreferences() : shouldRedirectStdOut( false ) {} bool shouldRedirectStdOut; }; template struct LazyStat : Option { LazyStat() : used( false ) {} LazyStat& operator=( T const& _value ) { Option::operator=( _value ); used = false; return *this; } void reset() { Option::reset(); used = false; } bool used; }; struct TestRunInfo { TestRunInfo( std::string const& _name ) : name( _name ) {} std::string name; }; struct GroupInfo { GroupInfo( std::string const& _name, std::size_t _groupIndex, std::size_t _groupsCount ) : name( _name ), groupIndex( _groupIndex ), groupsCounts( _groupsCount ) {} std::string name; std::size_t groupIndex; std::size_t groupsCounts; }; struct AssertionStats { AssertionStats( AssertionResult const& _assertionResult, std::vector const& _infoMessages, Totals const& _totals ) : assertionResult( _assertionResult ), infoMessages( _infoMessages ), totals( _totals ) { if( assertionResult.hasMessage() ) { // Copy message into messages list. // !TBD This should have been done earlier, somewhere MessageBuilder builder( assertionResult.getTestMacroName(), assertionResult.getSourceInfo(), assertionResult.getResultType() ); builder << assertionResult.getMessage(); builder.m_info.message = builder.m_stream.str(); infoMessages.push_back( builder.m_info ); } } virtual ~AssertionStats(); # ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS AssertionStats( AssertionStats const& ) = default; AssertionStats( AssertionStats && ) = default; AssertionStats& operator = ( AssertionStats const& ) = default; AssertionStats& operator = ( AssertionStats && ) = default; # endif AssertionResult assertionResult; std::vector infoMessages; Totals totals; }; struct SectionStats { SectionStats( SectionInfo const& _sectionInfo, Counts const& _assertions, double _durationInSeconds, bool _missingAssertions ) : sectionInfo( _sectionInfo ), assertions( _assertions ), durationInSeconds( _durationInSeconds ), missingAssertions( _missingAssertions ) {} virtual ~SectionStats(); # ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS SectionStats( SectionStats const& ) = default; SectionStats( SectionStats && ) = default; SectionStats& operator = ( SectionStats const& ) = default; SectionStats& operator = ( SectionStats && ) = default; # endif SectionInfo sectionInfo; Counts assertions; double durationInSeconds; bool missingAssertions; }; struct TestCaseStats { TestCaseStats( TestCaseInfo const& _testInfo, Totals const& _totals, std::string const& _stdOut, std::string const& _stdErr, bool _aborting ) : testInfo( _testInfo ), totals( _totals ), stdOut( _stdOut ), stdErr( _stdErr ), aborting( _aborting ) {} virtual ~TestCaseStats(); # ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS TestCaseStats( TestCaseStats const& ) = default; TestCaseStats( TestCaseStats && ) = default; TestCaseStats& operator = ( TestCaseStats const& ) = default; TestCaseStats& operator = ( TestCaseStats && ) = default; # endif TestCaseInfo testInfo; Totals totals; std::string stdOut; std::string stdErr; bool aborting; }; struct TestGroupStats { TestGroupStats( GroupInfo const& _groupInfo, 
Totals const& _totals, bool _aborting ) : groupInfo( _groupInfo ), totals( _totals ), aborting( _aborting ) {} TestGroupStats( GroupInfo const& _groupInfo ) : groupInfo( _groupInfo ), aborting( false ) {} virtual ~TestGroupStats(); # ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS TestGroupStats( TestGroupStats const& ) = default; TestGroupStats( TestGroupStats && ) = default; TestGroupStats& operator = ( TestGroupStats const& ) = default; TestGroupStats& operator = ( TestGroupStats && ) = default; # endif GroupInfo groupInfo; Totals totals; bool aborting; }; struct TestRunStats { TestRunStats( TestRunInfo const& _runInfo, Totals const& _totals, bool _aborting ) : runInfo( _runInfo ), totals( _totals ), aborting( _aborting ) {} virtual ~TestRunStats(); # ifndef CATCH_CONFIG_CPP11_GENERATED_METHODS TestRunStats( TestRunStats const& _other ) : runInfo( _other.runInfo ), totals( _other.totals ), aborting( _other.aborting ) {} # else TestRunStats( TestRunStats const& ) = default; TestRunStats( TestRunStats && ) = default; TestRunStats& operator = ( TestRunStats const& ) = default; TestRunStats& operator = ( TestRunStats && ) = default; # endif TestRunInfo runInfo; Totals totals; bool aborting; }; class MultipleReporters; struct IStreamingReporter : IShared { virtual ~IStreamingReporter(); // Implementing class must also provide the following static method: // static std::string getDescription(); virtual ReporterPreferences getPreferences() const = 0; virtual void noMatchingTestCases( std::string const& spec ) = 0; virtual void testRunStarting( TestRunInfo const& testRunInfo ) = 0; virtual void testGroupStarting( GroupInfo const& groupInfo ) = 0; virtual void testCaseStarting( TestCaseInfo const& testInfo ) = 0; virtual void sectionStarting( SectionInfo const& sectionInfo ) = 0; virtual void assertionStarting( AssertionInfo const& assertionInfo ) = 0; // The return value indicates if the messages buffer should be cleared: virtual bool assertionEnded( AssertionStats const& assertionStats ) = 0; virtual void sectionEnded( SectionStats const& sectionStats ) = 0; virtual void testCaseEnded( TestCaseStats const& testCaseStats ) = 0; virtual void testGroupEnded( TestGroupStats const& testGroupStats ) = 0; virtual void testRunEnded( TestRunStats const& testRunStats ) = 0; virtual void skipTest( TestCaseInfo const& testInfo ) = 0; virtual MultipleReporters* tryAsMulti() { return CATCH_NULL; } }; struct IReporterFactory : IShared { virtual ~IReporterFactory(); virtual IStreamingReporter* create( ReporterConfig const& config ) const = 0; virtual std::string getDescription() const = 0; }; struct IReporterRegistry { typedef std::map > FactoryMap; typedef std::vector > Listeners; virtual ~IReporterRegistry(); virtual IStreamingReporter* create( std::string const& name, Ptr const& config ) const = 0; virtual FactoryMap const& getFactories() const = 0; virtual Listeners const& getListeners() const = 0; }; Ptr addReporter( Ptr const& existingReporter, Ptr const& additionalReporter ); } #include #include namespace Catch { inline std::size_t listTests( Config const& config ) { TestSpec testSpec = config.testSpec(); if( config.testSpec().hasFilters() ) Catch::cout() << "Matching test cases:\n"; else { Catch::cout() << "All available test cases:\n"; testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "*" ).testSpec(); } std::size_t matchedTests = 0; TextAttributes nameAttr, tagsAttr; nameAttr.setInitialIndent( 2 ).setIndent( 4 ); tagsAttr.setIndent( 6 ); std::vector matchedTestCases = filterTests( 
getAllTestCasesSorted( config ), testSpec, config ); for( std::vector::const_iterator it = matchedTestCases.begin(), itEnd = matchedTestCases.end(); it != itEnd; ++it ) { matchedTests++; TestCaseInfo const& testCaseInfo = it->getTestCaseInfo(); Colour::Code colour = testCaseInfo.isHidden() ? Colour::SecondaryText : Colour::None; Colour colourGuard( colour ); Catch::cout() << Text( testCaseInfo.name, nameAttr ) << std::endl; if( !testCaseInfo.tags.empty() ) Catch::cout() << Text( testCaseInfo.tagsAsString, tagsAttr ) << std::endl; } if( !config.testSpec().hasFilters() ) Catch::cout() << pluralise( matchedTests, "test case" ) << "\n" << std::endl; else Catch::cout() << pluralise( matchedTests, "matching test case" ) << "\n" << std::endl; return matchedTests; } inline std::size_t listTestsNamesOnly( Config const& config ) { TestSpec testSpec = config.testSpec(); if( !config.testSpec().hasFilters() ) testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "*" ).testSpec(); std::size_t matchedTests = 0; std::vector matchedTestCases = filterTests( getAllTestCasesSorted( config ), testSpec, config ); for( std::vector::const_iterator it = matchedTestCases.begin(), itEnd = matchedTestCases.end(); it != itEnd; ++it ) { matchedTests++; TestCaseInfo const& testCaseInfo = it->getTestCaseInfo(); if( startsWith( testCaseInfo.name, "#" ) ) Catch::cout() << "\"" << testCaseInfo.name << "\"" << std::endl; else Catch::cout() << testCaseInfo.name << std::endl; } return matchedTests; } struct TagInfo { TagInfo() : count ( 0 ) {} void add( std::string const& spelling ) { ++count; spellings.insert( spelling ); } std::string all() const { std::string out; for( std::set::const_iterator it = spellings.begin(), itEnd = spellings.end(); it != itEnd; ++it ) out += "[" + *it + "]"; return out; } std::set spellings; std::size_t count; }; inline std::size_t listTags( Config const& config ) { TestSpec testSpec = config.testSpec(); if( config.testSpec().hasFilters() ) Catch::cout() << "Tags for matching test cases:\n"; else { Catch::cout() << "All available tags:\n"; testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "*" ).testSpec(); } std::map tagCounts; std::vector matchedTestCases = filterTests( getAllTestCasesSorted( config ), testSpec, config ); for( std::vector::const_iterator it = matchedTestCases.begin(), itEnd = matchedTestCases.end(); it != itEnd; ++it ) { for( std::set::const_iterator tagIt = it->getTestCaseInfo().tags.begin(), tagItEnd = it->getTestCaseInfo().tags.end(); tagIt != tagItEnd; ++tagIt ) { std::string tagName = *tagIt; std::string lcaseTagName = toLower( tagName ); std::map::iterator countIt = tagCounts.find( lcaseTagName ); if( countIt == tagCounts.end() ) countIt = tagCounts.insert( std::make_pair( lcaseTagName, TagInfo() ) ).first; countIt->second.add( tagName ); } } for( std::map::const_iterator countIt = tagCounts.begin(), countItEnd = tagCounts.end(); countIt != countItEnd; ++countIt ) { std::ostringstream oss; oss << " " << std::setw(2) << countIt->second.count << " "; Text wrapper( countIt->second.all(), TextAttributes() .setInitialIndent( 0 ) .setIndent( oss.str().size() ) .setWidth( CATCH_CONFIG_CONSOLE_WIDTH-10 ) ); Catch::cout() << oss.str() << wrapper << "\n"; } Catch::cout() << pluralise( tagCounts.size(), "tag" ) << "\n" << std::endl; return tagCounts.size(); } inline std::size_t listReporters( Config const& /*config*/ ) { Catch::cout() << "Available reporters:\n"; IReporterRegistry::FactoryMap const& factories = getRegistryHub().getReporterRegistry().getFactories(); 
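// The reporter factory map is keyed by reporter name; the loop that follows first measures the longest
// name so that each factory's description can be word-wrapped and aligned into a single column beside it.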
IReporterRegistry::FactoryMap::const_iterator itBegin = factories.begin(), itEnd = factories.end(), it; std::size_t maxNameLen = 0; for(it = itBegin; it != itEnd; ++it ) maxNameLen = (std::max)( maxNameLen, it->first.size() ); for(it = itBegin; it != itEnd; ++it ) { Text wrapper( it->second->getDescription(), TextAttributes() .setInitialIndent( 0 ) .setIndent( 7+maxNameLen ) .setWidth( CATCH_CONFIG_CONSOLE_WIDTH - maxNameLen-8 ) ); Catch::cout() << " " << it->first << ":" << std::string( maxNameLen - it->first.size() + 2, ' ' ) << wrapper << "\n"; } Catch::cout() << std::endl; return factories.size(); } inline Option list( Config const& config ) { Option listedCount; if( config.listTests() ) listedCount = listedCount.valueOr(0) + listTests( config ); if( config.listTestNamesOnly() ) listedCount = listedCount.valueOr(0) + listTestsNamesOnly( config ); if( config.listTags() ) listedCount = listedCount.valueOr(0) + listTags( config ); if( config.listReporters() ) listedCount = listedCount.valueOr(0) + listReporters( config ); return listedCount; } } // end namespace Catch // #included from: internal/catch_run_context.hpp #define TWOBLUECUBES_CATCH_RUNNER_IMPL_HPP_INCLUDED // #included from: catch_test_case_tracker.hpp #define TWOBLUECUBES_CATCH_TEST_CASE_TRACKER_HPP_INCLUDED #include #include #include #include namespace Catch { namespace TestCaseTracking { struct ITracker : SharedImpl<> { virtual ~ITracker(); // static queries virtual std::string name() const = 0; // dynamic queries virtual bool isComplete() const = 0; // Successfully completed or failed virtual bool isSuccessfullyCompleted() const = 0; virtual bool isOpen() const = 0; // Started but not complete virtual bool hasChildren() const = 0; virtual ITracker& parent() = 0; // actions virtual void close() = 0; // Successfully complete virtual void fail() = 0; virtual void markAsNeedingAnotherRun() = 0; virtual void addChild( Ptr const& child ) = 0; virtual ITracker* findChild( std::string const& name ) = 0; virtual void openChild() = 0; // Debug/ checking virtual bool isSectionTracker() const = 0; virtual bool isIndexTracker() const = 0; }; class TrackerContext { enum RunState { NotStarted, Executing, CompletedCycle }; Ptr m_rootTracker; ITracker* m_currentTracker; RunState m_runState; public: static TrackerContext& instance() { static TrackerContext s_instance; return s_instance; } TrackerContext() : m_currentTracker( CATCH_NULL ), m_runState( NotStarted ) {} ITracker& startRun(); void endRun() { m_rootTracker.reset(); m_currentTracker = CATCH_NULL; m_runState = NotStarted; } void startCycle() { m_currentTracker = m_rootTracker.get(); m_runState = Executing; } void completeCycle() { m_runState = CompletedCycle; } bool completedCycle() const { return m_runState == CompletedCycle; } ITracker& currentTracker() { return *m_currentTracker; } void setCurrentTracker( ITracker* tracker ) { m_currentTracker = tracker; } }; class TrackerBase : public ITracker { protected: enum CycleState { NotStarted, Executing, ExecutingChildren, NeedsAnotherRun, CompletedSuccessfully, Failed }; class TrackerHasName { std::string m_name; public: TrackerHasName( std::string const& name ) : m_name( name ) {} bool operator ()( Ptr const& tracker ) { return tracker->name() == m_name; } }; typedef std::vector > Children; std::string m_name; TrackerContext& m_ctx; ITracker* m_parent; Children m_children; CycleState m_runState; public: TrackerBase( std::string const& name, TrackerContext& ctx, ITracker* parent ) : m_name( name ), m_ctx( ctx ), m_parent( parent ), 
m_runState( NotStarted ) {} virtual ~TrackerBase(); virtual std::string name() const CATCH_OVERRIDE { return m_name; } virtual bool isComplete() const CATCH_OVERRIDE { return m_runState == CompletedSuccessfully || m_runState == Failed; } virtual bool isSuccessfullyCompleted() const CATCH_OVERRIDE { return m_runState == CompletedSuccessfully; } virtual bool isOpen() const CATCH_OVERRIDE { return m_runState != NotStarted && !isComplete(); } virtual bool hasChildren() const CATCH_OVERRIDE { return !m_children.empty(); } virtual void addChild( Ptr const& child ) CATCH_OVERRIDE { m_children.push_back( child ); } virtual ITracker* findChild( std::string const& name ) CATCH_OVERRIDE { Children::const_iterator it = std::find_if( m_children.begin(), m_children.end(), TrackerHasName( name ) ); return( it != m_children.end() ) ? it->get() : CATCH_NULL; } virtual ITracker& parent() CATCH_OVERRIDE { assert( m_parent ); // Should always be non-null except for root return *m_parent; } virtual void openChild() CATCH_OVERRIDE { if( m_runState != ExecutingChildren ) { m_runState = ExecutingChildren; if( m_parent ) m_parent->openChild(); } } virtual bool isSectionTracker() const CATCH_OVERRIDE { return false; } virtual bool isIndexTracker() const CATCH_OVERRIDE { return false; } void open() { m_runState = Executing; moveToThis(); if( m_parent ) m_parent->openChild(); } virtual void close() CATCH_OVERRIDE { // Close any still open children (e.g. generators) while( &m_ctx.currentTracker() != this ) m_ctx.currentTracker().close(); switch( m_runState ) { case NotStarted: case CompletedSuccessfully: case Failed: throw std::logic_error( "Illogical state" ); case NeedsAnotherRun: break;; case Executing: m_runState = CompletedSuccessfully; break; case ExecutingChildren: if( m_children.empty() || m_children.back()->isComplete() ) m_runState = CompletedSuccessfully; break; default: throw std::logic_error( "Unexpected state" ); } moveToParent(); m_ctx.completeCycle(); } virtual void fail() CATCH_OVERRIDE { m_runState = Failed; if( m_parent ) m_parent->markAsNeedingAnotherRun(); moveToParent(); m_ctx.completeCycle(); } virtual void markAsNeedingAnotherRun() CATCH_OVERRIDE { m_runState = NeedsAnotherRun; } private: void moveToParent() { assert( m_parent ); m_ctx.setCurrentTracker( m_parent ); } void moveToThis() { m_ctx.setCurrentTracker( this ); } }; class SectionTracker : public TrackerBase { public: SectionTracker( std::string const& name, TrackerContext& ctx, ITracker* parent ) : TrackerBase( name, ctx, parent ) {} virtual ~SectionTracker(); virtual bool isSectionTracker() const CATCH_OVERRIDE { return true; } static SectionTracker& acquire( TrackerContext& ctx, std::string const& name ) { SectionTracker* section = CATCH_NULL; ITracker& currentTracker = ctx.currentTracker(); if( ITracker* childTracker = currentTracker.findChild( name ) ) { assert( childTracker ); assert( childTracker->isSectionTracker() ); section = static_cast( childTracker ); } else { section = new SectionTracker( name, ctx, ¤tTracker ); currentTracker.addChild( section ); } if( !ctx.completedCycle() && !section->isComplete() ) { section->open(); } return *section; } }; class IndexTracker : public TrackerBase { int m_size; int m_index; public: IndexTracker( std::string const& name, TrackerContext& ctx, ITracker* parent, int size ) : TrackerBase( name, ctx, parent ), m_size( size ), m_index( -1 ) {} virtual ~IndexTracker(); virtual bool isIndexTracker() const CATCH_OVERRIDE { return true; } static IndexTracker& acquire( TrackerContext& ctx, 
std::string const& name, int size ) { IndexTracker* tracker = CATCH_NULL; ITracker& currentTracker = ctx.currentTracker(); if( ITracker* childTracker = currentTracker.findChild( name ) ) { assert( childTracker ); assert( childTracker->isIndexTracker() ); tracker = static_cast( childTracker ); } else { tracker = new IndexTracker( name, ctx, ¤tTracker, size ); currentTracker.addChild( tracker ); } if( !ctx.completedCycle() && !tracker->isComplete() ) { if( tracker->m_runState != ExecutingChildren && tracker->m_runState != NeedsAnotherRun ) tracker->moveNext(); tracker->open(); } return *tracker; } int index() const { return m_index; } void moveNext() { m_index++; m_children.clear(); } virtual void close() CATCH_OVERRIDE { TrackerBase::close(); if( m_runState == CompletedSuccessfully && m_index < m_size-1 ) m_runState = Executing; } }; inline ITracker& TrackerContext::startRun() { m_rootTracker = new SectionTracker( "{root}", *this, CATCH_NULL ); m_currentTracker = CATCH_NULL; m_runState = Executing; return *m_rootTracker; } } // namespace TestCaseTracking using TestCaseTracking::ITracker; using TestCaseTracking::TrackerContext; using TestCaseTracking::SectionTracker; using TestCaseTracking::IndexTracker; } // namespace Catch // #included from: catch_fatal_condition.hpp #define TWOBLUECUBES_CATCH_FATAL_CONDITION_H_INCLUDED namespace Catch { // Report the error condition then exit the process inline void fatal( std::string const& message, int exitCode ) { IContext& context = Catch::getCurrentContext(); IResultCapture* resultCapture = context.getResultCapture(); resultCapture->handleFatalErrorCondition( message ); if( Catch::alwaysTrue() ) // avoids "no return" warnings exit( exitCode ); } } // namespace Catch #if defined ( CATCH_PLATFORM_WINDOWS ) ///////////////////////////////////////// namespace Catch { struct FatalConditionHandler { void reset() {} }; } // namespace Catch #else // Not Windows - assumed to be POSIX compatible ////////////////////////// #include namespace Catch { struct SignalDefs { int id; const char* name; }; extern SignalDefs signalDefs[]; SignalDefs signalDefs[] = { { SIGINT, "SIGINT - Terminal interrupt signal" }, { SIGILL, "SIGILL - Illegal instruction signal" }, { SIGFPE, "SIGFPE - Floating point error signal" }, { SIGSEGV, "SIGSEGV - Segmentation violation signal" }, { SIGTERM, "SIGTERM - Termination request signal" }, { SIGABRT, "SIGABRT - Abort (abnormal termination) signal" } }; struct FatalConditionHandler { static void handleSignal( int sig ) { for( std::size_t i = 0; i < sizeof(signalDefs)/sizeof(SignalDefs); ++i ) if( sig == signalDefs[i].id ) fatal( signalDefs[i].name, -sig ); fatal( "", -sig ); } FatalConditionHandler() : m_isSet( true ) { for( std::size_t i = 0; i < sizeof(signalDefs)/sizeof(SignalDefs); ++i ) signal( signalDefs[i].id, handleSignal ); } ~FatalConditionHandler() { reset(); } void reset() { if( m_isSet ) { for( std::size_t i = 0; i < sizeof(signalDefs)/sizeof(SignalDefs); ++i ) signal( signalDefs[i].id, SIG_DFL ); m_isSet = false; } } bool m_isSet; }; } // namespace Catch #endif // not Windows #include #include namespace Catch { class StreamRedirect { public: StreamRedirect( std::ostream& stream, std::string& targetString ) : m_stream( stream ), m_prevBuf( stream.rdbuf() ), m_targetString( targetString ) { stream.rdbuf( m_oss.rdbuf() ); } ~StreamRedirect() { m_targetString += m_oss.str(); m_stream.rdbuf( m_prevBuf ); } private: std::ostream& m_stream; std::streambuf* m_prevBuf; std::ostringstream m_oss; std::string& m_targetString; }; 
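// StreamRedirect (above) is an RAII capture helper: the constructor swaps the target stream's buffer for an
// internal ostringstream, and the destructor appends whatever was captured to the caller's string and restores
// the original buffer. RunContext (below) uses a pair of these to capture stdout/stderr when the active
// reporter requests redirection. Illustrative usage only, not part of Catch:
//     std::string captured;
//     { StreamRedirect guard( std::cout, captured ); std::cout << "hello"; }
//     // captured now contains "hello"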
/////////////////////////////////////////////////////////////////////////// class RunContext : public IResultCapture, public IRunner { RunContext( RunContext const& ); void operator =( RunContext const& ); public: explicit RunContext( Ptr const& _config, Ptr const& reporter ) : m_runInfo( _config->name() ), m_context( getCurrentMutableContext() ), m_activeTestCase( CATCH_NULL ), m_config( _config ), m_reporter( reporter ) { m_context.setRunner( this ); m_context.setConfig( m_config ); m_context.setResultCapture( this ); m_reporter->testRunStarting( m_runInfo ); } virtual ~RunContext() { m_reporter->testRunEnded( TestRunStats( m_runInfo, m_totals, aborting() ) ); } void testGroupStarting( std::string const& testSpec, std::size_t groupIndex, std::size_t groupsCount ) { m_reporter->testGroupStarting( GroupInfo( testSpec, groupIndex, groupsCount ) ); } void testGroupEnded( std::string const& testSpec, Totals const& totals, std::size_t groupIndex, std::size_t groupsCount ) { m_reporter->testGroupEnded( TestGroupStats( GroupInfo( testSpec, groupIndex, groupsCount ), totals, aborting() ) ); } Totals runTest( TestCase const& testCase ) { Totals prevTotals = m_totals; std::string redirectedCout; std::string redirectedCerr; TestCaseInfo testInfo = testCase.getTestCaseInfo(); m_reporter->testCaseStarting( testInfo ); m_activeTestCase = &testCase; do { m_trackerContext.startRun(); do { m_trackerContext.startCycle(); m_testCaseTracker = &SectionTracker::acquire( m_trackerContext, testInfo.name ); runCurrentTest( redirectedCout, redirectedCerr ); } while( !m_testCaseTracker->isSuccessfullyCompleted() && !aborting() ); } // !TBD: deprecated - this will be replaced by indexed trackers while( getCurrentContext().advanceGeneratorsForCurrentTest() && !aborting() ); Totals deltaTotals = m_totals.delta( prevTotals ); if( testInfo.expectedToFail() && deltaTotals.testCases.passed > 0 ) { deltaTotals.assertions.failed++; deltaTotals.testCases.passed--; deltaTotals.testCases.failed++; } m_totals.testCases += deltaTotals.testCases; m_reporter->testCaseEnded( TestCaseStats( testInfo, deltaTotals, redirectedCout, redirectedCerr, aborting() ) ); m_activeTestCase = CATCH_NULL; m_testCaseTracker = CATCH_NULL; return deltaTotals; } Ptr config() const { return m_config; } private: // IResultCapture virtual void assertionEnded( AssertionResult const& result ) { if( result.getResultType() == ResultWas::Ok ) { m_totals.assertions.passed++; } else if( !result.isOk() ) { m_totals.assertions.failed++; } if( m_reporter->assertionEnded( AssertionStats( result, m_messages, m_totals ) ) ) m_messages.clear(); // Reset working state m_lastAssertionInfo = AssertionInfo( "", m_lastAssertionInfo.lineInfo, "{Unknown expression after the reported line}" , m_lastAssertionInfo.resultDisposition ); m_lastResult = result; } virtual bool sectionStarted ( SectionInfo const& sectionInfo, Counts& assertions ) { std::ostringstream oss; oss << sectionInfo.name << "@" << sectionInfo.lineInfo; ITracker& sectionTracker = SectionTracker::acquire( m_trackerContext, oss.str() ); if( !sectionTracker.isOpen() ) return false; m_activeSections.push_back( §ionTracker ); m_lastAssertionInfo.lineInfo = sectionInfo.lineInfo; m_reporter->sectionStarting( sectionInfo ); assertions = m_totals.assertions; return true; } bool testForMissingAssertions( Counts& assertions ) { if( assertions.total() != 0 ) return false; if( !m_config->warnAboutMissingAssertions() ) return false; if( m_trackerContext.currentTracker().hasChildren() ) return false; 
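// At this point the section recorded no assertions, the NoAssertions warning is enabled, and the current
// tracker has no child sections, so the missing assertions are counted as a failure below.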
m_totals.assertions.failed++; assertions.failed++; return true; } virtual void sectionEnded( SectionEndInfo const& endInfo ) { Counts assertions = m_totals.assertions - endInfo.prevAssertions; bool missingAssertions = testForMissingAssertions( assertions ); if( !m_activeSections.empty() ) { m_activeSections.back()->close(); m_activeSections.pop_back(); } m_reporter->sectionEnded( SectionStats( endInfo.sectionInfo, assertions, endInfo.durationInSeconds, missingAssertions ) ); m_messages.clear(); } virtual void sectionEndedEarly( SectionEndInfo const& endInfo ) { if( m_unfinishedSections.empty() ) m_activeSections.back()->fail(); else m_activeSections.back()->close(); m_activeSections.pop_back(); m_unfinishedSections.push_back( endInfo ); } virtual void pushScopedMessage( MessageInfo const& message ) { m_messages.push_back( message ); } virtual void popScopedMessage( MessageInfo const& message ) { m_messages.erase( std::remove( m_messages.begin(), m_messages.end(), message ), m_messages.end() ); } virtual std::string getCurrentTestName() const { return m_activeTestCase ? m_activeTestCase->getTestCaseInfo().name : ""; } virtual const AssertionResult* getLastResult() const { return &m_lastResult; } virtual void handleFatalErrorCondition( std::string const& message ) { ResultBuilder resultBuilder = makeUnexpectedResultBuilder(); resultBuilder.setResultType( ResultWas::FatalErrorCondition ); resultBuilder << message; resultBuilder.captureExpression(); handleUnfinishedSections(); // Recreate section for test case (as we will lose the one that was in scope) TestCaseInfo const& testCaseInfo = m_activeTestCase->getTestCaseInfo(); SectionInfo testCaseSection( testCaseInfo.lineInfo, testCaseInfo.name, testCaseInfo.description ); Counts assertions; assertions.failed = 1; SectionStats testCaseSectionStats( testCaseSection, assertions, 0, false ); m_reporter->sectionEnded( testCaseSectionStats ); TestCaseInfo testInfo = m_activeTestCase->getTestCaseInfo(); Totals deltaTotals; deltaTotals.testCases.failed = 1; m_reporter->testCaseEnded( TestCaseStats( testInfo, deltaTotals, "", "", false ) ); m_totals.testCases.failed++; testGroupEnded( "", m_totals, 1, 1 ); m_reporter->testRunEnded( TestRunStats( m_runInfo, m_totals, false ) ); } public: // !TBD We need to do this another way! bool aborting() const { return m_totals.assertions.failed == static_cast( m_config->abortAfter() ); } private: void runCurrentTest( std::string& redirectedCout, std::string& redirectedCerr ) { TestCaseInfo const& testCaseInfo = m_activeTestCase->getTestCaseInfo(); SectionInfo testCaseSection( testCaseInfo.lineInfo, testCaseInfo.name, testCaseInfo.description ); m_reporter->sectionStarting( testCaseSection ); Counts prevAssertions = m_totals.assertions; double duration = 0; try { m_lastAssertionInfo = AssertionInfo( "TEST_CASE", testCaseInfo.lineInfo, "", ResultDisposition::Normal ); seedRng( *m_config ); Timer timer; timer.start(); if( m_reporter->getPreferences().shouldRedirectStdOut ) { StreamRedirect coutRedir( Catch::cout(), redirectedCout ); StreamRedirect cerrRedir( Catch::cerr(), redirectedCerr ); invokeActiveTestCase(); } else { invokeActiveTestCase(); } duration = timer.getElapsedSeconds(); } catch( TestFailureException& ) { // This just means the test was aborted due to failure } catch(...) 
{ makeUnexpectedResultBuilder().useActiveException(); } m_testCaseTracker->close(); handleUnfinishedSections(); m_messages.clear(); Counts assertions = m_totals.assertions - prevAssertions; bool missingAssertions = testForMissingAssertions( assertions ); if( testCaseInfo.okToFail() ) { std::swap( assertions.failedButOk, assertions.failed ); m_totals.assertions.failed -= assertions.failedButOk; m_totals.assertions.failedButOk += assertions.failedButOk; } SectionStats testCaseSectionStats( testCaseSection, assertions, duration, missingAssertions ); m_reporter->sectionEnded( testCaseSectionStats ); } void invokeActiveTestCase() { FatalConditionHandler fatalConditionHandler; // Handle signals m_activeTestCase->invoke(); fatalConditionHandler.reset(); } private: ResultBuilder makeUnexpectedResultBuilder() const { return ResultBuilder( m_lastAssertionInfo.macroName.c_str(), m_lastAssertionInfo.lineInfo, m_lastAssertionInfo.capturedExpression.c_str(), m_lastAssertionInfo.resultDisposition ); } void handleUnfinishedSections() { // If sections ended prematurely due to an exception we stored their // infos here so we can tear them down outside the unwind process. for( std::vector::const_reverse_iterator it = m_unfinishedSections.rbegin(), itEnd = m_unfinishedSections.rend(); it != itEnd; ++it ) sectionEnded( *it ); m_unfinishedSections.clear(); } TestRunInfo m_runInfo; IMutableContext& m_context; TestCase const* m_activeTestCase; ITracker* m_testCaseTracker; ITracker* m_currentSectionTracker; AssertionResult m_lastResult; Ptr m_config; Totals m_totals; Ptr m_reporter; std::vector m_messages; AssertionInfo m_lastAssertionInfo; std::vector m_unfinishedSections; std::vector m_activeSections; TrackerContext m_trackerContext; }; IResultCapture& getResultCapture() { if( IResultCapture* capture = getCurrentContext().getResultCapture() ) return *capture; else throw std::logic_error( "No result capture instance" ); } } // end namespace Catch // #included from: internal/catch_version.h #define TWOBLUECUBES_CATCH_VERSION_H_INCLUDED namespace Catch { // Versioning information struct Version { Version( unsigned int _majorVersion, unsigned int _minorVersion, unsigned int _patchNumber, std::string const& _branchName, unsigned int _buildNumber ); unsigned int const majorVersion; unsigned int const minorVersion; unsigned int const patchNumber; // buildNumber is only used if branchName is not null std::string const branchName; unsigned int const buildNumber; friend std::ostream& operator << ( std::ostream& os, Version const& version ); private: void operator=( Version const& ); }; extern Version libraryVersion; } #include #include #include namespace Catch { Ptr createReporter( std::string const& reporterName, Ptr const& config ) { Ptr reporter = getRegistryHub().getReporterRegistry().create( reporterName, config.get() ); if( !reporter ) { std::ostringstream oss; oss << "No reporter registered with name: '" << reporterName << "'"; throw std::domain_error( oss.str() ); } return reporter; } Ptr makeReporter( Ptr const& config ) { std::vector reporters = config->getReporterNames(); if( reporters.empty() ) reporters.push_back( "console" ); Ptr reporter; for( std::vector::const_iterator it = reporters.begin(), itEnd = reporters.end(); it != itEnd; ++it ) reporter = addReporter( reporter, createReporter( *it, config ) ); return reporter; } Ptr addListeners( Ptr const& config, Ptr reporters ) { IReporterRegistry::Listeners listeners = getRegistryHub().getReporterRegistry().getListeners(); for( 
IReporterRegistry::Listeners::const_iterator it = listeners.begin(), itEnd = listeners.end(); it != itEnd; ++it ) reporters = addReporter(reporters, (*it)->create( ReporterConfig( config ) ) ); return reporters; } Totals runTests( Ptr const& config ) { Ptr iconfig = config.get(); Ptr reporter = makeReporter( config ); reporter = addListeners( iconfig, reporter ); RunContext context( iconfig, reporter ); Totals totals; context.testGroupStarting( config->name(), 1, 1 ); TestSpec testSpec = config->testSpec(); if( !testSpec.hasFilters() ) testSpec = TestSpecParser( ITagAliasRegistry::get() ).parse( "~[.]" ).testSpec(); // All not hidden tests std::vector const& allTestCases = getAllTestCasesSorted( *iconfig ); for( std::vector::const_iterator it = allTestCases.begin(), itEnd = allTestCases.end(); it != itEnd; ++it ) { if( !context.aborting() && matchTest( *it, testSpec, *iconfig ) ) totals += context.runTest( *it ); else reporter->skipTest( *it ); } context.testGroupEnded( iconfig->name(), totals, 1, 1 ); return totals; } void applyFilenamesAsTags( IConfig const& config ) { std::vector const& tests = getAllTestCasesSorted( config ); for(std::size_t i = 0; i < tests.size(); ++i ) { TestCase& test = const_cast( tests[i] ); std::set tags = test.tags; std::string filename = test.lineInfo.file; std::string::size_type lastSlash = filename.find_last_of( "\\/" ); if( lastSlash != std::string::npos ) filename = filename.substr( lastSlash+1 ); std::string::size_type lastDot = filename.find_last_of( "." ); if( lastDot != std::string::npos ) filename = filename.substr( 0, lastDot ); tags.insert( "#" + filename ); setTags( test, tags ); } } class Session : NonCopyable { static bool alreadyInstantiated; public: struct OnUnusedOptions { enum DoWhat { Ignore, Fail }; }; Session() : m_cli( makeCommandLineParser() ) { if( alreadyInstantiated ) { std::string msg = "Only one instance of Catch::Session can ever be used"; Catch::cerr() << msg << std::endl; throw std::logic_error( msg ); } alreadyInstantiated = true; } ~Session() { Catch::cleanUp(); } void showHelp( std::string const& processName ) { Catch::cout() << "\nCatch v" << libraryVersion << "\n"; m_cli.usage( Catch::cout(), processName ); Catch::cout() << "For more detail usage please see the project docs\n" << std::endl; } int applyCommandLine( int argc, char const* const* const argv, OnUnusedOptions::DoWhat unusedOptionBehaviour = OnUnusedOptions::Fail ) { try { m_cli.setThrowOnUnrecognisedTokens( unusedOptionBehaviour == OnUnusedOptions::Fail ); m_unusedTokens = m_cli.parseInto( Clara::argsToVector( argc, argv ), m_configData ); if( m_configData.showHelp ) showHelp( m_configData.processName ); m_config.reset(); } catch( std::exception& ex ) { { Colour colourGuard( Colour::Red ); Catch::cerr() << "\nError(s) in input:\n" << Text( ex.what(), TextAttributes().setIndent(2) ) << "\n\n"; } m_cli.usage( Catch::cout(), m_configData.processName ); return (std::numeric_limits::max)(); } return 0; } void useConfigData( ConfigData const& _configData ) { m_configData = _configData; m_config.reset(); } int run( int argc, char const* const* const argv ) { int returnCode = applyCommandLine( argc, argv ); if( returnCode == 0 ) returnCode = run(); return returnCode; } int run() { if( m_configData.showHelp ) return 0; try { config(); // Force config to be constructed seedRng( *m_config ); if( m_configData.filenamesAsTags ) applyFilenamesAsTags( *m_config ); // Handle list request if( Option listed = list( config() ) ) return static_cast( *listed ); return static_cast( 
runTests( m_config ).assertions.failed ); } catch( std::exception& ex ) { Catch::cerr() << ex.what() << std::endl; return (std::numeric_limits::max)(); } } Clara::CommandLine const& cli() const { return m_cli; } std::vector const& unusedTokens() const { return m_unusedTokens; } ConfigData& configData() { return m_configData; } Config& config() { if( !m_config ) m_config = new Config( m_configData ); return *m_config; } private: Clara::CommandLine m_cli; std::vector m_unusedTokens; ConfigData m_configData; Ptr m_config; }; bool Session::alreadyInstantiated = false; } // end namespace Catch // #included from: catch_registry_hub.hpp #define TWOBLUECUBES_CATCH_REGISTRY_HUB_HPP_INCLUDED // #included from: catch_test_case_registry_impl.hpp #define TWOBLUECUBES_CATCH_TEST_CASE_REGISTRY_IMPL_HPP_INCLUDED #include #include #include #include #include namespace Catch { struct RandomNumberGenerator { typedef std::ptrdiff_t result_type; result_type operator()( result_type n ) const { return std::rand() % n; } #ifdef CATCH_CONFIG_CPP11_SHUFFLE static constexpr result_type min() { return 0; } static constexpr result_type max() { return 1000000; } result_type operator()() const { return std::rand() % max(); } #endif template static void shuffle( V& vector ) { RandomNumberGenerator rng; #ifdef CATCH_CONFIG_CPP11_SHUFFLE std::shuffle( vector.begin(), vector.end(), rng ); #else std::random_shuffle( vector.begin(), vector.end(), rng ); #endif } }; inline std::vector sortTests( IConfig const& config, std::vector const& unsortedTestCases ) { std::vector sorted = unsortedTestCases; switch( config.runOrder() ) { case RunTests::InLexicographicalOrder: std::sort( sorted.begin(), sorted.end() ); break; case RunTests::InRandomOrder: { seedRng( config ); RandomNumberGenerator::shuffle( sorted ); } break; case RunTests::InDeclarationOrder: // already in declaration order break; } return sorted; } bool matchTest( TestCase const& testCase, TestSpec const& testSpec, IConfig const& config ) { return testSpec.matches( testCase ) && ( config.allowThrows() || !testCase.throws() ); } void enforceNoDuplicateTestCases( std::vector const& functions ) { std::set seenFunctions; for( std::vector::const_iterator it = functions.begin(), itEnd = functions.end(); it != itEnd; ++it ) { std::pair::const_iterator, bool> prev = seenFunctions.insert( *it ); if( !prev.second ) { std::ostringstream ss; ss << Colour( Colour::Red ) << "error: TEST_CASE( \"" << it->name << "\" ) already defined.\n" << "\tFirst seen at " << prev.first->getTestCaseInfo().lineInfo << "\n" << "\tRedefined at " << it->getTestCaseInfo().lineInfo << std::endl; throw std::runtime_error(ss.str()); } } } std::vector filterTests( std::vector const& testCases, TestSpec const& testSpec, IConfig const& config ) { std::vector filtered; filtered.reserve( testCases.size() ); for( std::vector::const_iterator it = testCases.begin(), itEnd = testCases.end(); it != itEnd; ++it ) if( matchTest( *it, testSpec, config ) ) filtered.push_back( *it ); return filtered; } std::vector const& getAllTestCasesSorted( IConfig const& config ) { return getRegistryHub().getTestCaseRegistry().getAllTestsSorted( config ); } class TestRegistry : public ITestCaseRegistry { public: TestRegistry() : m_currentSortOrder( RunTests::InDeclarationOrder ), m_unnamedCount( 0 ) {} virtual ~TestRegistry(); virtual void registerTest( TestCase const& testCase ) { std::string name = testCase.getTestCaseInfo().name; if( name == "" ) { std::ostringstream oss; oss << "Anonymous test case " << ++m_unnamedCount; return 
registerTest( testCase.withName( oss.str() ) ); } m_functions.push_back( testCase ); } virtual std::vector const& getAllTests() const { return m_functions; } virtual std::vector const& getAllTestsSorted( IConfig const& config ) const { if( m_sortedFunctions.empty() ) enforceNoDuplicateTestCases( m_functions ); if( m_currentSortOrder != config.runOrder() || m_sortedFunctions.empty() ) { m_sortedFunctions = sortTests( config, m_functions ); m_currentSortOrder = config.runOrder(); } return m_sortedFunctions; } private: std::vector m_functions; mutable RunTests::InWhatOrder m_currentSortOrder; mutable std::vector m_sortedFunctions; size_t m_unnamedCount; std::ios_base::Init m_ostreamInit; // Forces cout/ cerr to be initialised }; /////////////////////////////////////////////////////////////////////////// class FreeFunctionTestCase : public SharedImpl { public: FreeFunctionTestCase( TestFunction fun ) : m_fun( fun ) {} virtual void invoke() const { m_fun(); } private: virtual ~FreeFunctionTestCase(); TestFunction m_fun; }; inline std::string extractClassName( std::string const& classOrQualifiedMethodName ) { std::string className = classOrQualifiedMethodName; if( startsWith( className, "&" ) ) { std::size_t lastColons = className.rfind( "::" ); std::size_t penultimateColons = className.rfind( "::", lastColons-1 ); if( penultimateColons == std::string::npos ) penultimateColons = 1; className = className.substr( penultimateColons, lastColons-penultimateColons ); } return className; } void registerTestCase ( ITestCase* testCase, char const* classOrQualifiedMethodName, NameAndDesc const& nameAndDesc, SourceLineInfo const& lineInfo ) { getMutableRegistryHub().registerTest ( makeTestCase ( testCase, extractClassName( classOrQualifiedMethodName ), nameAndDesc.name, nameAndDesc.description, lineInfo ) ); } void registerTestCaseFunction ( TestFunction function, SourceLineInfo const& lineInfo, NameAndDesc const& nameAndDesc ) { registerTestCase( new FreeFunctionTestCase( function ), "", nameAndDesc, lineInfo ); } /////////////////////////////////////////////////////////////////////////// AutoReg::AutoReg ( TestFunction function, SourceLineInfo const& lineInfo, NameAndDesc const& nameAndDesc ) { registerTestCaseFunction( function, lineInfo, nameAndDesc ); } AutoReg::~AutoReg() {} } // end namespace Catch // #included from: catch_reporter_registry.hpp #define TWOBLUECUBES_CATCH_REPORTER_REGISTRY_HPP_INCLUDED #include namespace Catch { class ReporterRegistry : public IReporterRegistry { public: virtual ~ReporterRegistry() CATCH_OVERRIDE {} virtual IStreamingReporter* create( std::string const& name, Ptr const& config ) const CATCH_OVERRIDE { FactoryMap::const_iterator it = m_factories.find( name ); if( it == m_factories.end() ) return CATCH_NULL; return it->second->create( ReporterConfig( config ) ); } void registerReporter( std::string const& name, Ptr const& factory ) { m_factories.insert( std::make_pair( name, factory ) ); } void registerListener( Ptr const& factory ) { m_listeners.push_back( factory ); } virtual FactoryMap const& getFactories() const CATCH_OVERRIDE { return m_factories; } virtual Listeners const& getListeners() const CATCH_OVERRIDE { return m_listeners; } private: FactoryMap m_factories; Listeners m_listeners; }; } // #included from: catch_exception_translator_registry.hpp #define TWOBLUECUBES_CATCH_EXCEPTION_TRANSLATOR_REGISTRY_HPP_INCLUDED #ifdef __OBJC__ #import "Foundation/Foundation.h" #endif namespace Catch { class ExceptionTranslatorRegistry : public IExceptionTranslatorRegistry 
{ public: ~ExceptionTranslatorRegistry() { deleteAll( m_translators ); } virtual void registerTranslator( const IExceptionTranslator* translator ) { m_translators.push_back( translator ); } virtual std::string translateActiveException() const { try { #ifdef __OBJC__ // In Objective-C try objective-c exceptions first @try { return tryTranslators(); } @catch (NSException *exception) { return Catch::toString( [exception description] ); } #else return tryTranslators(); #endif } catch( TestFailureException& ) { throw; } catch( std::exception& ex ) { return ex.what(); } catch( std::string& msg ) { return msg; } catch( const char* msg ) { return msg; } catch(...) { return "Unknown exception"; } } std::string tryTranslators() const { if( m_translators.empty() ) throw; else return m_translators[0]->translate( m_translators.begin()+1, m_translators.end() ); } private: std::vector m_translators; }; } namespace Catch { namespace { class RegistryHub : public IRegistryHub, public IMutableRegistryHub { RegistryHub( RegistryHub const& ); void operator=( RegistryHub const& ); public: // IRegistryHub RegistryHub() { } virtual IReporterRegistry const& getReporterRegistry() const CATCH_OVERRIDE { return m_reporterRegistry; } virtual ITestCaseRegistry const& getTestCaseRegistry() const CATCH_OVERRIDE { return m_testCaseRegistry; } virtual IExceptionTranslatorRegistry& getExceptionTranslatorRegistry() CATCH_OVERRIDE { return m_exceptionTranslatorRegistry; } public: // IMutableRegistryHub virtual void registerReporter( std::string const& name, Ptr const& factory ) CATCH_OVERRIDE { m_reporterRegistry.registerReporter( name, factory ); } virtual void registerListener( Ptr const& factory ) CATCH_OVERRIDE { m_reporterRegistry.registerListener( factory ); } virtual void registerTest( TestCase const& testInfo ) CATCH_OVERRIDE { m_testCaseRegistry.registerTest( testInfo ); } virtual void registerTranslator( const IExceptionTranslator* translator ) CATCH_OVERRIDE { m_exceptionTranslatorRegistry.registerTranslator( translator ); } private: TestRegistry m_testCaseRegistry; ReporterRegistry m_reporterRegistry; ExceptionTranslatorRegistry m_exceptionTranslatorRegistry; }; // Single, global, instance inline RegistryHub*& getTheRegistryHub() { static RegistryHub* theRegistryHub = CATCH_NULL; if( !theRegistryHub ) theRegistryHub = new RegistryHub(); return theRegistryHub; } } IRegistryHub& getRegistryHub() { return *getTheRegistryHub(); } IMutableRegistryHub& getMutableRegistryHub() { return *getTheRegistryHub(); } void cleanUp() { delete getTheRegistryHub(); getTheRegistryHub() = CATCH_NULL; cleanUpContext(); } std::string translateActiveException() { return getRegistryHub().getExceptionTranslatorRegistry().translateActiveException(); } } // end namespace Catch // #included from: catch_notimplemented_exception.hpp #define TWOBLUECUBES_CATCH_NOTIMPLEMENTED_EXCEPTION_HPP_INCLUDED #include namespace Catch { NotImplementedException::NotImplementedException( SourceLineInfo const& lineInfo ) : m_lineInfo( lineInfo ) { std::ostringstream oss; oss << lineInfo << ": function "; oss << "not implemented"; m_what = oss.str(); } const char* NotImplementedException::what() const CATCH_NOEXCEPT { return m_what.c_str(); } } // end namespace Catch // #included from: catch_context_impl.hpp #define TWOBLUECUBES_CATCH_CONTEXT_IMPL_HPP_INCLUDED // #included from: catch_stream.hpp #define TWOBLUECUBES_CATCH_STREAM_HPP_INCLUDED #include #include #include namespace Catch { template class StreamBufImpl : public StreamBufBase { char data[bufferSize]; 
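        // Fixed-size staging buffer for the std::streambuf machinery; overflow()
        // and sync() below hand the buffered characters to the WriterF functor
        // (e.g. the OutputDebugWriter used by DebugOutStream further down).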
WriterF m_writer; public: StreamBufImpl() { setp( data, data + sizeof(data) ); } ~StreamBufImpl() CATCH_NOEXCEPT { sync(); } private: int overflow( int c ) { sync(); if( c != EOF ) { if( pbase() == epptr() ) m_writer( std::string( 1, static_cast( c ) ) ); else sputc( static_cast( c ) ); } return 0; } int sync() { if( pbase() != pptr() ) { m_writer( std::string( pbase(), static_cast( pptr() - pbase() ) ) ); setp( pbase(), epptr() ); } return 0; } }; /////////////////////////////////////////////////////////////////////////// FileStream::FileStream( std::string const& filename ) { m_ofs.open( filename.c_str() ); if( m_ofs.fail() ) { std::ostringstream oss; oss << "Unable to open file: '" << filename << "'"; throw std::domain_error( oss.str() ); } } std::ostream& FileStream::stream() const { return m_ofs; } struct OutputDebugWriter { void operator()( std::string const&str ) { writeToDebugConsole( str ); } }; DebugOutStream::DebugOutStream() : m_streamBuf( new StreamBufImpl() ), m_os( m_streamBuf.get() ) {} std::ostream& DebugOutStream::stream() const { return m_os; } // Store the streambuf from cout up-front because // cout may get redirected when running tests CoutStream::CoutStream() : m_os( Catch::cout().rdbuf() ) {} std::ostream& CoutStream::stream() const { return m_os; } #ifndef CATCH_CONFIG_NOSTDOUT // If you #define this you must implement these functions std::ostream& cout() { return std::cout; } std::ostream& cerr() { return std::cerr; } #endif } namespace Catch { class Context : public IMutableContext { Context() : m_config( CATCH_NULL ), m_runner( CATCH_NULL ), m_resultCapture( CATCH_NULL ) {} Context( Context const& ); void operator=( Context const& ); public: // IContext virtual IResultCapture* getResultCapture() { return m_resultCapture; } virtual IRunner* getRunner() { return m_runner; } virtual size_t getGeneratorIndex( std::string const& fileInfo, size_t totalSize ) { return getGeneratorsForCurrentTest() .getGeneratorInfo( fileInfo, totalSize ) .getCurrentIndex(); } virtual bool advanceGeneratorsForCurrentTest() { IGeneratorsForTest* generators = findGeneratorsForCurrentTest(); return generators && generators->moveNext(); } virtual Ptr getConfig() const { return m_config; } public: // IMutableContext virtual void setResultCapture( IResultCapture* resultCapture ) { m_resultCapture = resultCapture; } virtual void setRunner( IRunner* runner ) { m_runner = runner; } virtual void setConfig( Ptr const& config ) { m_config = config; } friend IMutableContext& getCurrentMutableContext(); private: IGeneratorsForTest* findGeneratorsForCurrentTest() { std::string testName = getResultCapture()->getCurrentTestName(); std::map::const_iterator it = m_generatorsByTestName.find( testName ); return it != m_generatorsByTestName.end() ? 
it->second : CATCH_NULL; } IGeneratorsForTest& getGeneratorsForCurrentTest() { IGeneratorsForTest* generators = findGeneratorsForCurrentTest(); if( !generators ) { std::string testName = getResultCapture()->getCurrentTestName(); generators = createGeneratorsForTest(); m_generatorsByTestName.insert( std::make_pair( testName, generators ) ); } return *generators; } private: Ptr m_config; IRunner* m_runner; IResultCapture* m_resultCapture; std::map m_generatorsByTestName; }; namespace { Context* currentContext = CATCH_NULL; } IMutableContext& getCurrentMutableContext() { if( !currentContext ) currentContext = new Context(); return *currentContext; } IContext& getCurrentContext() { return getCurrentMutableContext(); } void cleanUpContext() { delete currentContext; currentContext = CATCH_NULL; } } // #included from: catch_console_colour_impl.hpp #define TWOBLUECUBES_CATCH_CONSOLE_COLOUR_IMPL_HPP_INCLUDED namespace Catch { namespace { struct IColourImpl { virtual ~IColourImpl() {} virtual void use( Colour::Code _colourCode ) = 0; }; struct NoColourImpl : IColourImpl { void use( Colour::Code ) {} static IColourImpl* instance() { static NoColourImpl s_instance; return &s_instance; } }; } // anon namespace } // namespace Catch #if !defined( CATCH_CONFIG_COLOUR_NONE ) && !defined( CATCH_CONFIG_COLOUR_WINDOWS ) && !defined( CATCH_CONFIG_COLOUR_ANSI ) # ifdef CATCH_PLATFORM_WINDOWS # define CATCH_CONFIG_COLOUR_WINDOWS # else # define CATCH_CONFIG_COLOUR_ANSI # endif #endif #if defined ( CATCH_CONFIG_COLOUR_WINDOWS ) ///////////////////////////////////////// #ifndef NOMINMAX #define NOMINMAX #endif #ifdef __AFXDLL #include #else #include #endif namespace Catch { namespace { class Win32ColourImpl : public IColourImpl { public: Win32ColourImpl() : stdoutHandle( GetStdHandle(STD_OUTPUT_HANDLE) ) { CONSOLE_SCREEN_BUFFER_INFO csbiInfo; GetConsoleScreenBufferInfo( stdoutHandle, &csbiInfo ); originalForegroundAttributes = csbiInfo.wAttributes & ~( BACKGROUND_GREEN | BACKGROUND_RED | BACKGROUND_BLUE | BACKGROUND_INTENSITY ); originalBackgroundAttributes = csbiInfo.wAttributes & ~( FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_INTENSITY ); } virtual void use( Colour::Code _colourCode ) { switch( _colourCode ) { case Colour::None: return setTextAttribute( originalForegroundAttributes ); case Colour::White: return setTextAttribute( FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE ); case Colour::Red: return setTextAttribute( FOREGROUND_RED ); case Colour::Green: return setTextAttribute( FOREGROUND_GREEN ); case Colour::Blue: return setTextAttribute( FOREGROUND_BLUE ); case Colour::Cyan: return setTextAttribute( FOREGROUND_BLUE | FOREGROUND_GREEN ); case Colour::Yellow: return setTextAttribute( FOREGROUND_RED | FOREGROUND_GREEN ); case Colour::Grey: return setTextAttribute( 0 ); case Colour::LightGrey: return setTextAttribute( FOREGROUND_INTENSITY ); case Colour::BrightRed: return setTextAttribute( FOREGROUND_INTENSITY | FOREGROUND_RED ); case Colour::BrightGreen: return setTextAttribute( FOREGROUND_INTENSITY | FOREGROUND_GREEN ); case Colour::BrightWhite: return setTextAttribute( FOREGROUND_INTENSITY | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_BLUE ); case Colour::Bright: throw std::logic_error( "not a colour" ); } } private: void setTextAttribute( WORD _textAttribute ) { SetConsoleTextAttribute( stdoutHandle, _textAttribute | originalBackgroundAttributes ); } HANDLE stdoutHandle; WORD originalForegroundAttributes; WORD originalBackgroundAttributes; }; IColourImpl* 
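    // Chooses the active colour backend: it honours the configured use-colour
    // setting and, when that is left on "auto", enables colour only if no
    // debugger is attached.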
platformColourInstance() { static Win32ColourImpl s_instance; Ptr config = getCurrentContext().getConfig(); UseColour::YesOrNo colourMode = config ? config->useColour() : UseColour::Auto; if( colourMode == UseColour::Auto ) colourMode = !isDebuggerActive() ? UseColour::Yes : UseColour::No; return colourMode == UseColour::Yes ? &s_instance : NoColourImpl::instance(); } } // end anon namespace } // end namespace Catch #elif defined( CATCH_CONFIG_COLOUR_ANSI ) ////////////////////////////////////// #include namespace Catch { namespace { // use POSIX/ ANSI console terminal codes // Thanks to Adam Strzelecki for original contribution // (http://github.com/nanoant) // https://github.com/philsquared/Catch/pull/131 class PosixColourImpl : public IColourImpl { public: virtual void use( Colour::Code _colourCode ) { switch( _colourCode ) { case Colour::None: case Colour::White: return setColour( "[0m" ); case Colour::Red: return setColour( "[0;31m" ); case Colour::Green: return setColour( "[0;32m" ); case Colour::Blue: return setColour( "[0;34m" ); case Colour::Cyan: return setColour( "[0;36m" ); case Colour::Yellow: return setColour( "[0;33m" ); case Colour::Grey: return setColour( "[1;30m" ); case Colour::LightGrey: return setColour( "[0;37m" ); case Colour::BrightRed: return setColour( "[1;31m" ); case Colour::BrightGreen: return setColour( "[1;32m" ); case Colour::BrightWhite: return setColour( "[1;37m" ); case Colour::Bright: throw std::logic_error( "not a colour" ); } } static IColourImpl* instance() { static PosixColourImpl s_instance; return &s_instance; } private: void setColour( const char* _escapeCode ) { Catch::cout() << '\033' << _escapeCode; } }; IColourImpl* platformColourInstance() { Ptr config = getCurrentContext().getConfig(); UseColour::YesOrNo colourMode = config ? config->useColour() : UseColour::Auto; if( colourMode == UseColour::Auto ) colourMode = (!isDebuggerActive() && isatty(STDOUT_FILENO) ) ? UseColour::Yes : UseColour::No; return colourMode == UseColour::Yes ? 
PosixColourImpl::instance() : NoColourImpl::instance(); } } // end anon namespace } // end namespace Catch #else // not Windows or ANSI /////////////////////////////////////////////// namespace Catch { static IColourImpl* platformColourInstance() { return NoColourImpl::instance(); } } // end namespace Catch #endif // Windows/ ANSI/ None namespace Catch { Colour::Colour( Code _colourCode ) : m_moved( false ) { use( _colourCode ); } Colour::Colour( Colour const& _other ) : m_moved( false ) { const_cast( _other ).m_moved = true; } Colour::~Colour(){ if( !m_moved ) use( None ); } void Colour::use( Code _colourCode ) { static IColourImpl* impl = platformColourInstance(); impl->use( _colourCode ); } } // end namespace Catch // #included from: catch_generators_impl.hpp #define TWOBLUECUBES_CATCH_GENERATORS_IMPL_HPP_INCLUDED #include #include #include namespace Catch { struct GeneratorInfo : IGeneratorInfo { GeneratorInfo( std::size_t size ) : m_size( size ), m_currentIndex( 0 ) {} bool moveNext() { if( ++m_currentIndex == m_size ) { m_currentIndex = 0; return false; } return true; } std::size_t getCurrentIndex() const { return m_currentIndex; } std::size_t m_size; std::size_t m_currentIndex; }; /////////////////////////////////////////////////////////////////////////// class GeneratorsForTest : public IGeneratorsForTest { public: ~GeneratorsForTest() { deleteAll( m_generatorsInOrder ); } IGeneratorInfo& getGeneratorInfo( std::string const& fileInfo, std::size_t size ) { std::map::const_iterator it = m_generatorsByName.find( fileInfo ); if( it == m_generatorsByName.end() ) { IGeneratorInfo* info = new GeneratorInfo( size ); m_generatorsByName.insert( std::make_pair( fileInfo, info ) ); m_generatorsInOrder.push_back( info ); return *info; } return *it->second; } bool moveNext() { std::vector::const_iterator it = m_generatorsInOrder.begin(); std::vector::const_iterator itEnd = m_generatorsInOrder.end(); for(; it != itEnd; ++it ) { if( (*it)->moveNext() ) return true; } return false; } private: std::map m_generatorsByName; std::vector m_generatorsInOrder; }; IGeneratorsForTest* createGeneratorsForTest() { return new GeneratorsForTest(); } } // end namespace Catch // #included from: catch_assertionresult.hpp #define TWOBLUECUBES_CATCH_ASSERTIONRESULT_HPP_INCLUDED namespace Catch { AssertionInfo::AssertionInfo( std::string const& _macroName, SourceLineInfo const& _lineInfo, std::string const& _capturedExpression, ResultDisposition::Flags _resultDisposition ) : macroName( _macroName ), lineInfo( _lineInfo ), capturedExpression( _capturedExpression ), resultDisposition( _resultDisposition ) {} AssertionResult::AssertionResult() {} AssertionResult::AssertionResult( AssertionInfo const& info, AssertionResultData const& data ) : m_info( info ), m_resultData( data ) {} AssertionResult::~AssertionResult() {} // Result was a success bool AssertionResult::succeeded() const { return Catch::isOk( m_resultData.resultType ); } // Result was a success, or failure is suppressed bool AssertionResult::isOk() const { return Catch::isOk( m_resultData.resultType ) || shouldSuppressFailure( m_info.resultDisposition ); } ResultWas::OfType AssertionResult::getResultType() const { return m_resultData.resultType; } bool AssertionResult::hasExpression() const { return !m_info.capturedExpression.empty(); } bool AssertionResult::hasMessage() const { return !m_resultData.message.empty(); } std::string AssertionResult::getExpression() const { if( isFalseTest( m_info.resultDisposition ) ) return "!" 
+ m_info.capturedExpression; else return m_info.capturedExpression; } std::string AssertionResult::getExpressionInMacro() const { if( m_info.macroName.empty() ) return m_info.capturedExpression; else return m_info.macroName + "( " + m_info.capturedExpression + " )"; } bool AssertionResult::hasExpandedExpression() const { return hasExpression() && getExpandedExpression() != getExpression(); } std::string AssertionResult::getExpandedExpression() const { return m_resultData.reconstructedExpression; } std::string AssertionResult::getMessage() const { return m_resultData.message; } SourceLineInfo AssertionResult::getSourceInfo() const { return m_info.lineInfo; } std::string AssertionResult::getTestMacroName() const { return m_info.macroName; } } // end namespace Catch // #included from: catch_test_case_info.hpp #define TWOBLUECUBES_CATCH_TEST_CASE_INFO_HPP_INCLUDED namespace Catch { inline TestCaseInfo::SpecialProperties parseSpecialTag( std::string const& tag ) { if( startsWith( tag, "." ) || tag == "hide" || tag == "!hide" ) return TestCaseInfo::IsHidden; else if( tag == "!throws" ) return TestCaseInfo::Throws; else if( tag == "!shouldfail" ) return TestCaseInfo::ShouldFail; else if( tag == "!mayfail" ) return TestCaseInfo::MayFail; else return TestCaseInfo::None; } inline bool isReservedTag( std::string const& tag ) { return parseSpecialTag( tag ) == TestCaseInfo::None && tag.size() > 0 && !isalnum( tag[0] ); } inline void enforceNotReservedTag( std::string const& tag, SourceLineInfo const& _lineInfo ) { if( isReservedTag( tag ) ) { { Colour colourGuard( Colour::Red ); Catch::cerr() << "Tag name [" << tag << "] not allowed.\n" << "Tag names starting with non alpha-numeric characters are reserved\n"; } { Colour colourGuard( Colour::FileName ); Catch::cerr() << _lineInfo << std::endl; } exit(1); } } TestCase makeTestCase( ITestCase* _testCase, std::string const& _className, std::string const& _name, std::string const& _descOrTags, SourceLineInfo const& _lineInfo ) { bool isHidden( startsWith( _name, "./" ) ); // Legacy support // Parse out tags std::set tags; std::string desc, tag; bool inTag = false; for( std::size_t i = 0; i < _descOrTags.size(); ++i ) { char c = _descOrTags[i]; if( !inTag ) { if( c == '[' ) inTag = true; else desc += c; } else { if( c == ']' ) { TestCaseInfo::SpecialProperties prop = parseSpecialTag( tag ); if( prop == TestCaseInfo::IsHidden ) isHidden = true; else if( prop == TestCaseInfo::None ) enforceNotReservedTag( tag, _lineInfo ); tags.insert( tag ); tag.clear(); inTag = false; } else tag += c; } } if( isHidden ) { tags.insert( "hide" ); tags.insert( "." 
); } TestCaseInfo info( _name, _className, desc, tags, _lineInfo ); return TestCase( _testCase, info ); } void setTags( TestCaseInfo& testCaseInfo, std::set const& tags ) { testCaseInfo.tags = tags; testCaseInfo.lcaseTags.clear(); std::ostringstream oss; for( std::set::const_iterator it = tags.begin(), itEnd = tags.end(); it != itEnd; ++it ) { oss << "[" << *it << "]"; std::string lcaseTag = toLower( *it ); testCaseInfo.properties = static_cast( testCaseInfo.properties | parseSpecialTag( lcaseTag ) ); testCaseInfo.lcaseTags.insert( lcaseTag ); } testCaseInfo.tagsAsString = oss.str(); } TestCaseInfo::TestCaseInfo( std::string const& _name, std::string const& _className, std::string const& _description, std::set const& _tags, SourceLineInfo const& _lineInfo ) : name( _name ), className( _className ), description( _description ), lineInfo( _lineInfo ), properties( None ) { setTags( *this, _tags ); } TestCaseInfo::TestCaseInfo( TestCaseInfo const& other ) : name( other.name ), className( other.className ), description( other.description ), tags( other.tags ), lcaseTags( other.lcaseTags ), tagsAsString( other.tagsAsString ), lineInfo( other.lineInfo ), properties( other.properties ) {} bool TestCaseInfo::isHidden() const { return ( properties & IsHidden ) != 0; } bool TestCaseInfo::throws() const { return ( properties & Throws ) != 0; } bool TestCaseInfo::okToFail() const { return ( properties & (ShouldFail | MayFail ) ) != 0; } bool TestCaseInfo::expectedToFail() const { return ( properties & (ShouldFail ) ) != 0; } TestCase::TestCase( ITestCase* testCase, TestCaseInfo const& info ) : TestCaseInfo( info ), test( testCase ) {} TestCase::TestCase( TestCase const& other ) : TestCaseInfo( other ), test( other.test ) {} TestCase TestCase::withName( std::string const& _newName ) const { TestCase other( *this ); other.name = _newName; return other; } void TestCase::swap( TestCase& other ) { test.swap( other.test ); name.swap( other.name ); className.swap( other.className ); description.swap( other.description ); tags.swap( other.tags ); lcaseTags.swap( other.lcaseTags ); tagsAsString.swap( other.tagsAsString ); std::swap( TestCaseInfo::properties, static_cast( other ).properties ); std::swap( lineInfo, other.lineInfo ); } void TestCase::invoke() const { test->invoke(); } bool TestCase::operator == ( TestCase const& other ) const { return test.get() == other.test.get() && name == other.name && className == other.className; } bool TestCase::operator < ( TestCase const& other ) const { return name < other.name; } TestCase& TestCase::operator = ( TestCase const& other ) { TestCase temp( other ); swap( temp ); return *this; } TestCaseInfo const& TestCase::getTestCaseInfo() const { return *this; } } // end namespace Catch // #included from: catch_version.hpp #define TWOBLUECUBES_CATCH_VERSION_HPP_INCLUDED namespace Catch { Version::Version ( unsigned int _majorVersion, unsigned int _minorVersion, unsigned int _patchNumber, std::string const& _branchName, unsigned int _buildNumber ) : majorVersion( _majorVersion ), minorVersion( _minorVersion ), patchNumber( _patchNumber ), branchName( _branchName ), buildNumber( _buildNumber ) {} std::ostream& operator << ( std::ostream& os, Version const& version ) { os << version.majorVersion << "." << version.minorVersion << "." << version.patchNumber; if( !version.branchName.empty() ) { os << "-" << version.branchName << "." 
<< version.buildNumber; } return os; } Version libraryVersion( 1, 6, 0, "", 0 ); } // #included from: catch_message.hpp #define TWOBLUECUBES_CATCH_MESSAGE_HPP_INCLUDED namespace Catch { MessageInfo::MessageInfo( std::string const& _macroName, SourceLineInfo const& _lineInfo, ResultWas::OfType _type ) : macroName( _macroName ), lineInfo( _lineInfo ), type( _type ), sequence( ++globalCount ) {} // This may need protecting if threading support is added unsigned int MessageInfo::globalCount = 0; //////////////////////////////////////////////////////////////////////////// ScopedMessage::ScopedMessage( MessageBuilder const& builder ) : m_info( builder.m_info ) { m_info.message = builder.m_stream.str(); getResultCapture().pushScopedMessage( m_info ); } ScopedMessage::ScopedMessage( ScopedMessage const& other ) : m_info( other.m_info ) {} ScopedMessage::~ScopedMessage() { getResultCapture().popScopedMessage( m_info ); } } // end namespace Catch // #included from: catch_legacy_reporter_adapter.hpp #define TWOBLUECUBES_CATCH_LEGACY_REPORTER_ADAPTER_HPP_INCLUDED // #included from: catch_legacy_reporter_adapter.h #define TWOBLUECUBES_CATCH_LEGACY_REPORTER_ADAPTER_H_INCLUDED namespace Catch { // Deprecated struct IReporter : IShared { virtual ~IReporter(); virtual bool shouldRedirectStdout() const = 0; virtual void StartTesting() = 0; virtual void EndTesting( Totals const& totals ) = 0; virtual void StartGroup( std::string const& groupName ) = 0; virtual void EndGroup( std::string const& groupName, Totals const& totals ) = 0; virtual void StartTestCase( TestCaseInfo const& testInfo ) = 0; virtual void EndTestCase( TestCaseInfo const& testInfo, Totals const& totals, std::string const& stdOut, std::string const& stdErr ) = 0; virtual void StartSection( std::string const& sectionName, std::string const& description ) = 0; virtual void EndSection( std::string const& sectionName, Counts const& assertions ) = 0; virtual void NoAssertionsInSection( std::string const& sectionName ) = 0; virtual void NoAssertionsInTestCase( std::string const& testName ) = 0; virtual void Aborted() = 0; virtual void Result( AssertionResult const& result ) = 0; }; class LegacyReporterAdapter : public SharedImpl { public: LegacyReporterAdapter( Ptr const& legacyReporter ); virtual ~LegacyReporterAdapter(); virtual ReporterPreferences getPreferences() const; virtual void noMatchingTestCases( std::string const& ); virtual void testRunStarting( TestRunInfo const& ); virtual void testGroupStarting( GroupInfo const& groupInfo ); virtual void testCaseStarting( TestCaseInfo const& testInfo ); virtual void sectionStarting( SectionInfo const& sectionInfo ); virtual void assertionStarting( AssertionInfo const& ); virtual bool assertionEnded( AssertionStats const& assertionStats ); virtual void sectionEnded( SectionStats const& sectionStats ); virtual void testCaseEnded( TestCaseStats const& testCaseStats ); virtual void testGroupEnded( TestGroupStats const& testGroupStats ); virtual void testRunEnded( TestRunStats const& testRunStats ); virtual void skipTest( TestCaseInfo const& ); private: Ptr m_legacyReporter; }; } namespace Catch { LegacyReporterAdapter::LegacyReporterAdapter( Ptr const& legacyReporter ) : m_legacyReporter( legacyReporter ) {} LegacyReporterAdapter::~LegacyReporterAdapter() {} ReporterPreferences LegacyReporterAdapter::getPreferences() const { ReporterPreferences prefs; prefs.shouldRedirectStdOut = m_legacyReporter->shouldRedirectStdout(); return prefs; } void LegacyReporterAdapter::noMatchingTestCases( std::string 
const& ) {} void LegacyReporterAdapter::testRunStarting( TestRunInfo const& ) { m_legacyReporter->StartTesting(); } void LegacyReporterAdapter::testGroupStarting( GroupInfo const& groupInfo ) { m_legacyReporter->StartGroup( groupInfo.name ); } void LegacyReporterAdapter::testCaseStarting( TestCaseInfo const& testInfo ) { m_legacyReporter->StartTestCase( testInfo ); } void LegacyReporterAdapter::sectionStarting( SectionInfo const& sectionInfo ) { m_legacyReporter->StartSection( sectionInfo.name, sectionInfo.description ); } void LegacyReporterAdapter::assertionStarting( AssertionInfo const& ) { // Not on legacy interface } bool LegacyReporterAdapter::assertionEnded( AssertionStats const& assertionStats ) { if( assertionStats.assertionResult.getResultType() != ResultWas::Ok ) { for( std::vector::const_iterator it = assertionStats.infoMessages.begin(), itEnd = assertionStats.infoMessages.end(); it != itEnd; ++it ) { if( it->type == ResultWas::Info ) { ResultBuilder rb( it->macroName.c_str(), it->lineInfo, "", ResultDisposition::Normal ); rb << it->message; rb.setResultType( ResultWas::Info ); AssertionResult result = rb.build(); m_legacyReporter->Result( result ); } } } m_legacyReporter->Result( assertionStats.assertionResult ); return true; } void LegacyReporterAdapter::sectionEnded( SectionStats const& sectionStats ) { if( sectionStats.missingAssertions ) m_legacyReporter->NoAssertionsInSection( sectionStats.sectionInfo.name ); m_legacyReporter->EndSection( sectionStats.sectionInfo.name, sectionStats.assertions ); } void LegacyReporterAdapter::testCaseEnded( TestCaseStats const& testCaseStats ) { m_legacyReporter->EndTestCase ( testCaseStats.testInfo, testCaseStats.totals, testCaseStats.stdOut, testCaseStats.stdErr ); } void LegacyReporterAdapter::testGroupEnded( TestGroupStats const& testGroupStats ) { if( testGroupStats.aborting ) m_legacyReporter->Aborted(); m_legacyReporter->EndGroup( testGroupStats.groupInfo.name, testGroupStats.totals ); } void LegacyReporterAdapter::testRunEnded( TestRunStats const& testRunStats ) { m_legacyReporter->EndTesting( testRunStats.totals ); } void LegacyReporterAdapter::skipTest( TestCaseInfo const& ) { } } // #included from: catch_timer.hpp #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wc++11-long-long" #endif #ifdef CATCH_PLATFORM_WINDOWS #include #else #include #endif namespace Catch { namespace { #ifdef CATCH_PLATFORM_WINDOWS uint64_t getCurrentTicks() { static uint64_t hz=0, hzo=0; if (!hz) { QueryPerformanceFrequency( reinterpret_cast( &hz ) ); QueryPerformanceCounter( reinterpret_cast( &hzo ) ); } uint64_t t; QueryPerformanceCounter( reinterpret_cast( &t ) ); return ((t-hzo)*1000000)/hz; } #else uint64_t getCurrentTicks() { timeval t; gettimeofday(&t,CATCH_NULL); return static_cast( t.tv_sec ) * 1000000ull + static_cast( t.tv_usec ); } #endif } void Timer::start() { m_ticks = getCurrentTicks(); } unsigned int Timer::getElapsedMicroseconds() const { return static_cast(getCurrentTicks() - m_ticks); } unsigned int Timer::getElapsedMilliseconds() const { return static_cast(getElapsedMicroseconds()/1000); } double Timer::getElapsedSeconds() const { return getElapsedMicroseconds()/1000000.0; } } // namespace Catch #ifdef __clang__ #pragma clang diagnostic pop #endif // #included from: catch_common.hpp #define TWOBLUECUBES_CATCH_COMMON_HPP_INCLUDED namespace Catch { bool startsWith( std::string const& s, std::string const& prefix ) { return s.size() >= prefix.size() && s.substr( 0, prefix.size() ) == prefix; } bool 
endsWith( std::string const& s, std::string const& suffix ) { return s.size() >= suffix.size() && s.substr( s.size()-suffix.size(), suffix.size() ) == suffix; } bool contains( std::string const& s, std::string const& infix ) { return s.find( infix ) != std::string::npos; } char toLowerCh(char c) { return static_cast( ::tolower( c ) ); } void toLowerInPlace( std::string& s ) { std::transform( s.begin(), s.end(), s.begin(), toLowerCh ); } std::string toLower( std::string const& s ) { std::string lc = s; toLowerInPlace( lc ); return lc; } std::string trim( std::string const& str ) { static char const* whitespaceChars = "\n\r\t "; std::string::size_type start = str.find_first_not_of( whitespaceChars ); std::string::size_type end = str.find_last_not_of( whitespaceChars ); return start != std::string::npos ? str.substr( start, 1+end-start ) : ""; } bool replaceInPlace( std::string& str, std::string const& replaceThis, std::string const& withThis ) { bool replaced = false; std::size_t i = str.find( replaceThis ); while( i != std::string::npos ) { replaced = true; str = str.substr( 0, i ) + withThis + str.substr( i+replaceThis.size() ); if( i < str.size()-withThis.size() ) i = str.find( replaceThis, i+withThis.size() ); else i = std::string::npos; } return replaced; } pluralise::pluralise( std::size_t count, std::string const& label ) : m_count( count ), m_label( label ) {} std::ostream& operator << ( std::ostream& os, pluralise const& pluraliser ) { os << pluraliser.m_count << " " << pluraliser.m_label; if( pluraliser.m_count != 1 ) os << "s"; return os; } SourceLineInfo::SourceLineInfo() : line( 0 ){} SourceLineInfo::SourceLineInfo( char const* _file, std::size_t _line ) : file( _file ), line( _line ) {} SourceLineInfo::SourceLineInfo( SourceLineInfo const& other ) : file( other.file ), line( other.line ) {} bool SourceLineInfo::empty() const { return file.empty(); } bool SourceLineInfo::operator == ( SourceLineInfo const& other ) const { return line == other.line && file == other.file; } bool SourceLineInfo::operator < ( SourceLineInfo const& other ) const { return line < other.line || ( line == other.line && file < other.file ); } void seedRng( IConfig const& config ) { if( config.rngSeed() != 0 ) std::srand( config.rngSeed() ); } unsigned int rngSeed() { return getCurrentContext().getConfig()->rngSeed(); } std::ostream& operator << ( std::ostream& os, SourceLineInfo const& info ) { #ifndef __GNUG__ os << info.file << "(" << info.line << ")"; #else os << info.file << ":" << info.line; #endif return os; } void throwLogicError( std::string const& message, SourceLineInfo const& locationInfo ) { std::ostringstream oss; oss << locationInfo << ": Internal Catch error: '" << message << "'"; if( alwaysTrue() ) throw std::logic_error( oss.str() ); } } // #included from: catch_section.hpp #define TWOBLUECUBES_CATCH_SECTION_HPP_INCLUDED namespace Catch { SectionInfo::SectionInfo ( SourceLineInfo const& _lineInfo, std::string const& _name, std::string const& _description ) : name( _name ), description( _description ), lineInfo( _lineInfo ) {} Section::Section( SectionInfo const& info ) : m_info( info ), m_sectionIncluded( getResultCapture().sectionStarted( m_info, m_assertions ) ) { m_timer.start(); } Section::~Section() { if( m_sectionIncluded ) { SectionEndInfo endInfo( m_info, m_assertions, m_timer.getElapsedSeconds() ); if( std::uncaught_exception() ) getResultCapture().sectionEndedEarly( endInfo ); else getResultCapture().sectionEnded( endInfo ); } } // This indicates whether the section should be 
executed or not Section::operator bool() const { return m_sectionIncluded; } } // end namespace Catch // #included from: catch_debugger.hpp #define TWOBLUECUBES_CATCH_DEBUGGER_HPP_INCLUDED #include #ifdef CATCH_PLATFORM_MAC #include #include #include #include #include namespace Catch{ // The following function is taken directly from the following technical note: // http://developer.apple.com/library/mac/#qa/qa2004/qa1361.html // Returns true if the current process is being debugged (either // running under the debugger or has a debugger attached post facto). bool isDebuggerActive(){ int mib[4]; struct kinfo_proc info; size_t size; // Initialize the flags so that, if sysctl fails for some bizarre // reason, we get a predictable result. info.kp_proc.p_flag = 0; // Initialize mib, which tells sysctl the info we want, in this case // we're looking for information about a specific process ID. mib[0] = CTL_KERN; mib[1] = KERN_PROC; mib[2] = KERN_PROC_PID; mib[3] = getpid(); // Call sysctl. size = sizeof(info); if( sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, CATCH_NULL, 0) != 0 ) { Catch::cerr() << "\n** Call to sysctl failed - unable to determine if debugger is active **\n" << std::endl; return false; } // We're being debugged if the P_TRACED flag is set. return ( (info.kp_proc.p_flag & P_TRACED) != 0 ); } } // namespace Catch #elif defined(_MSC_VER) extern "C" __declspec(dllimport) int __stdcall IsDebuggerPresent(); namespace Catch { bool isDebuggerActive() { return IsDebuggerPresent() != 0; } } #elif defined(__MINGW32__) extern "C" __declspec(dllimport) int __stdcall IsDebuggerPresent(); namespace Catch { bool isDebuggerActive() { return IsDebuggerPresent() != 0; } } #else namespace Catch { inline bool isDebuggerActive() { return false; } } #endif // Platform #ifdef CATCH_PLATFORM_WINDOWS extern "C" __declspec(dllimport) void __stdcall OutputDebugStringA( const char* ); namespace Catch { void writeToDebugConsole( std::string const& text ) { ::OutputDebugStringA( text.c_str() ); } } #else namespace Catch { void writeToDebugConsole( std::string const& text ) { // !TBD: Need a version for Mac/ XCode and other IDEs Catch::cout() << text; } } #endif // Platform // #included from: catch_tostring.hpp #define TWOBLUECUBES_CATCH_TOSTRING_HPP_INCLUDED namespace Catch { namespace Detail { const std::string unprintableString = "{?}"; namespace { const int hexThreshold = 255; struct Endianness { enum Arch { Big, Little }; static Arch which() { union _{ int asInt; char asChar[sizeof (int)]; } u; u.asInt = 1; return ( u.asChar[sizeof(int)-1] == 1 ) ? 
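                        // the int 1 was written through the union: if its value byte
                        // ends up at the highest address, the least-significant byte
                        // is stored last, i.e. the platform is big-endian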
Big : Little; } }; } std::string rawMemoryToString( const void *object, std::size_t size ) { // Reverse order for little endian architectures int i = 0, end = static_cast( size ), inc = 1; if( Endianness::which() == Endianness::Little ) { i = end-1; end = inc = -1; } unsigned char const *bytes = static_cast(object); std::ostringstream os; os << "0x" << std::setfill('0') << std::hex; for( ; i != end; i += inc ) os << std::setw(2) << static_cast(bytes[i]); return os.str(); } } std::string toString( std::string const& value ) { std::string s = value; if( getCurrentContext().getConfig()->showInvisibles() ) { for(size_t i = 0; i < s.size(); ++i ) { std::string subs; switch( s[i] ) { case '\n': subs = "\\n"; break; case '\t': subs = "\\t"; break; default: break; } if( !subs.empty() ) { s = s.substr( 0, i ) + subs + s.substr( i+1 ); ++i; } } } return "\"" + s + "\""; } std::string toString( std::wstring const& value ) { std::string s; s.reserve( value.size() ); for(size_t i = 0; i < value.size(); ++i ) s += value[i] <= 0xff ? static_cast( value[i] ) : '?'; return Catch::toString( s ); } std::string toString( const char* const value ) { return value ? Catch::toString( std::string( value ) ) : std::string( "{null string}" ); } std::string toString( char* const value ) { return Catch::toString( static_cast( value ) ); } std::string toString( const wchar_t* const value ) { return value ? Catch::toString( std::wstring(value) ) : std::string( "{null string}" ); } std::string toString( wchar_t* const value ) { return Catch::toString( static_cast( value ) ); } std::string toString( int value ) { std::ostringstream oss; oss << value; if( value > Detail::hexThreshold ) oss << " (0x" << std::hex << value << ")"; return oss.str(); } std::string toString( unsigned long value ) { std::ostringstream oss; oss << value; if( value > Detail::hexThreshold ) oss << " (0x" << std::hex << value << ")"; return oss.str(); } std::string toString( unsigned int value ) { return Catch::toString( static_cast( value ) ); } template std::string fpToString( T value, int precision ) { std::ostringstream oss; oss << std::setprecision( precision ) << std::fixed << value; std::string d = oss.str(); std::size_t i = d.find_last_not_of( '0' ); if( i != std::string::npos && i != d.size()-1 ) { if( d[i] == '.' ) i++; d = d.substr( 0, i+1 ); } return d; } std::string toString( const double value ) { return fpToString( value, 10 ); } std::string toString( const float value ) { return fpToString( value, 5 ) + "f"; } std::string toString( bool value ) { return value ? "true" : "false"; } std::string toString( char value ) { return value < ' ' ? 
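        // unprintable control characters are rendered via their numeric value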
toString( static_cast( value ) ) : Detail::makeString( value ); } std::string toString( signed char value ) { return toString( static_cast( value ) ); } std::string toString( unsigned char value ) { return toString( static_cast( value ) ); } #ifdef CATCH_CONFIG_CPP11_LONG_LONG std::string toString( long long value ) { std::ostringstream oss; oss << value; if( value > Detail::hexThreshold ) oss << " (0x" << std::hex << value << ")"; return oss.str(); } std::string toString( unsigned long long value ) { std::ostringstream oss; oss << value; if( value > Detail::hexThreshold ) oss << " (0x" << std::hex << value << ")"; return oss.str(); } #endif #ifdef CATCH_CONFIG_CPP11_NULLPTR std::string toString( std::nullptr_t ) { return "nullptr"; } #endif #ifdef __OBJC__ std::string toString( NSString const * const& nsstring ) { if( !nsstring ) return "nil"; return "@" + toString([nsstring UTF8String]); } std::string toString( NSString * CATCH_ARC_STRONG const& nsstring ) { if( !nsstring ) return "nil"; return "@" + toString([nsstring UTF8String]); } std::string toString( NSObject* const& nsObject ) { return toString( [nsObject description] ); } #endif } // end namespace Catch // #included from: catch_result_builder.hpp #define TWOBLUECUBES_CATCH_RESULT_BUILDER_HPP_INCLUDED namespace Catch { std::string capturedExpressionWithSecondArgument( std::string const& capturedExpression, std::string const& secondArg ) { return secondArg.empty() || secondArg == "\"\"" ? capturedExpression : capturedExpression + ", " + secondArg; } ResultBuilder::ResultBuilder( char const* macroName, SourceLineInfo const& lineInfo, char const* capturedExpression, ResultDisposition::Flags resultDisposition, char const* secondArg ) : m_assertionInfo( macroName, lineInfo, capturedExpressionWithSecondArgument( capturedExpression, secondArg ), resultDisposition ), m_shouldDebugBreak( false ), m_shouldThrow( false ) {} ResultBuilder& ResultBuilder::setResultType( ResultWas::OfType result ) { m_data.resultType = result; return *this; } ResultBuilder& ResultBuilder::setResultType( bool result ) { m_data.resultType = result ? 
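        // a plain boolean outcome maps directly onto Ok / ExpressionFailed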
ResultWas::Ok : ResultWas::ExpressionFailed; return *this; } ResultBuilder& ResultBuilder::setLhs( std::string const& lhs ) { m_exprComponents.lhs = lhs; return *this; } ResultBuilder& ResultBuilder::setRhs( std::string const& rhs ) { m_exprComponents.rhs = rhs; return *this; } ResultBuilder& ResultBuilder::setOp( std::string const& op ) { m_exprComponents.op = op; return *this; } void ResultBuilder::endExpression() { m_exprComponents.testFalse = isFalseTest( m_assertionInfo.resultDisposition ); captureExpression(); } void ResultBuilder::useActiveException( ResultDisposition::Flags resultDisposition ) { m_assertionInfo.resultDisposition = resultDisposition; m_stream.oss << Catch::translateActiveException(); captureResult( ResultWas::ThrewException ); } void ResultBuilder::captureResult( ResultWas::OfType resultType ) { setResultType( resultType ); captureExpression(); } void ResultBuilder::captureExpectedException( std::string const& expectedMessage ) { if( expectedMessage.empty() ) captureExpectedException( Matchers::Impl::Generic::AllOf() ); else captureExpectedException( Matchers::Equals( expectedMessage ) ); } void ResultBuilder::captureExpectedException( Matchers::Impl::Matcher const& matcher ) { assert( m_exprComponents.testFalse == false ); AssertionResultData data = m_data; data.resultType = ResultWas::Ok; data.reconstructedExpression = m_assertionInfo.capturedExpression; std::string actualMessage = Catch::translateActiveException(); if( !matcher.match( actualMessage ) ) { data.resultType = ResultWas::ExpressionFailed; data.reconstructedExpression = actualMessage; } AssertionResult result( m_assertionInfo, data ); handleResult( result ); } void ResultBuilder::captureExpression() { AssertionResult result = build(); handleResult( result ); } void ResultBuilder::handleResult( AssertionResult const& result ) { getResultCapture().assertionEnded( result ); if( !result.isOk() ) { if( getCurrentContext().getConfig()->shouldDebugBreak() ) m_shouldDebugBreak = true; if( getCurrentContext().getRunner()->aborting() || (m_assertionInfo.resultDisposition & ResultDisposition::Normal) ) m_shouldThrow = true; } } void ResultBuilder::react() { if( m_shouldThrow ) throw Catch::TestFailureException(); } bool ResultBuilder::shouldDebugBreak() const { return m_shouldDebugBreak; } bool ResultBuilder::allowThrows() const { return getCurrentContext().getConfig()->allowThrows(); } AssertionResult ResultBuilder::build() const { assert( m_data.resultType != ResultWas::Unknown ); AssertionResultData data = m_data; // Flip bool results if testFalse is set if( m_exprComponents.testFalse ) { if( data.resultType == ResultWas::Ok ) data.resultType = ResultWas::ExpressionFailed; else if( data.resultType == ResultWas::ExpressionFailed ) data.resultType = ResultWas::Ok; } data.message = m_stream.oss.str(); data.reconstructedExpression = reconstructExpression(); if( m_exprComponents.testFalse ) { if( m_exprComponents.op == "" ) data.reconstructedExpression = "!" + data.reconstructedExpression; else data.reconstructedExpression = "!(" + data.reconstructedExpression + ")"; } return AssertionResult( m_assertionInfo, data ); } std::string ResultBuilder::reconstructExpression() const { if( m_exprComponents.op == "" ) return m_exprComponents.lhs.empty() ? m_assertionInfo.capturedExpression : m_exprComponents.lhs; else if( m_exprComponents.op == "matches" ) return m_exprComponents.lhs + " " + m_exprComponents.rhs; else if( m_exprComponents.op != "!" 
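                 // any genuine binary operator; the synthesised unary "!" case
                 // falls through to the diagnostic message in the final else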
) { if( m_exprComponents.lhs.size() + m_exprComponents.rhs.size() < 40 && m_exprComponents.lhs.find("\n") == std::string::npos && m_exprComponents.rhs.find("\n") == std::string::npos ) return m_exprComponents.lhs + " " + m_exprComponents.op + " " + m_exprComponents.rhs; else return m_exprComponents.lhs + "\n" + m_exprComponents.op + "\n" + m_exprComponents.rhs; } else return "{can't expand - use " + m_assertionInfo.macroName + "_FALSE( " + m_assertionInfo.capturedExpression.substr(1) + " ) instead of " + m_assertionInfo.macroName + "( " + m_assertionInfo.capturedExpression + " ) for better diagnostics}"; } } // end namespace Catch // #included from: catch_tag_alias_registry.hpp #define TWOBLUECUBES_CATCH_TAG_ALIAS_REGISTRY_HPP_INCLUDED // #included from: catch_tag_alias_registry.h #define TWOBLUECUBES_CATCH_TAG_ALIAS_REGISTRY_H_INCLUDED #include namespace Catch { class TagAliasRegistry : public ITagAliasRegistry { public: virtual ~TagAliasRegistry(); virtual Option find( std::string const& alias ) const; virtual std::string expandAliases( std::string const& unexpandedTestSpec ) const; void add( char const* alias, char const* tag, SourceLineInfo const& lineInfo ); static TagAliasRegistry& get(); private: std::map m_registry; }; } // end namespace Catch #include #include namespace Catch { TagAliasRegistry::~TagAliasRegistry() {} Option TagAliasRegistry::find( std::string const& alias ) const { std::map::const_iterator it = m_registry.find( alias ); if( it != m_registry.end() ) return it->second; else return Option(); } std::string TagAliasRegistry::expandAliases( std::string const& unexpandedTestSpec ) const { std::string expandedTestSpec = unexpandedTestSpec; for( std::map::const_iterator it = m_registry.begin(), itEnd = m_registry.end(); it != itEnd; ++it ) { std::size_t pos = expandedTestSpec.find( it->first ); if( pos != std::string::npos ) { expandedTestSpec = expandedTestSpec.substr( 0, pos ) + it->second.tag + expandedTestSpec.substr( pos + it->first.size() ); } } return expandedTestSpec; } void TagAliasRegistry::add( char const* alias, char const* tag, SourceLineInfo const& lineInfo ) { if( !startsWith( alias, "[@" ) || !endsWith( alias, "]" ) ) { std::ostringstream oss; oss << "error: tag alias, \"" << alias << "\" is not of the form [@alias name].\n" << lineInfo; throw std::domain_error( oss.str().c_str() ); } if( !m_registry.insert( std::make_pair( alias, TagAlias( tag, lineInfo ) ) ).second ) { std::ostringstream oss; oss << "error: tag alias, \"" << alias << "\" already registered.\n" << "\tFirst seen at " << find(alias)->lineInfo << "\n" << "\tRedefined at " << lineInfo; throw std::domain_error( oss.str().c_str() ); } } TagAliasRegistry& TagAliasRegistry::get() { static TagAliasRegistry instance; return instance; } ITagAliasRegistry::~ITagAliasRegistry() {} ITagAliasRegistry const& ITagAliasRegistry::get() { return TagAliasRegistry::get(); } RegistrarForTagAliases::RegistrarForTagAliases( char const* alias, char const* tag, SourceLineInfo const& lineInfo ) { try { TagAliasRegistry::get().add( alias, tag, lineInfo ); } catch( std::exception& ex ) { Colour colourGuard( Colour::Red ); Catch::cerr() << ex.what() << std::endl; exit(1); } } } // end namespace Catch // #included from: ../reporters/catch_reporter_multi.hpp #define TWOBLUECUBES_CATCH_REPORTER_MULTI_HPP_INCLUDED namespace Catch { class MultipleReporters : public SharedImpl { typedef std::vector > Reporters; Reporters m_reporters; public: void add( Ptr const& reporter ) { m_reporters.push_back( reporter ); } public: // 
IStreamingReporter virtual ReporterPreferences getPreferences() const CATCH_OVERRIDE { return m_reporters[0]->getPreferences(); } virtual void noMatchingTestCases( std::string const& spec ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->noMatchingTestCases( spec ); } virtual void testRunStarting( TestRunInfo const& testRunInfo ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->testRunStarting( testRunInfo ); } virtual void testGroupStarting( GroupInfo const& groupInfo ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->testGroupStarting( groupInfo ); } virtual void testCaseStarting( TestCaseInfo const& testInfo ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->testCaseStarting( testInfo ); } virtual void sectionStarting( SectionInfo const& sectionInfo ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->sectionStarting( sectionInfo ); } virtual void assertionStarting( AssertionInfo const& assertionInfo ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->assertionStarting( assertionInfo ); } // The return value indicates if the messages buffer should be cleared: virtual bool assertionEnded( AssertionStats const& assertionStats ) CATCH_OVERRIDE { bool clearBuffer = false; for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) clearBuffer |= (*it)->assertionEnded( assertionStats ); return clearBuffer; } virtual void sectionEnded( SectionStats const& sectionStats ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->sectionEnded( sectionStats ); } virtual void testCaseEnded( TestCaseStats const& testCaseStats ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->testCaseEnded( testCaseStats ); } virtual void testGroupEnded( TestGroupStats const& testGroupStats ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->testGroupEnded( testGroupStats ); } virtual void testRunEnded( TestRunStats const& testRunStats ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->testRunEnded( testRunStats ); } virtual void skipTest( TestCaseInfo const& testInfo ) CATCH_OVERRIDE { for( Reporters::const_iterator it = m_reporters.begin(), itEnd = m_reporters.end(); it != itEnd; ++it ) (*it)->skipTest( testInfo ); } virtual MultipleReporters* tryAsMulti() CATCH_OVERRIDE { return this; } }; Ptr addReporter( Ptr const& existingReporter, Ptr const& additionalReporter ) { Ptr resultingReporter; if( existingReporter ) { MultipleReporters* multi = existingReporter->tryAsMulti(); if( !multi ) { multi = new MultipleReporters; resultingReporter = Ptr( multi ); if( existingReporter ) multi->add( existingReporter ); } else resultingReporter = existingReporter; multi->add( additionalReporter ); } else resultingReporter = additionalReporter; return resultingReporter; } } // end namespace Catch // #included from: 
../reporters/catch_reporter_xml.hpp #define TWOBLUECUBES_CATCH_REPORTER_XML_HPP_INCLUDED // #included from: catch_reporter_bases.hpp #define TWOBLUECUBES_CATCH_REPORTER_BASES_HPP_INCLUDED #include namespace Catch { struct StreamingReporterBase : SharedImpl { StreamingReporterBase( ReporterConfig const& _config ) : m_config( _config.fullConfig() ), stream( _config.stream() ) { m_reporterPrefs.shouldRedirectStdOut = false; } virtual ReporterPreferences getPreferences() const CATCH_OVERRIDE { return m_reporterPrefs; } virtual ~StreamingReporterBase() CATCH_OVERRIDE; virtual void noMatchingTestCases( std::string const& ) CATCH_OVERRIDE {} virtual void testRunStarting( TestRunInfo const& _testRunInfo ) CATCH_OVERRIDE { currentTestRunInfo = _testRunInfo; } virtual void testGroupStarting( GroupInfo const& _groupInfo ) CATCH_OVERRIDE { currentGroupInfo = _groupInfo; } virtual void testCaseStarting( TestCaseInfo const& _testInfo ) CATCH_OVERRIDE { currentTestCaseInfo = _testInfo; } virtual void sectionStarting( SectionInfo const& _sectionInfo ) CATCH_OVERRIDE { m_sectionStack.push_back( _sectionInfo ); } virtual void sectionEnded( SectionStats const& /* _sectionStats */ ) CATCH_OVERRIDE { m_sectionStack.pop_back(); } virtual void testCaseEnded( TestCaseStats const& /* _testCaseStats */ ) CATCH_OVERRIDE { currentTestCaseInfo.reset(); } virtual void testGroupEnded( TestGroupStats const& /* _testGroupStats */ ) CATCH_OVERRIDE { currentGroupInfo.reset(); } virtual void testRunEnded( TestRunStats const& /* _testRunStats */ ) CATCH_OVERRIDE { currentTestCaseInfo.reset(); currentGroupInfo.reset(); currentTestRunInfo.reset(); } virtual void skipTest( TestCaseInfo const& ) CATCH_OVERRIDE { // Don't do anything with this by default. // It can optionally be overridden in the derived class. 
} Ptr m_config; std::ostream& stream; LazyStat currentTestRunInfo; LazyStat currentGroupInfo; LazyStat currentTestCaseInfo; std::vector m_sectionStack; ReporterPreferences m_reporterPrefs; }; struct CumulativeReporterBase : SharedImpl { template struct Node : SharedImpl<> { explicit Node( T const& _value ) : value( _value ) {} virtual ~Node() {} typedef std::vector > ChildNodes; T value; ChildNodes children; }; struct SectionNode : SharedImpl<> { explicit SectionNode( SectionStats const& _stats ) : stats( _stats ) {} virtual ~SectionNode(); bool operator == ( SectionNode const& other ) const { return stats.sectionInfo.lineInfo == other.stats.sectionInfo.lineInfo; } bool operator == ( Ptr const& other ) const { return operator==( *other ); } SectionStats stats; typedef std::vector > ChildSections; typedef std::vector Assertions; ChildSections childSections; Assertions assertions; std::string stdOut; std::string stdErr; }; struct BySectionInfo { BySectionInfo( SectionInfo const& other ) : m_other( other ) {} BySectionInfo( BySectionInfo const& other ) : m_other( other.m_other ) {} bool operator() ( Ptr const& node ) const { return node->stats.sectionInfo.lineInfo == m_other.lineInfo; } private: void operator=( BySectionInfo const& ); SectionInfo const& m_other; }; typedef Node TestCaseNode; typedef Node TestGroupNode; typedef Node TestRunNode; CumulativeReporterBase( ReporterConfig const& _config ) : m_config( _config.fullConfig() ), stream( _config.stream() ) { m_reporterPrefs.shouldRedirectStdOut = false; } ~CumulativeReporterBase(); virtual ReporterPreferences getPreferences() const CATCH_OVERRIDE { return m_reporterPrefs; } virtual void testRunStarting( TestRunInfo const& ) CATCH_OVERRIDE {} virtual void testGroupStarting( GroupInfo const& ) CATCH_OVERRIDE {} virtual void testCaseStarting( TestCaseInfo const& ) CATCH_OVERRIDE {} virtual void sectionStarting( SectionInfo const& sectionInfo ) CATCH_OVERRIDE { SectionStats incompleteStats( sectionInfo, Counts(), 0, false ); Ptr node; if( m_sectionStack.empty() ) { if( !m_rootSection ) m_rootSection = new SectionNode( incompleteStats ); node = m_rootSection; } else { SectionNode& parentNode = *m_sectionStack.back(); SectionNode::ChildSections::const_iterator it = std::find_if( parentNode.childSections.begin(), parentNode.childSections.end(), BySectionInfo( sectionInfo ) ); if( it == parentNode.childSections.end() ) { node = new SectionNode( incompleteStats ); parentNode.childSections.push_back( node ); } else node = *it; } m_sectionStack.push_back( node ); m_deepestSection = node; } virtual void assertionStarting( AssertionInfo const& ) CATCH_OVERRIDE {} virtual bool assertionEnded( AssertionStats const& assertionStats ) CATCH_OVERRIDE { assert( !m_sectionStack.empty() ); SectionNode& sectionNode = *m_sectionStack.back(); sectionNode.assertions.push_back( assertionStats ); return true; } virtual void sectionEnded( SectionStats const& sectionStats ) CATCH_OVERRIDE { assert( !m_sectionStack.empty() ); SectionNode& node = *m_sectionStack.back(); node.stats = sectionStats; m_sectionStack.pop_back(); } virtual void testCaseEnded( TestCaseStats const& testCaseStats ) CATCH_OVERRIDE { Ptr node = new TestCaseNode( testCaseStats ); assert( m_sectionStack.size() == 0 ); node->children.push_back( m_rootSection ); m_testCases.push_back( node ); m_rootSection.reset(); assert( m_deepestSection ); m_deepestSection->stdOut = testCaseStats.stdOut; m_deepestSection->stdErr = testCaseStats.stdErr; } virtual void testGroupEnded( TestGroupStats const& 
testGroupStats ) CATCH_OVERRIDE { Ptr node = new TestGroupNode( testGroupStats ); node->children.swap( m_testCases ); m_testGroups.push_back( node ); } virtual void testRunEnded( TestRunStats const& testRunStats ) CATCH_OVERRIDE { Ptr node = new TestRunNode( testRunStats ); node->children.swap( m_testGroups ); m_testRuns.push_back( node ); testRunEndedCumulative(); } virtual void testRunEndedCumulative() = 0; virtual void skipTest( TestCaseInfo const& ) CATCH_OVERRIDE {} Ptr m_config; std::ostream& stream; std::vector m_assertions; std::vector > > m_sections; std::vector > m_testCases; std::vector > m_testGroups; std::vector > m_testRuns; Ptr m_rootSection; Ptr m_deepestSection; std::vector > m_sectionStack; ReporterPreferences m_reporterPrefs; }; template char const* getLineOfChars() { static char line[CATCH_CONFIG_CONSOLE_WIDTH] = {0}; if( !*line ) { memset( line, C, CATCH_CONFIG_CONSOLE_WIDTH-1 ); line[CATCH_CONFIG_CONSOLE_WIDTH-1] = 0; } return line; } struct TestEventListenerBase : StreamingReporterBase { TestEventListenerBase( ReporterConfig const& _config ) : StreamingReporterBase( _config ) {} virtual void assertionStarting( AssertionInfo const& ) CATCH_OVERRIDE {} virtual bool assertionEnded( AssertionStats const& ) CATCH_OVERRIDE { return false; } }; } // end namespace Catch // #included from: ../internal/catch_reporter_registrars.hpp #define TWOBLUECUBES_CATCH_REPORTER_REGISTRARS_HPP_INCLUDED namespace Catch { template class LegacyReporterRegistrar { class ReporterFactory : public IReporterFactory { virtual IStreamingReporter* create( ReporterConfig const& config ) const { return new LegacyReporterAdapter( new T( config ) ); } virtual std::string getDescription() const { return T::getDescription(); } }; public: LegacyReporterRegistrar( std::string const& name ) { getMutableRegistryHub().registerReporter( name, new ReporterFactory() ); } }; template class ReporterRegistrar { class ReporterFactory : public SharedImpl { // *** Please Note ***: // - If you end up here looking at a compiler error because it's trying to register // your custom reporter class be aware that the native reporter interface has changed // to IStreamingReporter. The "legacy" interface, IReporter, is still supported via // an adapter. Just use REGISTER_LEGACY_REPORTER to take advantage of the adapter. // However please consider updating to the new interface as the old one is now // deprecated and will probably be removed quite soon! // Please contact me via github if you have any questions at all about this. // In fact, ideally, please contact me anyway to let me know you've hit this - as I have // no idea who is actually using custom reporters at all (possibly no-one!). // The new interface is designed to minimise exposure to interface changes in the future. 
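            //
            // Illustrative sketch only (MyReporter is hypothetical, not part of this
            // header): a custom reporter typically derives from StreamingReporterBase,
            // supplies getDescription() plus the two assertion hooks, and is then
            // registered by name, e.g.
            //
            //     struct MyReporter : Catch::StreamingReporterBase {
            //         MyReporter( Catch::ReporterConfig const& c ) : StreamingReporterBase( c ) {}
            //         static std::string getDescription() { return "example reporter"; }
            //         virtual void assertionStarting( Catch::AssertionInfo const& ) {}
            //         virtual bool assertionEnded( Catch::AssertionStats const& ) { return true; }
            //     };
            //     INTERNAL_CATCH_REGISTER_REPORTER( "my", MyReporter )
            //
            // after which it can be selected with the reporter (-r) command-line option.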
virtual IStreamingReporter* create( ReporterConfig const& config ) const { return new T( config ); } virtual std::string getDescription() const { return T::getDescription(); } }; public: ReporterRegistrar( std::string const& name ) { getMutableRegistryHub().registerReporter( name, new ReporterFactory() ); } }; template class ListenerRegistrar { class ListenerFactory : public SharedImpl { virtual IStreamingReporter* create( ReporterConfig const& config ) const { return new T( config ); } virtual std::string getDescription() const { return ""; } }; public: ListenerRegistrar() { getMutableRegistryHub().registerListener( new ListenerFactory() ); } }; } #define INTERNAL_CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) \ namespace{ Catch::LegacyReporterRegistrar catch_internal_RegistrarFor##reporterType( name ); } #define INTERNAL_CATCH_REGISTER_REPORTER( name, reporterType ) \ namespace{ Catch::ReporterRegistrar catch_internal_RegistrarFor##reporterType( name ); } #define INTERNAL_CATCH_REGISTER_LISTENER( listenerType ) \ namespace{ Catch::ListenerRegistrar catch_internal_RegistrarFor##listenerType; } // #included from: ../internal/catch_xmlwriter.hpp #define TWOBLUECUBES_CATCH_XMLWRITER_HPP_INCLUDED #include #include #include #include namespace Catch { class XmlEncode { public: enum ForWhat { ForTextNodes, ForAttributes }; XmlEncode( std::string const& str, ForWhat forWhat = ForTextNodes ) : m_str( str ), m_forWhat( forWhat ) {} void encodeTo( std::ostream& os ) const { // Apostrophe escaping not necessary if we always use " to write attributes // (see: http://www.w3.org/TR/xml/#syntax) for( std::size_t i = 0; i < m_str.size(); ++ i ) { char c = m_str[i]; switch( c ) { case '<': os << "<"; break; case '&': os << "&"; break; case '>': // See: http://www.w3.org/TR/xml/#syntax if( i > 2 && m_str[i-1] == ']' && m_str[i-2] == ']' ) os << ">"; else os << c; break; case '\"': if( m_forWhat == ForAttributes ) os << """; else os << c; break; default: // Escape control chars - based on contribution by @espenalb in PR #465 and // by @mrpi PR #588 if ( ( c >= 0 && c < '\x09' ) || ( c > '\x0D' && c < '\x20') || c=='\x7F' ) os << "&#x" << std::uppercase << std::hex << std::setfill('0') << std::setw(2) << static_cast( c ) << ';'; else os << c; } } } friend std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) { xmlEncode.encodeTo( os ); return os; } private: std::string m_str; ForWhat m_forWhat; }; class XmlWriter { public: class ScopedElement { public: ScopedElement( XmlWriter* writer ) : m_writer( writer ) {} ScopedElement( ScopedElement const& other ) : m_writer( other.m_writer ){ other.m_writer = CATCH_NULL; } ~ScopedElement() { if( m_writer ) m_writer->endElement(); } ScopedElement& writeText( std::string const& text, bool indent = true ) { m_writer->writeText( text, indent ); return *this; } template ScopedElement& writeAttribute( std::string const& name, T const& attribute ) { m_writer->writeAttribute( name, attribute ); return *this; } private: mutable XmlWriter* m_writer; }; XmlWriter() : m_tagIsOpen( false ), m_needsNewline( false ), m_os( &Catch::cout() ) { // We encode control characters, which requires // XML 1.1 // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0 *m_os << "\n"; } XmlWriter( std::ostream& os ) : m_tagIsOpen( false ), m_needsNewline( false ), m_os( &os ) { *m_os << "\n"; } ~XmlWriter() { while( !m_tags.empty() ) endElement(); } XmlWriter& startElement( std::string const& name ) { ensureTagClosed(); 
newlineIfNecessary(); stream() << m_indent << "<" << name; m_tags.push_back( name ); m_indent += " "; m_tagIsOpen = true; return *this; } ScopedElement scopedElement( std::string const& name ) { ScopedElement scoped( this ); startElement( name ); return scoped; } XmlWriter& endElement() { newlineIfNecessary(); m_indent = m_indent.substr( 0, m_indent.size()-2 ); if( m_tagIsOpen ) { stream() << "/>\n"; m_tagIsOpen = false; } else { stream() << m_indent << "\n"; } m_tags.pop_back(); return *this; } XmlWriter& writeAttribute( std::string const& name, std::string const& attribute ) { if( !name.empty() && !attribute.empty() ) stream() << " " << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << "\""; return *this; } XmlWriter& writeAttribute( std::string const& name, bool attribute ) { stream() << " " << name << "=\"" << ( attribute ? "true" : "false" ) << "\""; return *this; } template XmlWriter& writeAttribute( std::string const& name, T const& attribute ) { std::ostringstream oss; oss << attribute; return writeAttribute( name, oss.str() ); } XmlWriter& writeText( std::string const& text, bool indent = true ) { if( !text.empty() ){ bool tagWasOpen = m_tagIsOpen; ensureTagClosed(); if( tagWasOpen && indent ) stream() << m_indent; stream() << XmlEncode( text ); m_needsNewline = true; } return *this; } XmlWriter& writeComment( std::string const& text ) { ensureTagClosed(); stream() << m_indent << ""; m_needsNewline = true; return *this; } XmlWriter& writeBlankLine() { ensureTagClosed(); stream() << "\n"; return *this; } void setStream( std::ostream& os ) { m_os = &os; } private: XmlWriter( XmlWriter const& ); void operator=( XmlWriter const& ); std::ostream& stream() { return *m_os; } void ensureTagClosed() { if( m_tagIsOpen ) { stream() << ">\n"; m_tagIsOpen = false; } } void newlineIfNecessary() { if( m_needsNewline ) { stream() << "\n"; m_needsNewline = false; } } bool m_tagIsOpen; bool m_needsNewline; std::vector m_tags; std::string m_indent; std::ostream* m_os; }; } // #included from: catch_reenable_warnings.h #define TWOBLUECUBES_CATCH_REENABLE_WARNINGS_H_INCLUDED #ifdef __clang__ # ifdef __ICC // icpc defines the __clang__ macro # pragma warning(pop) # else # pragma clang diagnostic pop # endif #elif defined __GNUC__ # pragma GCC diagnostic pop #endif namespace Catch { class XmlReporter : public StreamingReporterBase { public: XmlReporter( ReporterConfig const& _config ) : StreamingReporterBase( _config ), m_xml(_config.stream()), m_sectionDepth( 0 ) { m_reporterPrefs.shouldRedirectStdOut = true; } virtual ~XmlReporter() CATCH_OVERRIDE; static std::string getDescription() { return "Reports test results as an XML document"; } public: // StreamingReporterBase virtual void noMatchingTestCases( std::string const& s ) CATCH_OVERRIDE { StreamingReporterBase::noMatchingTestCases( s ); } virtual void testRunStarting( TestRunInfo const& testInfo ) CATCH_OVERRIDE { StreamingReporterBase::testRunStarting( testInfo ); m_xml.startElement( "Catch" ); if( !m_config->name().empty() ) m_xml.writeAttribute( "name", m_config->name() ); } virtual void testGroupStarting( GroupInfo const& groupInfo ) CATCH_OVERRIDE { StreamingReporterBase::testGroupStarting( groupInfo ); m_xml.startElement( "Group" ) .writeAttribute( "name", groupInfo.name ); } virtual void testCaseStarting( TestCaseInfo const& testInfo ) CATCH_OVERRIDE { StreamingReporterBase::testCaseStarting(testInfo); m_xml.startElement( "TestCase" ).writeAttribute( "name", testInfo.name ); if ( m_config->showDurations() == 
ShowDurations::Always ) m_testCaseTimer.start(); } virtual void sectionStarting( SectionInfo const& sectionInfo ) CATCH_OVERRIDE { StreamingReporterBase::sectionStarting( sectionInfo ); if( m_sectionDepth++ > 0 ) { m_xml.startElement( "Section" ) .writeAttribute( "name", trim( sectionInfo.name ) ) .writeAttribute( "description", sectionInfo.description ); } } virtual void assertionStarting( AssertionInfo const& ) CATCH_OVERRIDE { } virtual bool assertionEnded( AssertionStats const& assertionStats ) CATCH_OVERRIDE { const AssertionResult& assertionResult = assertionStats.assertionResult; // Print any info messages in tags. if( assertionStats.assertionResult.getResultType() != ResultWas::Ok ) { for( std::vector::const_iterator it = assertionStats.infoMessages.begin(), itEnd = assertionStats.infoMessages.end(); it != itEnd; ++it ) { if( it->type == ResultWas::Info ) { m_xml.scopedElement( "Info" ) .writeText( it->message ); } else if ( it->type == ResultWas::Warning ) { m_xml.scopedElement( "Warning" ) .writeText( it->message ); } } } // Drop out if result was successful but we're not printing them. if( !m_config->includeSuccessfulResults() && isOk(assertionResult.getResultType()) ) return true; // Print the expression if there is one. if( assertionResult.hasExpression() ) { m_xml.startElement( "Expression" ) .writeAttribute( "success", assertionResult.succeeded() ) .writeAttribute( "type", assertionResult.getTestMacroName() ) .writeAttribute( "filename", assertionResult.getSourceInfo().file ) .writeAttribute( "line", assertionResult.getSourceInfo().line ); m_xml.scopedElement( "Original" ) .writeText( assertionResult.getExpression() ); m_xml.scopedElement( "Expanded" ) .writeText( assertionResult.getExpandedExpression() ); } // And... Print a result applicable to each result type. 
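// In outline: ThrewException produces an <Exception> element and FatalErrorCondition a
// <FatalErrorCondition> element, each carrying the source filename, line and message;
// Info adds an <Info> element; Warnings were already emitted in the info-message loop
// above; ExplicitFailure adds a <Failure> element; every other result type adds nothing here.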
switch( assertionResult.getResultType() ) { case ResultWas::ThrewException: m_xml.scopedElement( "Exception" ) .writeAttribute( "filename", assertionResult.getSourceInfo().file ) .writeAttribute( "line", assertionResult.getSourceInfo().line ) .writeText( assertionResult.getMessage() ); break; case ResultWas::FatalErrorCondition: m_xml.scopedElement( "FatalErrorCondition" ) .writeAttribute( "filename", assertionResult.getSourceInfo().file ) .writeAttribute( "line", assertionResult.getSourceInfo().line ) .writeText( assertionResult.getMessage() ); break; case ResultWas::Info: m_xml.scopedElement( "Info" ) .writeText( assertionResult.getMessage() ); break; case ResultWas::Warning: // Warning will already have been written break; case ResultWas::ExplicitFailure: m_xml.scopedElement( "Failure" ) .writeText( assertionResult.getMessage() ); break; default: break; } if( assertionResult.hasExpression() ) m_xml.endElement(); return true; } virtual void sectionEnded( SectionStats const& sectionStats ) CATCH_OVERRIDE { StreamingReporterBase::sectionEnded( sectionStats ); if( --m_sectionDepth > 0 ) { XmlWriter::ScopedElement e = m_xml.scopedElement( "OverallResults" ); e.writeAttribute( "successes", sectionStats.assertions.passed ); e.writeAttribute( "failures", sectionStats.assertions.failed ); e.writeAttribute( "expectedFailures", sectionStats.assertions.failedButOk ); if ( m_config->showDurations() == ShowDurations::Always ) e.writeAttribute( "durationInSeconds", sectionStats.durationInSeconds ); m_xml.endElement(); } } virtual void testCaseEnded( TestCaseStats const& testCaseStats ) CATCH_OVERRIDE { StreamingReporterBase::testCaseEnded( testCaseStats ); XmlWriter::ScopedElement e = m_xml.scopedElement( "OverallResult" ); e.writeAttribute( "success", testCaseStats.totals.assertions.allOk() ); if ( m_config->showDurations() == ShowDurations::Always ) e.writeAttribute( "durationInSeconds", m_testCaseTimer.getElapsedSeconds() ); m_xml.endElement(); } virtual void testGroupEnded( TestGroupStats const& testGroupStats ) CATCH_OVERRIDE { StreamingReporterBase::testGroupEnded( testGroupStats ); // TODO: Check testGroupStats.aborting and act accordingly. 
m_xml.scopedElement( "OverallResults" ) .writeAttribute( "successes", testGroupStats.totals.assertions.passed ) .writeAttribute( "failures", testGroupStats.totals.assertions.failed ) .writeAttribute( "expectedFailures", testGroupStats.totals.assertions.failedButOk ); m_xml.endElement(); } virtual void testRunEnded( TestRunStats const& testRunStats ) CATCH_OVERRIDE { StreamingReporterBase::testRunEnded( testRunStats ); m_xml.scopedElement( "OverallResults" ) .writeAttribute( "successes", testRunStats.totals.assertions.passed ) .writeAttribute( "failures", testRunStats.totals.assertions.failed ) .writeAttribute( "expectedFailures", testRunStats.totals.assertions.failedButOk ); m_xml.endElement(); } private: Timer m_testCaseTimer; XmlWriter m_xml; int m_sectionDepth; }; INTERNAL_CATCH_REGISTER_REPORTER( "xml", XmlReporter ) } // end namespace Catch // #included from: ../reporters/catch_reporter_junit.hpp #define TWOBLUECUBES_CATCH_REPORTER_JUNIT_HPP_INCLUDED #include namespace Catch { class JunitReporter : public CumulativeReporterBase { public: JunitReporter( ReporterConfig const& _config ) : CumulativeReporterBase( _config ), xml( _config.stream() ) { m_reporterPrefs.shouldRedirectStdOut = true; } virtual ~JunitReporter() CATCH_OVERRIDE; static std::string getDescription() { return "Reports test results in an XML format that looks like Ant's junitreport target"; } virtual void noMatchingTestCases( std::string const& /*spec*/ ) CATCH_OVERRIDE {} virtual void testRunStarting( TestRunInfo const& runInfo ) CATCH_OVERRIDE { CumulativeReporterBase::testRunStarting( runInfo ); xml.startElement( "testsuites" ); } virtual void testGroupStarting( GroupInfo const& groupInfo ) CATCH_OVERRIDE { suiteTimer.start(); stdOutForSuite.str(""); stdErrForSuite.str(""); unexpectedExceptions = 0; CumulativeReporterBase::testGroupStarting( groupInfo ); } virtual bool assertionEnded( AssertionStats const& assertionStats ) CATCH_OVERRIDE { if( assertionStats.assertionResult.getResultType() == ResultWas::ThrewException ) unexpectedExceptions++; return CumulativeReporterBase::assertionEnded( assertionStats ); } virtual void testCaseEnded( TestCaseStats const& testCaseStats ) CATCH_OVERRIDE { stdOutForSuite << testCaseStats.stdOut; stdErrForSuite << testCaseStats.stdErr; CumulativeReporterBase::testCaseEnded( testCaseStats ); } virtual void testGroupEnded( TestGroupStats const& testGroupStats ) CATCH_OVERRIDE { double suiteTime = suiteTimer.getElapsedSeconds(); CumulativeReporterBase::testGroupEnded( testGroupStats ); writeGroup( *m_testGroups.back(), suiteTime ); } virtual void testRunEndedCumulative() CATCH_OVERRIDE { xml.endElement(); } void writeGroup( TestGroupNode const& groupNode, double suiteTime ) { XmlWriter::ScopedElement e = xml.scopedElement( "testsuite" ); TestGroupStats const& stats = groupNode.value; xml.writeAttribute( "name", stats.groupInfo.name ); xml.writeAttribute( "errors", unexpectedExceptions ); xml.writeAttribute( "failures", stats.totals.assertions.failed-unexpectedExceptions ); xml.writeAttribute( "tests", stats.totals.assertions.total() ); xml.writeAttribute( "hostname", "tbd" ); // !TBD if( m_config->showDurations() == ShowDurations::Never ) xml.writeAttribute( "time", "" ); else xml.writeAttribute( "time", suiteTime ); xml.writeAttribute( "timestamp", "tbd" ); // !TBD // Write test cases for( TestGroupNode::ChildNodes::const_iterator it = groupNode.children.begin(), itEnd = groupNode.children.end(); it != itEnd; ++it ) writeTestCase( **it ); xml.scopedElement( "system-out" ).writeText( 
trim( stdOutForSuite.str() ), false ); xml.scopedElement( "system-err" ).writeText( trim( stdErrForSuite.str() ), false ); } void writeTestCase( TestCaseNode const& testCaseNode ) { TestCaseStats const& stats = testCaseNode.value; // All test cases have exactly one section - which represents the // test case itself. That section may have 0-n nested sections assert( testCaseNode.children.size() == 1 ); SectionNode const& rootSection = *testCaseNode.children.front(); std::string className = stats.testInfo.className; if( className.empty() ) { if( rootSection.childSections.empty() ) className = "global"; } writeSection( className, "", rootSection ); } void writeSection( std::string const& className, std::string const& rootName, SectionNode const& sectionNode ) { std::string name = trim( sectionNode.stats.sectionInfo.name ); if( !rootName.empty() ) name = rootName + "/" + name; if( !sectionNode.assertions.empty() || !sectionNode.stdOut.empty() || !sectionNode.stdErr.empty() ) { XmlWriter::ScopedElement e = xml.scopedElement( "testcase" ); if( className.empty() ) { xml.writeAttribute( "classname", name ); xml.writeAttribute( "name", "root" ); } else { xml.writeAttribute( "classname", className ); xml.writeAttribute( "name", name ); } xml.writeAttribute( "time", Catch::toString( sectionNode.stats.durationInSeconds ) ); writeAssertions( sectionNode ); if( !sectionNode.stdOut.empty() ) xml.scopedElement( "system-out" ).writeText( trim( sectionNode.stdOut ), false ); if( !sectionNode.stdErr.empty() ) xml.scopedElement( "system-err" ).writeText( trim( sectionNode.stdErr ), false ); } for( SectionNode::ChildSections::const_iterator it = sectionNode.childSections.begin(), itEnd = sectionNode.childSections.end(); it != itEnd; ++it ) if( className.empty() ) writeSection( name, "", **it ); else writeSection( className, name, **it ); } void writeAssertions( SectionNode const& sectionNode ) { for( SectionNode::Assertions::const_iterator it = sectionNode.assertions.begin(), itEnd = sectionNode.assertions.end(); it != itEnd; ++it ) writeAssertion( *it ); } void writeAssertion( AssertionStats const& stats ) { AssertionResult const& result = stats.assertionResult; if( !result.isOk() ) { std::string elementName; switch( result.getResultType() ) { case ResultWas::ThrewException: case ResultWas::FatalErrorCondition: elementName = "error"; break; case ResultWas::ExplicitFailure: elementName = "failure"; break; case ResultWas::ExpressionFailed: elementName = "failure"; break; case ResultWas::DidntThrowException: elementName = "failure"; break; // We should never see these here: case ResultWas::Info: case ResultWas::Warning: case ResultWas::Ok: case ResultWas::Unknown: case ResultWas::FailureBit: case ResultWas::Exception: elementName = "internalError"; break; } XmlWriter::ScopedElement e = xml.scopedElement( elementName ); xml.writeAttribute( "message", result.getExpandedExpression() ); xml.writeAttribute( "type", result.getTestMacroName() ); std::ostringstream oss; if( !result.getMessage().empty() ) oss << result.getMessage() << "\n"; for( std::vector::const_iterator it = stats.infoMessages.begin(), itEnd = stats.infoMessages.end(); it != itEnd; ++it ) if( it->type == ResultWas::Info ) oss << it->message << "\n"; oss << "at " << result.getSourceInfo(); xml.writeText( oss.str(), false ); } } XmlWriter xml; Timer suiteTimer; std::ostringstream stdOutForSuite; std::ostringstream stdErrForSuite; unsigned int unexpectedExceptions; }; INTERNAL_CATCH_REGISTER_REPORTER( "junit", JunitReporter ) } // end namespace Catch // 
#included from: ../reporters/catch_reporter_console.hpp #define TWOBLUECUBES_CATCH_REPORTER_CONSOLE_HPP_INCLUDED namespace Catch { struct ConsoleReporter : StreamingReporterBase { ConsoleReporter( ReporterConfig const& _config ) : StreamingReporterBase( _config ), m_headerPrinted( false ) {} virtual ~ConsoleReporter() CATCH_OVERRIDE; static std::string getDescription() { return "Reports test results as plain lines of text"; } virtual void noMatchingTestCases( std::string const& spec ) CATCH_OVERRIDE { stream << "No test cases matched '" << spec << "'" << std::endl; } virtual void assertionStarting( AssertionInfo const& ) CATCH_OVERRIDE { } virtual bool assertionEnded( AssertionStats const& _assertionStats ) CATCH_OVERRIDE { AssertionResult const& result = _assertionStats.assertionResult; bool printInfoMessages = true; // Drop out if result was successful and we're not printing those if( !m_config->includeSuccessfulResults() && result.isOk() ) { if( result.getResultType() != ResultWas::Warning ) return false; printInfoMessages = false; } lazyPrint(); AssertionPrinter printer( stream, _assertionStats, printInfoMessages ); printer.print(); stream << std::endl; return true; } virtual void sectionStarting( SectionInfo const& _sectionInfo ) CATCH_OVERRIDE { m_headerPrinted = false; StreamingReporterBase::sectionStarting( _sectionInfo ); } virtual void sectionEnded( SectionStats const& _sectionStats ) CATCH_OVERRIDE { if( _sectionStats.missingAssertions ) { lazyPrint(); Colour colour( Colour::ResultError ); if( m_sectionStack.size() > 1 ) stream << "\nNo assertions in section"; else stream << "\nNo assertions in test case"; stream << " '" << _sectionStats.sectionInfo.name << "'\n" << std::endl; } if( m_headerPrinted ) { if( m_config->showDurations() == ShowDurations::Always ) stream << "Completed in " << _sectionStats.durationInSeconds << "s" << std::endl; m_headerPrinted = false; } else { if( m_config->showDurations() == ShowDurations::Always ) stream << _sectionStats.sectionInfo.name << " completed in " << _sectionStats.durationInSeconds << "s" << std::endl; } StreamingReporterBase::sectionEnded( _sectionStats ); } virtual void testCaseEnded( TestCaseStats const& _testCaseStats ) CATCH_OVERRIDE { StreamingReporterBase::testCaseEnded( _testCaseStats ); m_headerPrinted = false; } virtual void testGroupEnded( TestGroupStats const& _testGroupStats ) CATCH_OVERRIDE { if( currentGroupInfo.used ) { printSummaryDivider(); stream << "Summary for group '" << _testGroupStats.groupInfo.name << "':\n"; printTotals( _testGroupStats.totals ); stream << "\n" << std::endl; } StreamingReporterBase::testGroupEnded( _testGroupStats ); } virtual void testRunEnded( TestRunStats const& _testRunStats ) CATCH_OVERRIDE { printTotalsDivider( _testRunStats.totals ); printTotals( _testRunStats.totals ); stream << std::endl; StreamingReporterBase::testRunEnded( _testRunStats ); } private: class AssertionPrinter { void operator= ( AssertionPrinter const& ); public: AssertionPrinter( std::ostream& _stream, AssertionStats const& _stats, bool _printInfoMessages ) : stream( _stream ), stats( _stats ), result( _stats.assertionResult ), colour( Colour::None ), message( result.getMessage() ), messages( _stats.infoMessages ), printInfoMessages( _printInfoMessages ) { switch( result.getResultType() ) { case ResultWas::Ok: colour = Colour::Success; passOrFail = "PASSED"; //if( result.hasMessage() ) if( _stats.infoMessages.size() == 1 ) messageLabel = "with message"; if( _stats.infoMessages.size() > 1 ) messageLabel = "with messages"; 
break; case ResultWas::ExpressionFailed: if( result.isOk() ) { colour = Colour::Success; passOrFail = "FAILED - but was ok"; } else { colour = Colour::Error; passOrFail = "FAILED"; } if( _stats.infoMessages.size() == 1 ) messageLabel = "with message"; if( _stats.infoMessages.size() > 1 ) messageLabel = "with messages"; break; case ResultWas::ThrewException: colour = Colour::Error; passOrFail = "FAILED"; messageLabel = "due to unexpected exception with message"; break; case ResultWas::FatalErrorCondition: colour = Colour::Error; passOrFail = "FAILED"; messageLabel = "due to a fatal error condition"; break; case ResultWas::DidntThrowException: colour = Colour::Error; passOrFail = "FAILED"; messageLabel = "because no exception was thrown where one was expected"; break; case ResultWas::Info: messageLabel = "info"; break; case ResultWas::Warning: messageLabel = "warning"; break; case ResultWas::ExplicitFailure: passOrFail = "FAILED"; colour = Colour::Error; if( _stats.infoMessages.size() == 1 ) messageLabel = "explicitly with message"; if( _stats.infoMessages.size() > 1 ) messageLabel = "explicitly with messages"; break; // These cases are here to prevent compiler warnings case ResultWas::Unknown: case ResultWas::FailureBit: case ResultWas::Exception: passOrFail = "** internal error **"; colour = Colour::Error; break; } } void print() const { printSourceInfo(); if( stats.totals.assertions.total() > 0 ) { if( result.isOk() ) stream << "\n"; printResultType(); printOriginalExpression(); printReconstructedExpression(); } else { stream << "\n"; } printMessage(); } private: void printResultType() const { if( !passOrFail.empty() ) { Colour colourGuard( colour ); stream << passOrFail << ":\n"; } } void printOriginalExpression() const { if( result.hasExpression() ) { Colour colourGuard( Colour::OriginalExpression ); stream << " "; stream << result.getExpressionInMacro(); stream << "\n"; } } void printReconstructedExpression() const { if( result.hasExpandedExpression() ) { stream << "with expansion:\n"; Colour colourGuard( Colour::ReconstructedExpression ); stream << Text( result.getExpandedExpression(), TextAttributes().setIndent(2) ) << "\n"; } } void printMessage() const { if( !messageLabel.empty() ) stream << messageLabel << ":" << "\n"; for( std::vector::const_iterator it = messages.begin(), itEnd = messages.end(); it != itEnd; ++it ) { // If this assertion is a warning ignore any INFO messages if( printInfoMessages || it->type != ResultWas::Info ) stream << Text( it->message, TextAttributes().setIndent(2) ) << "\n"; } } void printSourceInfo() const { Colour colourGuard( Colour::FileName ); stream << result.getSourceInfo() << ": "; } std::ostream& stream; AssertionStats const& stats; AssertionResult const& result; Colour::Code colour; std::string passOrFail; std::string messageLabel; std::string message; std::vector messages; bool printInfoMessages; }; void lazyPrint() { if( !currentTestRunInfo.used ) lazyPrintRunInfo(); if( !currentGroupInfo.used ) lazyPrintGroupInfo(); if( !m_headerPrinted ) { printTestCaseAndSectionHeader(); m_headerPrinted = true; } } void lazyPrintRunInfo() { stream << "\n" << getLineOfChars<'~'>() << "\n"; Colour colour( Colour::SecondaryText ); stream << currentTestRunInfo->name << " is a Catch v" << libraryVersion << " host application.\n" << "Run with -? 
for options\n\n"; if( m_config->rngSeed() != 0 ) stream << "Randomness seeded to: " << m_config->rngSeed() << "\n\n"; currentTestRunInfo.used = true; } void lazyPrintGroupInfo() { if( !currentGroupInfo->name.empty() && currentGroupInfo->groupsCounts > 1 ) { printClosedHeader( "Group: " + currentGroupInfo->name ); currentGroupInfo.used = true; } } void printTestCaseAndSectionHeader() { assert( !m_sectionStack.empty() ); printOpenHeader( currentTestCaseInfo->name ); if( m_sectionStack.size() > 1 ) { Colour colourGuard( Colour::Headers ); std::vector::const_iterator it = m_sectionStack.begin()+1, // Skip first section (test case) itEnd = m_sectionStack.end(); for( ; it != itEnd; ++it ) printHeaderString( it->name, 2 ); } SourceLineInfo lineInfo = m_sectionStack.front().lineInfo; if( !lineInfo.empty() ){ stream << getLineOfChars<'-'>() << "\n"; Colour colourGuard( Colour::FileName ); stream << lineInfo << "\n"; } stream << getLineOfChars<'.'>() << "\n" << std::endl; } void printClosedHeader( std::string const& _name ) { printOpenHeader( _name ); stream << getLineOfChars<'.'>() << "\n"; } void printOpenHeader( std::string const& _name ) { stream << getLineOfChars<'-'>() << "\n"; { Colour colourGuard( Colour::Headers ); printHeaderString( _name ); } } // if string has a : in first line will set indent to follow it on // subsequent lines void printHeaderString( std::string const& _string, std::size_t indent = 0 ) { std::size_t i = _string.find( ": " ); if( i != std::string::npos ) i+=2; else i = 0; stream << Text( _string, TextAttributes() .setIndent( indent+i) .setInitialIndent( indent ) ) << "\n"; } struct SummaryColumn { SummaryColumn( std::string const& _label, Colour::Code _colour ) : label( _label ), colour( _colour ) {} SummaryColumn addRow( std::size_t count ) { std::ostringstream oss; oss << count; std::string row = oss.str(); for( std::vector::iterator it = rows.begin(); it != rows.end(); ++it ) { while( it->size() < row.size() ) *it = " " + *it; while( it->size() > row.size() ) row = " " + row; } rows.push_back( row ); return *this; } std::string label; Colour::Code colour; std::vector rows; }; void printTotals( Totals const& totals ) { if( totals.testCases.total() == 0 ) { stream << Colour( Colour::Warning ) << "No tests ran\n"; } else if( totals.assertions.total() > 0 && totals.testCases.allPassed() ) { stream << Colour( Colour::ResultSuccess ) << "All tests passed"; stream << " (" << pluralise( totals.assertions.passed, "assertion" ) << " in " << pluralise( totals.testCases.passed, "test case" ) << ")" << "\n"; } else { std::vector columns; columns.push_back( SummaryColumn( "", Colour::None ) .addRow( totals.testCases.total() ) .addRow( totals.assertions.total() ) ); columns.push_back( SummaryColumn( "passed", Colour::Success ) .addRow( totals.testCases.passed ) .addRow( totals.assertions.passed ) ); columns.push_back( SummaryColumn( "failed", Colour::ResultError ) .addRow( totals.testCases.failed ) .addRow( totals.assertions.failed ) ); columns.push_back( SummaryColumn( "failed as expected", Colour::ResultExpectedFailure ) .addRow( totals.testCases.failedButOk ) .addRow( totals.assertions.failedButOk ) ); printSummaryRow( "test cases", columns, 0 ); printSummaryRow( "assertions", columns, 1 ); } } void printSummaryRow( std::string const& label, std::vector const& cols, std::size_t row ) { for( std::vector::const_iterator it = cols.begin(); it != cols.end(); ++it ) { std::string value = it->rows[row]; if( it->label.empty() ) { stream << label << ": "; if( value != "0" ) stream << 
value; else stream << Colour( Colour::Warning ) << "- none -"; } else if( value != "0" ) { stream << Colour( Colour::LightGrey ) << " | "; stream << Colour( it->colour ) << value << " " << it->label; } } stream << "\n"; } static std::size_t makeRatio( std::size_t number, std::size_t total ) { std::size_t ratio = total > 0 ? CATCH_CONFIG_CONSOLE_WIDTH * number/ total : 0; return ( ratio == 0 && number > 0 ) ? 1 : ratio; } static std::size_t& findMax( std::size_t& i, std::size_t& j, std::size_t& k ) { if( i > j && i > k ) return i; else if( j > k ) return j; else return k; } void printTotalsDivider( Totals const& totals ) { if( totals.testCases.total() > 0 ) { std::size_t failedRatio = makeRatio( totals.testCases.failed, totals.testCases.total() ); std::size_t failedButOkRatio = makeRatio( totals.testCases.failedButOk, totals.testCases.total() ); std::size_t passedRatio = makeRatio( totals.testCases.passed, totals.testCases.total() ); while( failedRatio + failedButOkRatio + passedRatio < CATCH_CONFIG_CONSOLE_WIDTH-1 ) findMax( failedRatio, failedButOkRatio, passedRatio )++; while( failedRatio + failedButOkRatio + passedRatio > CATCH_CONFIG_CONSOLE_WIDTH-1 ) findMax( failedRatio, failedButOkRatio, passedRatio )--; stream << Colour( Colour::Error ) << std::string( failedRatio, '=' ); stream << Colour( Colour::ResultExpectedFailure ) << std::string( failedButOkRatio, '=' ); if( totals.testCases.allPassed() ) stream << Colour( Colour::ResultSuccess ) << std::string( passedRatio, '=' ); else stream << Colour( Colour::Success ) << std::string( passedRatio, '=' ); } else { stream << Colour( Colour::Warning ) << std::string( CATCH_CONFIG_CONSOLE_WIDTH-1, '=' ); } stream << "\n"; } void printSummaryDivider() { stream << getLineOfChars<'-'>() << "\n"; } private: bool m_headerPrinted; }; INTERNAL_CATCH_REGISTER_REPORTER( "console", ConsoleReporter ) } // end namespace Catch // #included from: ../reporters/catch_reporter_compact.hpp #define TWOBLUECUBES_CATCH_REPORTER_COMPACT_HPP_INCLUDED namespace Catch { struct CompactReporter : StreamingReporterBase { CompactReporter( ReporterConfig const& _config ) : StreamingReporterBase( _config ) {} virtual ~CompactReporter(); static std::string getDescription() { return "Reports test results on a single line, suitable for IDEs"; } virtual ReporterPreferences getPreferences() const { ReporterPreferences prefs; prefs.shouldRedirectStdOut = false; return prefs; } virtual void noMatchingTestCases( std::string const& spec ) { stream << "No test cases matched '" << spec << "'" << std::endl; } virtual void assertionStarting( AssertionInfo const& ) { } virtual bool assertionEnded( AssertionStats const& _assertionStats ) { AssertionResult const& result = _assertionStats.assertionResult; bool printInfoMessages = true; // Drop out if result was successful and we're not printing those if( !m_config->includeSuccessfulResults() && result.isOk() ) { if( result.getResultType() != ResultWas::Warning ) return false; printInfoMessages = false; } AssertionPrinter printer( stream, _assertionStats, printInfoMessages ); printer.print(); stream << std::endl; return true; } virtual void testRunEnded( TestRunStats const& _testRunStats ) { printTotals( _testRunStats.totals ); stream << "\n" << std::endl; StreamingReporterBase::testRunEnded( _testRunStats ); } private: class AssertionPrinter { void operator= ( AssertionPrinter const& ); public: AssertionPrinter( std::ostream& _stream, AssertionStats const& _stats, bool _printInfoMessages ) : stream( _stream ) , stats( _stats ) , result( 
_stats.assertionResult ) , messages( _stats.infoMessages ) , itMessage( _stats.infoMessages.begin() ) , printInfoMessages( _printInfoMessages ) {} void print() { printSourceInfo(); itMessage = messages.begin(); switch( result.getResultType() ) { case ResultWas::Ok: printResultType( Colour::ResultSuccess, passedString() ); printOriginalExpression(); printReconstructedExpression(); if ( ! result.hasExpression() ) printRemainingMessages( Colour::None ); else printRemainingMessages(); break; case ResultWas::ExpressionFailed: if( result.isOk() ) printResultType( Colour::ResultSuccess, failedString() + std::string( " - but was ok" ) ); else printResultType( Colour::Error, failedString() ); printOriginalExpression(); printReconstructedExpression(); printRemainingMessages(); break; case ResultWas::ThrewException: printResultType( Colour::Error, failedString() ); printIssue( "unexpected exception with message:" ); printMessage(); printExpressionWas(); printRemainingMessages(); break; case ResultWas::FatalErrorCondition: printResultType( Colour::Error, failedString() ); printIssue( "fatal error condition with message:" ); printMessage(); printExpressionWas(); printRemainingMessages(); break; case ResultWas::DidntThrowException: printResultType( Colour::Error, failedString() ); printIssue( "expected exception, got none" ); printExpressionWas(); printRemainingMessages(); break; case ResultWas::Info: printResultType( Colour::None, "info" ); printMessage(); printRemainingMessages(); break; case ResultWas::Warning: printResultType( Colour::None, "warning" ); printMessage(); printRemainingMessages(); break; case ResultWas::ExplicitFailure: printResultType( Colour::Error, failedString() ); printIssue( "explicitly" ); printRemainingMessages( Colour::None ); break; // These cases are here to prevent compiler warnings case ResultWas::Unknown: case ResultWas::FailureBit: case ResultWas::Exception: printResultType( Colour::Error, "** internal error **" ); break; } } private: // Colour::LightGrey static Colour::Code dimColour() { return Colour::FileName; } #ifdef CATCH_PLATFORM_MAC static const char* failedString() { return "FAILED"; } static const char* passedString() { return "PASSED"; } #else static const char* failedString() { return "failed"; } static const char* passedString() { return "passed"; } #endif void printSourceInfo() const { Colour colourGuard( Colour::FileName ); stream << result.getSourceInfo() << ":"; } void printResultType( Colour::Code colour, std::string passOrFail ) const { if( !passOrFail.empty() ) { { Colour colourGuard( colour ); stream << " " << passOrFail; } stream << ":"; } } void printIssue( std::string issue ) const { stream << " " << issue; } void printExpressionWas() { if( result.hasExpression() ) { stream << ";"; { Colour colour( dimColour() ); stream << " expression was:"; } printOriginalExpression(); } } void printOriginalExpression() const { if( result.hasExpression() ) { stream << " " << result.getExpression(); } } void printReconstructedExpression() const { if( result.hasExpandedExpression() ) { { Colour colour( dimColour() ); stream << " for: "; } stream << result.getExpandedExpression(); } } void printMessage() { if ( itMessage != messages.end() ) { stream << " '" << itMessage->message << "'"; ++itMessage; } } void printRemainingMessages( Colour::Code colour = dimColour() ) { if ( itMessage == messages.end() ) return; // using messages.end() directly yields compilation error: std::vector::const_iterator itEnd = messages.end(); const std::size_t N = static_cast( 
std::distance( itMessage, itEnd ) ); { Colour colourGuard( colour ); stream << " with " << pluralise( N, "message" ) << ":"; } for(; itMessage != itEnd; ) { // If this assertion is a warning ignore any INFO messages if( printInfoMessages || itMessage->type != ResultWas::Info ) { stream << " '" << itMessage->message << "'"; if ( ++itMessage != itEnd ) { Colour colourGuard( dimColour() ); stream << " and"; } } } } private: std::ostream& stream; AssertionStats const& stats; AssertionResult const& result; std::vector messages; std::vector::const_iterator itMessage; bool printInfoMessages; }; // Colour, message variants: // - white: No tests ran. // - red: Failed [both/all] N test cases, failed [both/all] M assertions. // - white: Passed [both/all] N test cases (no assertions). // - red: Failed N tests cases, failed M assertions. // - green: Passed [both/all] N tests cases with M assertions. std::string bothOrAll( std::size_t count ) const { return count == 1 ? "" : count == 2 ? "both " : "all " ; } void printTotals( const Totals& totals ) const { if( totals.testCases.total() == 0 ) { stream << "No tests ran."; } else if( totals.testCases.failed == totals.testCases.total() ) { Colour colour( Colour::ResultError ); const std::string qualify_assertions_failed = totals.assertions.failed == totals.assertions.total() ? bothOrAll( totals.assertions.failed ) : ""; stream << "Failed " << bothOrAll( totals.testCases.failed ) << pluralise( totals.testCases.failed, "test case" ) << ", " "failed " << qualify_assertions_failed << pluralise( totals.assertions.failed, "assertion" ) << "."; } else if( totals.assertions.total() == 0 ) { stream << "Passed " << bothOrAll( totals.testCases.total() ) << pluralise( totals.testCases.total(), "test case" ) << " (no assertions)."; } else if( totals.assertions.failed ) { Colour colour( Colour::ResultError ); stream << "Failed " << pluralise( totals.testCases.failed, "test case" ) << ", " "failed " << pluralise( totals.assertions.failed, "assertion" ) << "."; } else { Colour colour( Colour::ResultSuccess ); stream << "Passed " << bothOrAll( totals.testCases.passed ) << pluralise( totals.testCases.passed, "test case" ) << " with " << pluralise( totals.assertions.passed, "assertion" ) << "."; } } }; INTERNAL_CATCH_REGISTER_REPORTER( "compact", CompactReporter ) } // end namespace Catch namespace Catch { // These are all here to avoid warnings about not having any out of line // virtual methods NonCopyable::~NonCopyable() {} IShared::~IShared() {} IStream::~IStream() CATCH_NOEXCEPT {} FileStream::~FileStream() CATCH_NOEXCEPT {} CoutStream::~CoutStream() CATCH_NOEXCEPT {} DebugOutStream::~DebugOutStream() CATCH_NOEXCEPT {} StreamBufBase::~StreamBufBase() CATCH_NOEXCEPT {} IContext::~IContext() {} IResultCapture::~IResultCapture() {} ITestCase::~ITestCase() {} ITestCaseRegistry::~ITestCaseRegistry() {} IRegistryHub::~IRegistryHub() {} IMutableRegistryHub::~IMutableRegistryHub() {} IExceptionTranslator::~IExceptionTranslator() {} IExceptionTranslatorRegistry::~IExceptionTranslatorRegistry() {} IReporter::~IReporter() {} IReporterFactory::~IReporterFactory() {} IReporterRegistry::~IReporterRegistry() {} IStreamingReporter::~IStreamingReporter() {} AssertionStats::~AssertionStats() {} SectionStats::~SectionStats() {} TestCaseStats::~TestCaseStats() {} TestGroupStats::~TestGroupStats() {} TestRunStats::~TestRunStats() {} CumulativeReporterBase::SectionNode::~SectionNode() {} CumulativeReporterBase::~CumulativeReporterBase() {} StreamingReporterBase::~StreamingReporterBase() {} 
ConsoleReporter::~ConsoleReporter() {} CompactReporter::~CompactReporter() {} IRunner::~IRunner() {} IMutableContext::~IMutableContext() {} IConfig::~IConfig() {} XmlReporter::~XmlReporter() {} JunitReporter::~JunitReporter() {} TestRegistry::~TestRegistry() {} FreeFunctionTestCase::~FreeFunctionTestCase() {} IGeneratorInfo::~IGeneratorInfo() {} IGeneratorsForTest::~IGeneratorsForTest() {} WildcardPattern::~WildcardPattern() {} TestSpec::Pattern::~Pattern() {} TestSpec::NamePattern::~NamePattern() {} TestSpec::TagPattern::~TagPattern() {} TestSpec::ExcludedPattern::~ExcludedPattern() {} Matchers::Impl::StdString::Equals::~Equals() {} Matchers::Impl::StdString::Contains::~Contains() {} Matchers::Impl::StdString::StartsWith::~StartsWith() {} Matchers::Impl::StdString::EndsWith::~EndsWith() {} void Config::dummy() {} namespace TestCaseTracking { ITracker::~ITracker() {} TrackerBase::~TrackerBase() {} SectionTracker::~SectionTracker() {} IndexTracker::~IndexTracker() {} } } #ifdef __clang__ #pragma clang diagnostic pop #endif #endif #ifdef CATCH_CONFIG_MAIN // #included from: internal/catch_default_main.hpp #define TWOBLUECUBES_CATCH_DEFAULT_MAIN_HPP_INCLUDED #ifndef __OBJC__ // Standard C/C++ main entry point int main (int argc, char * argv[]) { return Catch::Session().run( argc, argv ); } #else // __OBJC__ // Objective-C entry point int main (int argc, char * const argv[]) { #if !CATCH_ARC_ENABLED NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init]; #endif Catch::registerTestMethods(); int result = Catch::Session().run( argc, (char* const*)argv ); #if !CATCH_ARC_ENABLED [pool drain]; #endif return result; } #endif // __OBJC__ #endif #ifdef CLARA_CONFIG_MAIN_NOT_DEFINED # undef CLARA_CONFIG_MAIN #endif ////// // If this config identifier is defined then all CATCH macros are prefixed with CATCH_ #ifdef CATCH_CONFIG_PREFIX_ALL #define CATCH_REQUIRE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::Normal, "CATCH_REQUIRE" ) #define CATCH_REQUIRE_FALSE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::Normal | Catch::ResultDisposition::FalseTest, "CATCH_REQUIRE_FALSE" ) #define CATCH_REQUIRE_THROWS( expr ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::Normal, "", "CATCH_REQUIRE_THROWS" ) #define CATCH_REQUIRE_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( expr, exceptionType, Catch::ResultDisposition::Normal, "CATCH_REQUIRE_THROWS_AS" ) #define CATCH_REQUIRE_THROWS_WITH( expr, matcher ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::Normal, matcher, "CATCH_REQUIRE_THROWS_WITH" ) #define CATCH_REQUIRE_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( expr, Catch::ResultDisposition::Normal, "CATCH_REQUIRE_NOTHROW" ) #define CATCH_CHECK( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECK" ) #define CATCH_CHECK_FALSE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::FalseTest, "CATCH_CHECK_FALSE" ) #define CATCH_CHECKED_IF( expr ) INTERNAL_CATCH_IF( expr, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECKED_IF" ) #define CATCH_CHECKED_ELSE( expr ) INTERNAL_CATCH_ELSE( expr, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECKED_ELSE" ) #define CATCH_CHECK_NOFAIL( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::SuppressFail, "CATCH_CHECK_NOFAIL" ) #define CATCH_CHECK_THROWS( expr ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::ContinueOnFailure, "", "CATCH_CHECK_THROWS" ) 
#define CATCH_CHECK_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( expr, exceptionType, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECK_THROWS_AS" ) #define CATCH_CHECK_THROWS_WITH( expr, matcher ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::ContinueOnFailure, matcher, "CATCH_CHECK_THROWS_WITH" ) #define CATCH_CHECK_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( expr, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECK_NOTHROW" ) #define CHECK_THAT( arg, matcher ) INTERNAL_CHECK_THAT( arg, matcher, Catch::ResultDisposition::ContinueOnFailure, "CATCH_CHECK_THAT" ) #define CATCH_REQUIRE_THAT( arg, matcher ) INTERNAL_CHECK_THAT( arg, matcher, Catch::ResultDisposition::Normal, "CATCH_REQUIRE_THAT" ) #define CATCH_INFO( msg ) INTERNAL_CATCH_INFO( msg, "CATCH_INFO" ) #define CATCH_WARN( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::Warning, Catch::ResultDisposition::ContinueOnFailure, "CATCH_WARN", msg ) #define CATCH_SCOPED_INFO( msg ) INTERNAL_CATCH_INFO( msg, "CATCH_INFO" ) #define CATCH_CAPTURE( msg ) INTERNAL_CATCH_INFO( #msg " := " << msg, "CATCH_CAPTURE" ) #define CATCH_SCOPED_CAPTURE( msg ) INTERNAL_CATCH_INFO( #msg " := " << msg, "CATCH_CAPTURE" ) #ifdef CATCH_CONFIG_VARIADIC_MACROS #define CATCH_TEST_CASE( ... ) INTERNAL_CATCH_TESTCASE( __VA_ARGS__ ) #define CATCH_TEST_CASE_METHOD( className, ... ) INTERNAL_CATCH_TEST_CASE_METHOD( className, __VA_ARGS__ ) #define CATCH_METHOD_AS_TEST_CASE( method, ... ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, __VA_ARGS__ ) #define CATCH_REGISTER_TEST_CASE( Function, ... ) INTERNAL_CATCH_REGISTER_TESTCASE( Function, __VA_ARGS__ ) #define CATCH_SECTION( ... ) INTERNAL_CATCH_SECTION( __VA_ARGS__ ) #define CATCH_FAIL( ... ) INTERNAL_CATCH_MSG( Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, "CATCH_FAIL", __VA_ARGS__ ) #define CATCH_SUCCEED( ... ) INTERNAL_CATCH_MSG( Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, "CATCH_SUCCEED", __VA_ARGS__ ) #else #define CATCH_TEST_CASE( name, description ) INTERNAL_CATCH_TESTCASE( name, description ) #define CATCH_TEST_CASE_METHOD( className, name, description ) INTERNAL_CATCH_TEST_CASE_METHOD( className, name, description ) #define CATCH_METHOD_AS_TEST_CASE( method, name, description ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, name, description ) #define CATCH_REGISTER_TEST_CASE( function, name, description ) INTERNAL_CATCH_REGISTER_TESTCASE( function, name, description ) #define CATCH_SECTION( name, description ) INTERNAL_CATCH_SECTION( name, description ) #define CATCH_FAIL( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, "CATCH_FAIL", msg ) #define CATCH_SUCCEED( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, "CATCH_SUCCEED", msg ) #endif #define CATCH_ANON_TEST_CASE() INTERNAL_CATCH_TESTCASE( "", "" ) #define CATCH_REGISTER_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_REPORTER( name, reporterType ) #define CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) #define CATCH_GENERATE( expr) INTERNAL_CATCH_GENERATE( expr ) // "BDD-style" convenience wrappers #ifdef CATCH_CONFIG_VARIADIC_MACROS #define CATCH_SCENARIO( ... ) CATCH_TEST_CASE( "Scenario: " __VA_ARGS__ ) #define CATCH_SCENARIO_METHOD( className, ... 
) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " __VA_ARGS__ ) #else #define CATCH_SCENARIO( name, tags ) CATCH_TEST_CASE( "Scenario: " name, tags ) #define CATCH_SCENARIO_METHOD( className, name, tags ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " name, tags ) #endif #define CATCH_GIVEN( desc ) CATCH_SECTION( std::string( "Given: ") + desc, "" ) #define CATCH_WHEN( desc ) CATCH_SECTION( std::string( " When: ") + desc, "" ) #define CATCH_AND_WHEN( desc ) CATCH_SECTION( std::string( " And: ") + desc, "" ) #define CATCH_THEN( desc ) CATCH_SECTION( std::string( " Then: ") + desc, "" ) #define CATCH_AND_THEN( desc ) CATCH_SECTION( std::string( " And: ") + desc, "" ) // If CATCH_CONFIG_PREFIX_ALL is not defined then the CATCH_ prefix is not required #else #define REQUIRE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::Normal, "REQUIRE" ) #define REQUIRE_FALSE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::Normal | Catch::ResultDisposition::FalseTest, "REQUIRE_FALSE" ) #define REQUIRE_THROWS( expr ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::Normal, "", "REQUIRE_THROWS" ) #define REQUIRE_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( expr, exceptionType, Catch::ResultDisposition::Normal, "REQUIRE_THROWS_AS" ) #define REQUIRE_THROWS_WITH( expr, matcher ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::Normal, matcher, "REQUIRE_THROWS_WITH" ) #define REQUIRE_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( expr, Catch::ResultDisposition::Normal, "REQUIRE_NOTHROW" ) #define CHECK( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure, "CHECK" ) #define CHECK_FALSE( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::FalseTest, "CHECK_FALSE" ) #define CHECKED_IF( expr ) INTERNAL_CATCH_IF( expr, Catch::ResultDisposition::ContinueOnFailure, "CHECKED_IF" ) #define CHECKED_ELSE( expr ) INTERNAL_CATCH_ELSE( expr, Catch::ResultDisposition::ContinueOnFailure, "CHECKED_ELSE" ) #define CHECK_NOFAIL( expr ) INTERNAL_CATCH_TEST( expr, Catch::ResultDisposition::ContinueOnFailure | Catch::ResultDisposition::SuppressFail, "CHECK_NOFAIL" ) #define CHECK_THROWS( expr ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::ContinueOnFailure, "", "CHECK_THROWS" ) #define CHECK_THROWS_AS( expr, exceptionType ) INTERNAL_CATCH_THROWS_AS( expr, exceptionType, Catch::ResultDisposition::ContinueOnFailure, "CHECK_THROWS_AS" ) #define CHECK_THROWS_WITH( expr, matcher ) INTERNAL_CATCH_THROWS( expr, Catch::ResultDisposition::ContinueOnFailure, matcher, "CHECK_THROWS_WITH" ) #define CHECK_NOTHROW( expr ) INTERNAL_CATCH_NO_THROW( expr, Catch::ResultDisposition::ContinueOnFailure, "CHECK_NOTHROW" ) #define CHECK_THAT( arg, matcher ) INTERNAL_CHECK_THAT( arg, matcher, Catch::ResultDisposition::ContinueOnFailure, "CHECK_THAT" ) #define REQUIRE_THAT( arg, matcher ) INTERNAL_CHECK_THAT( arg, matcher, Catch::ResultDisposition::Normal, "REQUIRE_THAT" ) #define INFO( msg ) INTERNAL_CATCH_INFO( msg, "INFO" ) #define WARN( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::Warning, Catch::ResultDisposition::ContinueOnFailure, "WARN", msg ) #define SCOPED_INFO( msg ) INTERNAL_CATCH_INFO( msg, "INFO" ) #define CAPTURE( msg ) INTERNAL_CATCH_INFO( #msg " := " << msg, "CAPTURE" ) #define SCOPED_CAPTURE( msg ) INTERNAL_CATCH_INFO( #msg " := " << msg, "CAPTURE" ) #ifdef CATCH_CONFIG_VARIADIC_MACROS #define TEST_CASE( ... ) INTERNAL_CATCH_TESTCASE( __VA_ARGS__ ) #define TEST_CASE_METHOD( className, ... 
) INTERNAL_CATCH_TEST_CASE_METHOD( className, __VA_ARGS__ ) #define METHOD_AS_TEST_CASE( method, ... ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, __VA_ARGS__ ) #define REGISTER_TEST_CASE( Function, ... ) INTERNAL_CATCH_REGISTER_TESTCASE( Function, __VA_ARGS__ ) #define SECTION( ... ) INTERNAL_CATCH_SECTION( __VA_ARGS__ ) #define FAIL( ... ) INTERNAL_CATCH_MSG( Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, "FAIL", __VA_ARGS__ ) #define SUCCEED( ... ) INTERNAL_CATCH_MSG( Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, "SUCCEED", __VA_ARGS__ ) #else #define TEST_CASE( name, description ) INTERNAL_CATCH_TESTCASE( name, description ) #define TEST_CASE_METHOD( className, name, description ) INTERNAL_CATCH_TEST_CASE_METHOD( className, name, description ) #define METHOD_AS_TEST_CASE( method, name, description ) INTERNAL_CATCH_METHOD_AS_TEST_CASE( method, name, description ) #define REGISTER_TEST_CASE( method, name, description ) INTERNAL_CATCH_REGISTER_TESTCASE( method, name, description ) #define SECTION( name, description ) INTERNAL_CATCH_SECTION( name, description ) #define FAIL( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::ExplicitFailure, Catch::ResultDisposition::Normal, "FAIL", msg ) #define SUCCEED( msg ) INTERNAL_CATCH_MSG( Catch::ResultWas::Ok, Catch::ResultDisposition::ContinueOnFailure, "SUCCEED", msg ) #endif #define ANON_TEST_CASE() INTERNAL_CATCH_TESTCASE( "", "" ) #define REGISTER_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_REPORTER( name, reporterType ) #define REGISTER_LEGACY_REPORTER( name, reporterType ) INTERNAL_CATCH_REGISTER_LEGACY_REPORTER( name, reporterType ) #define GENERATE( expr) INTERNAL_CATCH_GENERATE( expr ) #endif #define CATCH_TRANSLATE_EXCEPTION( signature ) INTERNAL_CATCH_TRANSLATE_EXCEPTION( signature ) // "BDD-style" convenience wrappers #ifdef CATCH_CONFIG_VARIADIC_MACROS #define SCENARIO( ... ) TEST_CASE( "Scenario: " __VA_ARGS__ ) #define SCENARIO_METHOD( className, ... ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " __VA_ARGS__ ) #else #define SCENARIO( name, tags ) TEST_CASE( "Scenario: " name, tags ) #define SCENARIO_METHOD( className, name, tags ) INTERNAL_CATCH_TEST_CASE_METHOD( className, "Scenario: " name, tags ) #endif #define GIVEN( desc ) SECTION( std::string(" Given: ") + desc, "" ) #define WHEN( desc ) SECTION( std::string(" When: ") + desc, "" ) #define AND_WHEN( desc ) SECTION( std::string("And when: ") + desc, "" ) #define THEN( desc ) SECTION( std::string(" Then: ") + desc, "" ) #define AND_THEN( desc ) SECTION( std::string(" And: ") + desc, "" ) using Catch::Detail::Approx; #endif // TWOBLUECUBES_SINGLE_INCLUDE_CATCH_HPP_INCLUDED mothur-1.48.0/TestMothur/dataset.cpp000077500000000000000000000130401424121717000173710ustar00rootroot00000000000000// // dataset.cpp // Mothur // // Created by Sarah Westcott on 3/24/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. 
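//
// TestDataSet wraps the small fixture files the unit tests share: test.fasta,
// test.names, test.groups, test.count_table, the test.opti_mcc.* files, a pair of
// test distance matrices, the silva.v4 reference set and the example oligos files
// under testDir. The fill*() helpers read those files on demand into seqs, nameMap,
// gMap and lookup, and createCountTable() collapses the name map and group map into
// per-group counts for a CountTable. Note that testDir is a hard-coded absolute
// path below, so it generally has to be pointed at your own checkout's
// TestMothur/TestFiles directory before these fixtures can find their inputs.
//
// A rough usage sketch (assuming testDir has been adjusted):
//
//     TestDataSet data;
//     vector<Sequence> seqs = data.getSeqs();    // parsed from testDir + "test.fasta"
//     CountTable* ct = data.getCountTable();     // built from test.names + test.groups
//     auto lookup = data.getLookup();            // shared vectors from test.opti_mcc.shared
//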
// #include "dataset.h" #include "inputdata.h" /***********************************************************************/ TestDataSet::TestDataSet() { m = MothurOut::getInstance(); gMap = NULL; testDir = "/Users/sarahwestcott/Desktop/mothur/TestMothur/TestFiles/"; } /***********************************************************************/ void TestDataSet::createCountTable() { fillGroup(); fillNames(); ct = new CountTable(); for (map::iterator itNameMap = nameMap.begin(); itNameMap !=nameMap.end(); itNameMap++) { string firstCol = itNameMap->first; string secondCol = itNameMap->second; vector names; util.splitAtChar(secondCol, names, ','); //set to 0 map groupCounts; int total = 0; vector Groups = gMap->getNamesOfGroups(); ct->setNamesOfGroups(Groups); for (int i = 0; i < Groups.size(); i++) { groupCounts[Groups[i]] = 0; } //get counts for each of the users groups for (int i = 0; i < names.size(); i++) { string group = gMap->getGroup(names[i]); map::iterator it = groupCounts.find(group); //if not found, then this sequence is not from a group we care about if (it != groupCounts.end()) { it->second++; total++; } } if (total != 0) { vector abunds; for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { abunds.push_back(it->second); } ct->push_back(firstCol, abunds); } } delete gMap; gMap = NULL; nameMap.clear(); } /***********************************************************************/ vector TestDataSet::getSubsetFNGFiles() { vector filenames; filenames.push_back(testDir+"test.fasta"); filenames.push_back(testDir+"test.names"); filenames.push_back(testDir+"test.groups"); return filenames; } /***********************************************************************/ string TestDataSet::getCountTableFile() { return testDir+"test.count_table"; } /***********************************************************************/ string TestDataSet::getSharedFile() { return testDir+"test.opti_mcc.shared"; } /***********************************************************************/ string TestDataSet::getRelabundFile() { return testDir+"test.opti_mcc.relabund"; } /***********************************************************************/ vector TestDataSet::getOptiRefFiles() { vector filenames; filenames.push_back(testDir+"silva.v4.unique.fasta"); filenames.push_back(testDir+"silva.v4.count_table"); filenames.push_back(testDir+"silva.v4.unique.dist"); filenames.push_back(testDir+"silva.v4.unique.phylip.dist"); filenames.push_back(testDir+"silva.v4.unique.opti_mcc.list"); filenames.push_back(testDir+"test.fit.dist"); return filenames; } /***********************************************************************/ vector TestDataSet::getOligosFiles() { vector filenames; filenames.push_back(testDir+"GQY1XT001.oligos"); //single filenames.push_back(testDir+"Undetermined.oligos"); //paired filenames.push_back(testDir+"bo.oligos"); //index "NONE" filenames.push_back(testDir+"comboNames.oligos"); //named primers and named barcodes return filenames; } /***********************************************************************/ string TestDataSet::getSubsetFNGDistFile() { return (testDir+"test.dist"); } /***********************************************************************/ string TestDataSet::getSubsetFNGPhylipDistFile() { return (testDir+"test.phylip.dist"); } /***********************************************************************/ void TestDataSet::fillSeqs() { seqs.clear(); //read info from stable file //string testfile = m->getTestFilePath() + "testFile.fasta"; string testfile = 
testDir+"test.fasta"; ifstream in; util.openInputFile(testfile, in); while (!in.eof()) { if (m->getControl_pressed()) { break; } Sequence read(in); util.gobble(in); seqs.push_back(read); } in.close(); } /***********************************************************************/ void TestDataSet::fillNames() { nameMap.clear(); //read info from stable file string testfile = testDir+"test.names"; util.readNames(testfile, nameMap); } /***********************************************************************/ void TestDataSet::fillGroup() { if (gMap != NULL) { delete gMap; gMap = NULL; } //read info from stable file string testfile = testDir+"test.groups"; gMap = new GroupMap(); gMap->readMap(testfile); } /***********************************************************************/ void TestDataSet::fillLookup() { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } lookup.clear(); //read info from stable file string testfile = testDir+"test.opti_mcc.shared"; InputData input(testfile, "sharedfile", nullVector); SharedRAbundVectors* shared = input.getSharedRAbundVectors(); lookup = shared->getSharedRAbundVectors(); } /***********************************************************************/ mothur-1.48.0/TestMothur/dataset.h000066400000000000000000000032741424121717000170430ustar00rootroot00000000000000// // dataset.h // Mothur // // Created by Sarah Westcott on 3/24/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #ifndef __Mothur__dataset__ #define __Mothur__dataset__ #include "sequence.hpp" #include "counttable.h" #include "groupmap.h" #include "sharedrabundvector.hpp" #include "fastqdataset.h" #include "utils.hpp" class TestDataSet { public: TestDataSet(); vector getSeqs() { fillSeqs(); return seqs; } map getNameMap() { fillNames(); return nameMap; } GroupMap* getGroupMap() { fillGroup(); return gMap; } CountTable* getCountTable() { createCountTable(); return ct; } vector getLookup() { fillLookup(); return lookup; } vector getSubsetFNGFiles(); //Fasta, name, group returned - containing 100 seqs string getSubsetFNGDistFile(); string getSubsetFNGPhylipDistFile(); vector getOptiRefFiles(); //fasta, count, column, phylip, list, betweendist vector getOligosFiles(); //single, paired, indexes, comboNamesTest string getSharedFile(); //shared string getRelabundFile(); //relabund string getCountTableFile(); //count private: MothurOut* m; Utils util; TestFastqDataSet fastqData; vector seqs; map nameMap; CountTable* ct; GroupMap* gMap; vector lookup; string testDir; void fillNames(); void fillSeqs(); void fillGroup(); void createCountTable(); void fillLookup(); }; #endif /* defined(__Mothur__dataset__) */ mothur-1.48.0/TestMothur/distcdataset.cpp000066400000000000000000000066731424121717000204330ustar00rootroot00000000000000#include "distcdataset.h" #include "getdistscommand.h" #include "listseqscommand.h" #include "getseqscommand.h" /***********************************************************************/ DistCDataSet::DistCDataSet() { m = MothurOut::getInstance(); current = CurrentFile::getInstance(); columnFile = "/Users/sarahwestcott/Desktop/mothur/TestMothur/TestFiles/stability.MISeq_SOP.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.dist"; countFile = "/Users/sarahwestcott/Desktop/mothur/TestMothur/TestFiles/stability.count_table"; } /***********************************************************************/ vector DistCDataSet::getFiles(int numSeqs) { vector newFiles; if (numSeqs > 2055) { m->mothurOut("[ERROR]: too many seqs requested in 
DistCDataSet::getFiles\n"); } else { string inputString = "count=" + countFile; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: list.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* listCommand = new ListSeqsCommand(inputString); listCommand->execute(); map > filenames = listCommand->getOutputFiles(); delete listCommand; current->setMothurCalling(false); string accnosfile = filenames["accnos"][0]; m->mothurOut("/******************************************/\n"); ifstream in; util.openInputFile(accnosfile, in); ofstream out; util.openOutputFile("temp.accnos", out); int count = 0; string name; while(!in.eof()) { if (m->getControl_pressed()) { break; } in >> name; util.gobble(in); out << name << endl; count++; if (count >= numSeqs) { break; } } in.close(); out.close(); util.mothurRemove(accnosfile); inputString = "count=" + countFile + ", accnos=temp.accnos"; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* getCommand = new GetSeqsCommand(inputString); getCommand->execute(); filenames = getCommand->getOutputFiles(); delete getCommand; current->setMothurCalling(false); string newCountfile = filenames["count"][0]; m->mothurOut("/******************************************/\n"); inputString = "column=" + columnFile + ", accnos=temp.accnos"; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.dists(" + inputString + ")\n"); current->setMothurCalling(true); Command* getDCommand = new GetDistsCommand(inputString); getDCommand->execute(); filenames = getDCommand->getOutputFiles(); delete getDCommand; current->setMothurCalling(false); string newColumnfile = filenames["column"][0]; m->mothurOut("/******************************************/\n"); newFiles.push_back(newColumnfile); newFiles.push_back(newCountfile); } return newFiles; } /***********************************************************************/ mothur-1.48.0/TestMothur/distcdataset.h000066400000000000000000000012211424121717000200600ustar00rootroot00000000000000// // distcdataset.h // Mothur // // Created by Sarah Westcott on 6/8/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #ifndef __Mothur__distcdataset__ #define __Mothur__distcdataset__ #include "mothurout.h" #include "utils.hpp" #include "currentfile.h" class DistCDataSet { public: DistCDataSet(); ~DistCDataSet() {} string getColumnFile() { return columnFile; } vector getFiles(int); string getCountFile() { return countFile; } private: MothurOut* m; Utils util; string columnFile, countFile; CurrentFile* current; }; #endif /* defined(__Mothur__distcdataset__) */ mothur-1.48.0/TestMothur/distpdataset.cpp000066400000000000000000000010601424121717000204310ustar00rootroot00000000000000// // distdataset.cpp // Mothur // // Created by Sarah Westcott on 6/6/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. 
// #include "distpdataset.h" /***********************************************************************/ DistPDataSet::DistPDataSet() { m = MothurOut::getInstance(); phylipFile = "/Users/sarahwestcott/Desktop/mothur/TestMothur/TestFiles/stability.MISeq_SOP.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.phylip.dist"; } /***********************************************************************/ mothur-1.48.0/TestMothur/distpdataset.h000066400000000000000000000007331424121717000201040ustar00rootroot00000000000000// // distdataset.h // Mothur // // Created by Sarah Westcott on 6/6/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #ifndef __Mothur__distdataset__ #define __Mothur__distdataset__ #include "mothurout.h" class DistPDataSet { public: DistPDataSet(); ~DistPDataSet() {} string getPhylipFile() { return phylipFile; } private: MothurOut* m; string phylipFile; }; #endif /* defined(__Mothur__distdataset__) */ mothur-1.48.0/TestMothur/fakes/000077500000000000000000000000001424121717000163305ustar00rootroot00000000000000mothur-1.48.0/TestMothur/fakes/fakemcc.hpp000066400000000000000000000006501424121717000204330ustar00rootroot00000000000000// // fakemcc.hpp // Mothur // // Created by Sarah Westcott on 4/18/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef fakemcc_hpp #define fakemcc_hpp #include "mothurout.h" class FakeClusterCalcValues { public: FakeClusterCalcValues() { tp = 823; tn = 1944106; fp = 95; fn = 354; } ~FakeClusterCalcValues() {} long long tp, tn, fp, fn; }; #endif /* fakemcc_hpp */ mothur-1.48.0/TestMothur/fakes/fakeoligos.h000066400000000000000000000076421424121717000206350ustar00rootroot00000000000000// // fakeoligos.h // Mothur // // Created by Sarah Westcott on 5/1/17. // Copyright © 2017 Schloss Lab. All rights reserved. 
// #ifndef fakeoligos_h #define fakeoligos_h #include "mothurout.h" class FakeOligos { public: FakeOligos() { bdiffs=1; pdiffs=2; rdiffs=0; ldiffs=0; sdiffs=0; } ~FakeOligos() {} //single void loadSingle() { primers.clear(); barcodes.clear(); revPrimer.clear(); primers["CCGTCAATTCMTTTRAGT"] = 0; barcodes["AATGGTAC"] = 0; //F003D000 barcodes["AACCTGGC"] = 1; //F003D002 barcodes["TTCGTGGC"] = 2; //F003D004 barcodes["TTCTTGAC"] = 3; //F003D006 barcodes["TTCGCGAC"] = 4; //F003D008 barcodes["TCCAGAAC"] = 5; //F003D142 barcodes["AAGGCCTC"] = 6; //F003D144 barcodes["TGACCGTC"] = 7; //F003D146 barcodes["AGGTTGTC"] = 8; //F003D148 barcodes["TGGTGAAC"] = 9; //F003D150 barcodes["AACCGTGTC"] = 10; //MOCK.GQY1XT001 primerNameVector.clear(); barcodeNameVector.clear(); barcodeNameVector.push_back("F003D000"); barcodeNameVector.push_back("F003D002"); barcodeNameVector.push_back("F003D004"); barcodeNameVector.push_back("F003D006"); barcodeNameVector.push_back("F003D008"); barcodeNameVector.push_back("F003D142"); barcodeNameVector.push_back("F003D144"); barcodeNameVector.push_back("F003D146"); barcodeNameVector.push_back("F003D148"); barcodeNameVector.push_back("F003D150"); barcodeNameVector.push_back("MOCK.GQY1XT001"); revPrimer.push_back("ATTACCGCGGCTGCTGG"); linker.push_back("TGAC"); linker.push_back("TTGG"); spacer.push_back("CCAAC"); spacer.push_back("CACTG"); } void loadPaired() { primerNameVector.clear(); barcodeNameVector.clear(); oligosPair F01R2A; F01R2A.forward = "CCAAC"; F01R2A.reverse = "CACTG"; barcodeNameVector.push_back("F01R2A"); ipbarcodes[0] = F01R2A; oligosPair F01R2B; F01R2B.forward = "CCAAC"; F01R2B.reverse = "AACCA"; barcodeNameVector.push_back("F01R2B"); ipbarcodes[1] = F01R2B; oligosPair F01R2C; F01R2C.forward = "CCAAC"; F01R2C.reverse = "TGTCA"; barcodeNameVector.push_back("F01R2C"); ipbarcodes[2] = F01R2C; oligosPair F01R2D; F01R2D.forward = "CCAAC"; F01R2D.reverse = "AAACC"; barcodeNameVector.push_back("F01R2D"); ipbarcodes[3] = F01R2D; oligosPair F02R2A; F02R2A.forward = "GGTTG"; F02R2A.reverse = "CACTG"; barcodeNameVector.push_back("F02R2A"); ipbarcodes[4] = F02R2A; oligosPair F02R2B; F02R2B.forward = "GGTTG"; F02R2B.reverse = "AACCA"; barcodeNameVector.push_back("F02R2B"); ipbarcodes[5] = F02R2B; oligosPair F02R2C; F02R2C.forward = "GGTTG"; F02R2C.reverse = "TGTCA"; barcodeNameVector.push_back("F02R2C"); ipbarcodes[6] = F02R2C; oligosPair F02R2D; F02R2D.forward = "GGTTG"; F02R2D.reverse = "AAACC"; barcodeNameVector.push_back("F02R2D"); ipbarcodes[7] = F02R2D; oligosPair F05R2F; F05R2F.forward = "CTTAC"; F05R2F.reverse = "GGGTT"; barcodeNameVector.push_back("F05R2F"); ipbarcodes[8] = F05R2F; oligosPair V3; V3.forward = "CCTACGGGAGGCAGCAG"; V3.reverse = "ATTACCGCGGCTGCTGG"; primerNameVector.push_back("V3"); ipprimers[0] = V3; oligosPair V4; V4.forward = "ATTAGAWACCCBDGTAGTCC"; V4.reverse = "CCCGTCAATTCMTTTRAGT"; primerNameVector.push_back("V4"); ipprimers[1] = V4; oligosPair V5; V5.forward = "ACTYAAAKGAATTGACGGG"; V5.reverse = "ACRACACGAGCTGACGAC"; primerNameVector.push_back("V5"); ipprimers[2] = V5; } int bdiffs, pdiffs, rdiffs, ldiffs, sdiffs; map barcodes; map primers; vector revPrimer; vector linker; vector spacer; map ipbarcodes; map ipprimers; vector primerNameVector; vector barcodeNameVector; }; #endif /* fakeoligos_h */ mothur-1.48.0/TestMothur/fakes/fakeoptimatrix.cpp000066400000000000000000000025341424121717000220670ustar00rootroot00000000000000// // fakeoptimatrix.cpp // Mothur // // Created by Sarah Westcott on 4/20/17. // Copyright © 2017 Schloss Lab. 
All rights reserved. // #include "fakeoptimatrix.hpp" /***********************************************************************/ FakeOptiMatrix::FakeOptiMatrix() : OptiData(0.03) { try { m = MothurOut::getInstance(); //create 10 singletons for (int i = 90; i < 100; i++) { singletons.push_back(toString(i)); } //create 90 non singletons for (int i = 0; i < 90; i++) { nameMap.push_back(toString(i)); } closeness.resize(90); int count = 0; for (int i = 0; i < 9; i++) { set close; //create list of all sequences in this set for (int j = 0; j < 10; j++) { close.insert((j+count)); } for (set::iterator it = close.begin(); it != close.end(); it++) { //add close sequences to each sequence in this set, do not include self for (int j = 0; j < 10; j++) { if ((j+count) != *it) { closeness[j+count].insert(*it); } } } count += 10; } } catch(exception& e) { m->errorOut(e, "FakeOptiMatrix", "FakeOptiMatrix"); exit(1); } } /***********************************************************************/ mothur-1.48.0/TestMothur/fakes/fakeoptimatrix.hpp000066400000000000000000000005511424121717000220710ustar00rootroot00000000000000// // fakeoptimatrix.hpp // Mothur // // Created by Sarah Westcott on 4/20/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef fakeoptimatrix_hpp #define fakeoptimatrix_hpp #include "optidata.hpp" class FakeOptiMatrix : public OptiData { public: FakeOptiMatrix(); ~FakeOptiMatrix(){ } }; #endif /* fakeoptimatrix_hpp */ mothur-1.48.0/TestMothur/fastqdataset.cpp000066400000000000000000000045731424121717000204400ustar00rootroot00000000000000// // fastqdataset.cpp // Mothur // // Created by Sarah Westcott on 3/31/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #include "fastqdataset.h" /***********************************************************************/ TestFastqDataSet::TestFastqDataSet() { m = MothurOut::getInstance(); } /***********************************************************************/ vector TestFastqDataSet::getSubsetFRFastq(int numSeqs) { fillForwardFastq(); fillReverseFastq(); ofstream out, out2; util.openOutputFile("tempForward.txt", out); util.openOutputFile("tempReverse.txt", out2); for (int i = 0; i < numSeqs; i++) { ffastqReads[i].printFastq(out); rfastqReads[i].printFastq(out2); } vector filenames; filenames.push_back("tempForward.txt"); filenames.push_back("tempReverse.txt"); return filenames; } /***********************************************************************/ void TestFastqDataSet::fillForwardFastq() { ffastqReads.clear(); //read info from stable file string testfile = "/Users/sarahwestcott/Desktop/mothur/TestMothur/TestFiles/F8D0_S345_L001_R1_001.fastq"; ifstream in; util.openInputFile(testfile, in); int count = 0; bool ignore = false; string format = "illumina1.8+"; while (!in.eof()) { if (m->getControl_pressed()) { break; } if (count < 2000) { FastqRead read(in, ignore, format); util.gobble(in); if (!ignore) { ffastqReads.push_back(read); count++; } }else { break; } } in.close(); } /***********************************************************************/ void TestFastqDataSet::fillReverseFastq() { rfastqReads.clear(); //read info from stable file string testfile = "/Users/sarahwestcott/Desktop/mothur/TestMothur/TestFiles/F8D0_S345_L001_R2_001.fastq"; ifstream in; util.openInputFile(testfile, in); int count = 0; bool ignore = false; string format = "illumina1.8+"; while (!in.eof()) { if (m->getControl_pressed()) { break; } if (count < 2000) { FastqRead read(in, ignore, format); util.gobble(in); if (!ignore) { 
rfastqReads.push_back(read); count++; } }else { break; } } in.close(); } /***********************************************************************/ mothur-1.48.0/TestMothur/fastqdataset.h000066400000000000000000000014011424121717000200700ustar00rootroot00000000000000// // fastqdataset.h // Mothur // // Created by Sarah Westcott on 3/31/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #ifndef __Mothur__fastqdataset__ #define __Mothur__fastqdataset__ #include "fastqread.h" class TestFastqDataSet { public: TestFastqDataSet(); ~TestFastqDataSet() {} vector getForwardFastq() { fillForwardFastq(); return ffastqReads; } vector getReverseFastq() { fillReverseFastq(); return rfastqReads; } vector getSubsetFRFastq(int); private: MothurOut* m; Utils util; vector ffastqReads; vector rfastqReads; void fillForwardFastq(); void fillReverseFastq(); }; #endif /* defined(__Mothur__fastqdataset__) */ mothur-1.48.0/TestMothur/main.cpp000066400000000000000000000027441424121717000166760ustar00rootroot00000000000000#ifndef MAIN_TEST #define MAIN_TEST // // main.cpp // TestMothur // // Created by Sarah Westcott on 3/23/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #include "mothurout.h" #include "currentfile.h" #include "commandfactory.hpp" #include "gtest/gtest.h" /* Test Naming Structure: Test_OptionalSubCategory_TestClass Test_Container_Sequence Test_Calcs_ClusterCalcs Makes it easy to filter tests ::testing::GTEST_FLAG(filter) = "Test_Container_*"; ::testing::GTEST_FLAG(filter) = "Test_*"; ::testing::GTEST_FLAG(filter) = "Test_Calcs*"; Test_TrimOligos */ CommandFactory* CommandFactory::_uniqueInstance; CurrentFile* CurrentFile::instance; MothurOut* MothurOut::_uniqueInstance; int main(int argc, char **argv) { MothurOut* m; m = MothurOut::getInstance(); CurrentFile* current; current = CurrentFile::getInstance(); current->setTestFilePath("/Users/sarahwestcott/Desktop/mothur/TestMothur/TestFiles/"); string pathname = current->getProgramPath(); if (pathname != "") { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); if (lastChar != PATH_SEPARATOR) { pathname += PATH_SEPARATOR; } } current->setTestFilePath(pathname); ::testing::GTEST_FLAG(filter) = "Test_*"; //Test_Command_BiomInfo ::testing::InitGoogleTest(&argc, argv); #ifndef UNIT_TEST #define UNIT_TEST #endif int value = RUN_ALL_TESTS(); return value; } #endif mothur-1.48.0/TestMothur/testbiominfocommand.cpp000066400000000000000000000054061424121717000220110ustar00rootroot00000000000000// // testbiominfocommand.cpp // Mothur // // Created by Sarah Westcott on 8/18/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
// #include "testbiominfocommand.h" /**************************************************************************************************/ TestBiomInfoCommand::TestBiomInfoCommand() { m = MothurOut::getInstance(); } /**************************************************************************************************/ TestBiomInfoCommand::~TestBiomInfoCommand() { } /**************************************************************************************************/ TEST(Test_Command_BiomInfo, getDims) { TestBiomInfoCommand testTrim; string input = "shape: [28,3]"; int numRows = 0; int numCols = 0; testTrim.getDims(input, numRows, numCols); EXPECT_EQ(28, numRows); EXPECT_EQ(3, numCols); } TEST(Test_Command_BiomInfo, getName) { TestBiomInfoCommand testTrim; string input = "id:B"; EXPECT_EQ("B", testTrim.getName(input)); } TEST(Test_Command_BiomInfo, getTaxonomy) { TestBiomInfoCommand testTrim; string tax = "taxonomy:k__Bacteria,p__Firmicutes,c__Bacilli,o__Turicibacterales,f__Turicibacteraceae,g__Turicibacter,s__"; string boot = "bootstrap:100,100,100,100,100,100,100"; EXPECT_EQ("k__Bacteria(100);p__Firmicutes(100);c__Bacilli(100);o__Turicibacterales(100);f__Turicibacteraceae(100);g__Turicibacter(100);s__(100);", testTrim.getTaxonomy(tax, boot)); } TEST(Test_Command_BiomInfo, readRows) { TestBiomInfoCommand testTrim; string input = "columns:[{id:A, metadata:null},{id:B, metadata:null},{id:C, metadata:null}]"; int numRows = 0; bool hasTaxonomy = true; EXPECT_EQ("A", testTrim.readRows(input, numRows, hasTaxonomy)[0][0]); EXPECT_EQ(3, numRows); EXPECT_EQ(false, hasTaxonomy); } TEST(Test_Command_BiomInfo, readData) { TestBiomInfoCommand testTrim; string input = "data: [[0,2,5], [1,2,5], [2,1,2], [3,1,1], [4,1,1], [5,0,18], [5,1,12], [6,0,15], [6,1,4], [7,0,1], [7,1,1], [8,1,1], [9,0,2], [9,1,6], [9,2,4], [10,1,2], [11,0,5], [11,1,1], [11,2,4], [12,0,1], [13,0,1], [13,1,2], [14,1,2], [15,1,5], [16,0,8], [16,1,1], [17,1,2], [18,0,13], [19,0,2], [19,1,1], [20,0,15], [20,1,27], [20,2,11], [21,1,10], [22,2,18], [23,2,5], [24,0,1], [24,2,20], [25,1,2], [25,2,2], [26,0,1], [26,1,1], [27,0,1]]"""; string matrixFormat = "sparse"; string matrixElementType = "int"; vector groupNames; groupNames.push_back("A"); groupNames.push_back("B"); groupNames.push_back("C"); int numOtus = 28; EXPECT_EQ(12, testTrim.readData(matrixFormat, input, matrixElementType, groupNames, numOtus)->getOTUTotal(9)); } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testbiominfocommand.h000066400000000000000000000013451424121717000214540ustar00rootroot00000000000000// // testbiominfocommand.h // Mothur // // Created by Sarah Westcott on 8/18/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
// #ifndef __Mothur__testbiominfocommand__ #define __Mothur__testbiominfocommand__ #include "biominfocommand.h" #include "gtest/gtest.h" class TestBiomInfoCommand : public BiomInfoCommand { public: TestBiomInfoCommand(); ~TestBiomInfoCommand(); MothurOut* m; using BiomInfoCommand::getDims; using BiomInfoCommand::getName; using BiomInfoCommand::getTaxonomy; using BiomInfoCommand::readRows; using BiomInfoCommand::readData; using BiomInfoCommand::getNamesAndTaxonomies; }; #endif /* defined(__Mothur__testbiominfocommand__) */ mothur-1.48.0/TestMothur/testclassifier/000077500000000000000000000000001424121717000202635ustar00rootroot00000000000000mothur-1.48.0/TestMothur/testclassifier/testphylotree.cpp000066400000000000000000000156741424121717000237170ustar00rootroot00000000000000// // testphylotree.cpp // Mothur // // Created by Sarah Westcott on 8/29/16. // Copyright © 2016 Schloss Lab. All rights reserved. // #include "testphylotree.hpp" /**************************************************************************************************/ TestPhyloTree::TestPhyloTree() { //setup m = MothurOut::getInstance(); string tax1WithSpaces = "Bacteria(100);Bacteroidetes 7(100);Bacteroidia(100);Bacteroidales(100);S24-7(100);"; string tax2WithSpaces = "Bacteria(100);Bacteroidetes 7(100);Bacteroidia(98);Bacteroidales(98);Bacteroidaceae(98);Bacteroides(98);"; string tax3WithSpaces = "Bacteria(100);Firmicutes(100);Clostridia B(100);Clostridiales(100);Lachnospiraceae(100);Blautia(92);"; string tax4WithSpaces = "Bacteria(100);Firmicutes(100);Clostridia B(100);Clostridiales(100);Ruminococcaceae(100);Anaerotruncus(100);"; string tax5WithSpaces = "Bacteria(100);Firmicutes(100);Clostridia B(100);Clostridiales(100);Lachnospiraceae(100);Incertae_Sedis(97);"; string tax1WithOutSpaces = "Bacteria(100);Bacteroidetes(100);Bacteroidia(100);Bacteroidales(100);S24-7(100);"; string tax2WithOutSpaces = "Bacteria(100);Bacteroidetes(100);Bacteroidia(98);Bacteroidales(98);Bacteroidaceae(98);Bacteroides(98);"; string tax3WithOutSpaces = "Bacteria(100);Firmicutes(100);Clostridia(100);Clostridiales(100);Lachnospiraceae(100);Blautia(92);"; string tax4WithOutSpaces = "Bacteria(100);Firmicutes(100);Clostridia(100);Clostridiales(100);Ruminococcaceae(100);Anaerotruncus(100);"; string tax5WithOutSpaces = "Bacteria(100);Firmicutes(100);Clostridia(100);Clostridiales(100);Lachnospiraceae(100);Incertae_Sedis(97);"; phylo.addSeqToTree("seq1", tax1WithSpaces); phylo.addSeqToTree("seq2", tax2WithSpaces); phylo.addSeqToTree("seq3", tax3WithSpaces); phylo.addSeqToTree("seq4", tax4WithSpaces); phylo.addSeqToTree("seq5", tax5WithSpaces); phylo.addSeqToTree("seq6", tax1WithOutSpaces); phylo.addSeqToTree("seq7", tax2WithOutSpaces); phylo.addSeqToTree("seq8", tax3WithOutSpaces); phylo.addSeqToTree("seq9", tax4WithOutSpaces); phylo.addSeqToTree("seq10", tax5WithOutSpaces); } /**************************************************************************************************/ TestPhyloTree::~TestPhyloTree() {} /************************************************************************************************** TEST_CASE("Testing PhyloTree Class") { TestPhyloTree testPTree; string tax1WithSpaces = "Bacteria(100);Bacteroidetes 7(100);Bacteroidia(100);Bacteroidales(100);S24-7(100);"; string tax2WithSpaces = "Bacteria(100);Bacteroidetes 7(100);Bacteroidia(98);Bacteroidales(98);Bacteroidaceae(98);Bacteroides(98);"; string tax3WithSpaces = "Bacteria(100);Firmicutes(100);Clostridia B(100);Clostridiales(100);Lachnospiraceae(100);Blautia(92);"; string 
tax4WithSpaces = "Bacteria(100);Firmicutes(100);Clostridia B(100);Clostridiales(100);Ruminococcaceae(100);Anaerotruncus(100);"; string tax5WithSpaces = "Bacteria(100);Firmicutes(100);Clostridia B(100);Clostridiales(100);Lachnospiraceae(100);Incertae_Sedis(97);"; string tax1WithOutSpaces = "Bacteria(100);Bacteroidetes(100);Bacteroidia(100);Bacteroidales(100);S24-7(100);"; string tax2WithOutSpaces = "Bacteria(100);Bacteroidetes(100);Bacteroidia(98);Bacteroidales(98);Bacteroidaceae(98);Bacteroides(98);"; string tax3WithOutSpaces = "Bacteria(100);Firmicutes(100);Clostridia(100);Clostridiales(100);Lachnospiraceae(100);Blautia(92);"; string tax4WithOutSpaces = "Bacteria(100);Firmicutes(100);Clostridia(100);Clostridiales(100);Ruminococcaceae(100);Anaerotruncus(100);"; string tax5WithOutSpaces = "Bacteria(100);Firmicutes(100);Clostridia(100);Clostridiales(100);Lachnospiraceae(100);Incertae_Sedis(97);"; SECTION("Add Sequences to Tree") { INFO("Using taxonomies with and without spaces") // Only appears on a FAIL CAPTURE(testPTree.addSeqToTree("seq1", tax1WithSpaces)); CHECK(testPTree.addSeqToTree("seq1", tax1WithSpaces) == 5); CAPTURE(testPTree.addSeqToTree("seq2", tax2WithSpaces)); CHECK(testPTree.addSeqToTree("seq2", tax2WithSpaces) == 6); CAPTURE(testPTree.addSeqToTree("seq3", tax3WithSpaces)); CHECK(testPTree.addSeqToTree("seq3", tax3WithSpaces) == 6); CAPTURE(testPTree.addSeqToTree("seq4", tax4WithSpaces)); CHECK(testPTree.addSeqToTree("seq4", tax4WithSpaces) == 6); CAPTURE(testPTree.addSeqToTree("seq5", tax5WithSpaces)); CHECK(testPTree.addSeqToTree("seq5", tax5WithSpaces) == 6); CAPTURE(testPTree.addSeqToTree("seq6", tax1WithOutSpaces)); CHECK(testPTree.addSeqToTree("seq6", tax1WithOutSpaces) == 5); CAPTURE(testPTree.addSeqToTree("seq7", tax2WithOutSpaces)); CHECK(testPTree.addSeqToTree("seq7", tax2WithOutSpaces) == 6); CAPTURE(testPTree.addSeqToTree("seq8", tax3WithOutSpaces)); CHECK(testPTree.addSeqToTree("seq8", tax3WithOutSpaces) == 6); CAPTURE(testPTree.addSeqToTree("seq9", tax4WithOutSpaces)); CHECK(testPTree.addSeqToTree("seq9", tax4WithOutSpaces) == 6); CAPTURE(testPTree.addSeqToTree("seq10", tax5WithOutSpaces)); CHECK(testPTree.addSeqToTree("seq10", tax5WithOutSpaces) == 6); } SECTION("Get Seqs") { INFO("Using taxonomies with and without spaces") // Only appears on a FAIL CAPTURE(testPTree.phylo.getSeqs("Bacteroidetes 7").size()); CHECK((testPTree.phylo.getSeqs("Bacteroidetes 7").size()) == 2); vector Bacteroidetes_7 = testPTree.phylo.getSeqs("Bacteroidetes 7"); CHECK(Bacteroidetes_7[0] == "seq1"); CHECK(Bacteroidetes_7[1] == "seq2"); CAPTURE(testPTree.phylo.getSeqs("Clostridia").size()); CHECK((testPTree.phylo.getSeqs("Clostridia").size()) == 3); vector Clostridia = testPTree.phylo.getSeqs("Clostridia"); CHECK(Clostridia[0] == "seq8"); CHECK(Clostridia[1] == "seq9"); CHECK(Clostridia[2] == "seq10"); } SECTION("Get Genus Totals") { INFO("Using taxonomies with and without spaces") // Only appears on a FAIL CAPTURE(testPTree.phylo.getGenusTotals().size()); CHECK(testPTree.phylo.getGenusTotals().size() == 10); } SECTION("Get Full Taxonomy") { INFO("Using taxonomies with and without spaces") // Only appears on a FAIL CAPTURE(testPTree.phylo.getFullTaxonomy("seq1")); CHECK(testPTree.phylo.getFullTaxonomy("seq1") == "Bacteria;Bacteroidetes 7;Bacteroidia;Bacteroidales;S24-7;"); CAPTURE(testPTree.phylo.getFullTaxonomy("seq10")); CHECK(testPTree.phylo.getFullTaxonomy("seq1") == "Bacteria;Firmicutes;Clostridia;Clostridiales;Lachnospiraceae;Incertae_Sedis;"); } } 
**************************************************************************************************/ mothur-1.48.0/TestMothur/testclassifier/testphylotree.hpp000066400000000000000000000007261424121717000237140ustar00rootroot00000000000000// // testphylotree.hpp // Mothur // // Created by Sarah Westcott on 8/29/16. // Copyright © 2016 Schloss Lab. All rights reserved. // #ifndef testphylotree_hpp #define testphylotree_hpp #include "phylotree.h" class TestPhyloTree : public PhyloTree { public: TestPhyloTree(); ~TestPhyloTree(); MothurOut* m; PhyloTree phylo; //using PhyloTree:: //using PhyloTree:: }; #endif /* testphylotree_hpp */ mothur-1.48.0/TestMothur/testclustercalcs.cpp000066400000000000000000000111571424121717000213370ustar00rootroot00000000000000// // testclustercalcs.cpp // Mothur // // Created by Sarah Westcott on 4/18/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "testclustercalcs.hpp" #include "mcc.hpp" /**************************************************************************************************/ TestClusterCalcs::TestClusterCalcs(string metricName) { //setup if (metricName == "mcc") { metric = new MCC(); } else if (metricName == "sens") { metric = new Sensitivity(); } else if (metricName == "spec") { metric = new Specificity(); } else if (metricName == "tptn") { metric = new TPTN(); } else if (metricName == "tp") { metric = new TP(); } else if (metricName == "tn") { metric = new TN(); } else if (metricName == "fp") { metric = new FP(); } else if (metricName == "fn") { metric = new FN(); } else if (metricName == "f1score") { metric = new F1Score(); } else if (metricName == "accuracy") { metric = new Accuracy(); } else if (metricName == "ppv") { metric = new PPV(); } else if (metricName == "npv") { metric = new NPV(); } else if (metricName == "fdr") { metric = new FDR(); } else if (metricName == "fpfn") { metric = new FPFN(); } } /**************************************************************************************************/ TestClusterCalcs::~TestClusterCalcs() { delete metric; } /**************************************************************************************************/ TEST(Test_Calc_ClusterCalcs, mcc) { TestClusterCalcs test("mcc"); double result = test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn); ASSERT_NEAR(0.791646, result, 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, sens) { TestClusterCalcs test("sens"); ASSERT_NEAR(0.699235, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, spec) { TestClusterCalcs test("spec"); ASSERT_NEAR(0.999951, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, tptn) { TestClusterCalcs test("tptn"); ASSERT_NEAR(0.9997691, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, tp) { TestClusterCalcs test("tp"); ASSERT_NEAR(0.000423, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, tn) { TestClusterCalcs test("tn"); ASSERT_NEAR(0.9993461, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, fp) { TestClusterCalcs test("fp"); ASSERT_NEAR(0.999951, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, fn) { 
TestClusterCalcs test("fn"); ASSERT_NEAR(0.999818, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, f1score) { TestClusterCalcs test("f1score"); ASSERT_NEAR(0.7856801, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, accuracy) { TestClusterCalcs test("accuracy"); ASSERT_NEAR(0.999769, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, ppv) { TestClusterCalcs test("ppv"); ASSERT_NEAR(0.896514, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, npv) { TestClusterCalcs test("npv"); ASSERT_NEAR(0.9998179, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, fdr) { TestClusterCalcs test("fdr"); ASSERT_NEAR(0.896514, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } TEST(Test_Calc_ClusterCalcs, fpfn) { TestClusterCalcs test("fpfn"); ASSERT_NEAR(0.999769, test.metric->getValue(test.fake.tp,test.fake.tn,test.fake.fp,test.fake.fn), 0.0001); //metric value } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testclustercalcs.hpp000066400000000000000000000014001424121717000213320ustar00rootroot00000000000000// // testclustercalcs.hpp // Mothur // // Created by Sarah Westcott on 4/18/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef testclustercalcs_hpp #define testclustercalcs_hpp #include "gtest/gtest.h" #include "mcc.hpp" #include "sensitivity.hpp" #include "specificity.hpp" #include "fdr.hpp" #include "npv.hpp" #include "ppv.hpp" #include "f1score.hpp" #include "tp.hpp" #include "fp.hpp" #include "fpfn.hpp" #include "tptn.hpp" #include "tn.hpp" #include "fn.hpp" #include "accuracy.hpp" #include "fakemcc.hpp" class TestClusterCalcs { public: TestClusterCalcs(string); ~TestClusterCalcs(); FakeClusterCalcValues fake; ClusterMetric* metric; private: }; #endif /* testclustercalcs_hpp */ mothur-1.48.0/TestMothur/testcommands/000077500000000000000000000000001424121717000177405ustar00rootroot00000000000000mothur-1.48.0/TestMothur/testcommands/testgetgroupscommand.cpp000066400000000000000000000006151424121717000247240ustar00rootroot00000000000000// // testgetgroupscommand.cpp // Mothur // // Created by Sarah Westcott on 7/30/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #include "testgetgroupscommand.h" /* TEST_CASE("Testing GetGroupsCommand Class") { TestGetGroupsCommand tGetGroupsCommand; //how do we unit test this?? //each private function reads files processes them and writes new ones. } */ mothur-1.48.0/TestMothur/testcommands/testgetgroupscommand.h000066400000000000000000000014751424121717000243760ustar00rootroot00000000000000// // testgetgroupscommand.h // Mothur // // Created by Sarah Westcott on 7/30/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
// #ifndef __Mothur__testgetgroupscommand__ #define __Mothur__testgetgroupscommand__ #include "getgroupscommand.h" class TestGetGroupsCommand : public GetGroupsCommand { public: using GetGroupsCommand::readFasta; using GetGroupsCommand::readName; using GetGroupsCommand::readGroup; using GetGroupsCommand::readCount; using GetGroupsCommand::readList; using GetGroupsCommand::readTax; using GetGroupsCommand::fillNames; using GetGroupsCommand::readShared; using GetGroupsCommand::readDesign; using GetGroupsCommand::readPhylip; using GetGroupsCommand::readColumn; }; #endif /* defined(__Mothur__testgetgroupscommand__) */ mothur-1.48.0/TestMothur/testcommands/testmergegroupscommand.cpp000066400000000000000000000006301424121717000252410ustar00rootroot00000000000000// // testmergegroupscommand.cpp // Mothur // // Created by Sarah Westcott on 7/29/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #include "testmergegroupscommand.h" /* TEST_CASE("Testing MergeGroupsCommand Class") { TestMergeGroupsCommand tMergeGroupsCommand; //how do we unit test this?? //each private function reads files processes them and writes new ones. } */ mothur-1.48.0/TestMothur/testcommands/testmergegroupscommand.h000066400000000000000000000011541424121717000247100ustar00rootroot00000000000000// // testmergegroupscommand.h // Mothur // // Created by Sarah Westcott on 7/29/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #ifndef __Mothur__testmergegroupscommand__ #define __Mothur__testmergegroupscommand__ #include "mergegroupscommand.h" class TestMergeGroupsCommand : public MergeGroupsCommand { public: //private functions using MergeGroupsCommand::process; using MergeGroupsCommand::processSharedFile; using MergeGroupsCommand::processGroupFile; using MergeGroupsCommand::processCountFile; }; #endif /* defined(__Mothur__testmergegroupscommand__) */ mothur-1.48.0/TestMothur/testcommands/testremovegroupscommand.cpp000066400000000000000000000006331424121717000254420ustar00rootroot00000000000000// // testremovegroupscommand.cpp // Mothur // // Created by Sarah Westcott on 7/30/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #include "testremovegroupscommand.h" /* TEST_CASE("Testing RemoveGroupsCommand Class") { TestRemoveGroupsCommand tRemoveGroupsCommand; //how do we unit test this?? //each private function reads files processes them and writes new ones. }*/ mothur-1.48.0/TestMothur/testcommands/testremovegroupscommand.h000066400000000000000000000015671424121717000251160ustar00rootroot00000000000000// // testremovegroupscommand.h // Mothur // // Created by Sarah Westcott on 7/30/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
// #ifndef __Mothur__testremovegroupscommand__ #define __Mothur__testremovegroupscommand__ #include "removegroupscommand.h" class TestRemoveGroupsCommand : public RemoveGroupsCommand { public: using RemoveGroupsCommand::readFasta; using RemoveGroupsCommand::readName; using RemoveGroupsCommand::readGroup; using RemoveGroupsCommand::readCount; using RemoveGroupsCommand::readList; using RemoveGroupsCommand::readTax; using RemoveGroupsCommand::fillNames; using RemoveGroupsCommand::readShared; using RemoveGroupsCommand::readDesign; using RemoveGroupsCommand::readPhylip; using RemoveGroupsCommand::readColumn; }; #endif /* defined(__Mothur__testremovegroupscommand__) */ mothur-1.48.0/TestMothur/testcommands/testrenamefilecommand.cpp000066400000000000000000000066411424121717000250210ustar00rootroot00000000000000// // testrenamefilecommand.cpp // Mothur // // Created by Sarah Westcott on 5/4/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #include "testrenamefilecommand.h" #include "dataset.h" /**************************************************************************************************/ TestRenameFileCommand::TestRenameFileCommand() { //setup m = MothurOut::getInstance(); TestDataSet data; filenames = data.getSubsetFNGFiles(); } /**************************************************************************************************/ TestRenameFileCommand::~TestRenameFileCommand() {} /************************************************************************************************** TEST_CASE("Testing RenameFileCommand Class") { TestRenameFileCommand testRename; SECTION("Testing GetNewName - with prefix") { INFO("Using prefix=greatData") // Only appears on a FAIL testRename.prefix = "greatData"; testRename.mothurGenerated = true; CAPTURE(testRename.getNewName(testRename.filenames[0], "fasta")); // Displays this variable on a FAIL CHECK(testRename.getNewName(testRename.filenames[0], "fasta") == "greatData.txt"); testRename.filenames[0] = testRename.getNewName(testRename.filenames[0], "fasta"); //for teardown } SECTION("Testing GetNewName - with user name") { INFO("Using prefix=greatData") // Only appears on a FAIL testRename.outputfile = "greatData.fasta"; testRename.mothurGenerated = false; CAPTURE(testRename.getNewName(testRename.filenames[0], "fasta")); // Displays this variable on a FAIL CHECK(testRename.getNewName(testRename.filenames[0], "fasta") == "greatData.fasta"); testRename.filenames[0] = testRename.getNewName(testRename.filenames[0], "fasta"); //for teardown } SECTION("Testing RenameOrCopy - deleteOld=false") { INFO("Uses mothur rename function to move or system command to copy.") // Only appears on a FAIL testRename.deleteOld = false; testRename.renameOrCopy(testRename.filenames[0], "greatData.new.fasta"); ifstream in, in2; bool ableToOpen = testRename.util.openInputFile("greatData.new.fasta", in); in.close(); CAPTURE(ableToOpen); CHECK(ableToOpen == false); bool ableToOpen2 = testRename.util.openInputFile(testRename.filenames[0], in2); in2.close(); CAPTURE(ableToOpen2); CHECK(ableToOpen2 == false); testRename.util.mothurRemove("greatData.new.fasta"); } SECTION("Testing RenameOrCopy - deleteOld=true") { INFO("Uses mothur rename function to move or system command to copy.") // Only appears on a FAIL testRename.deleteOld = true; testRename.renameOrCopy(testRename.filenames[0], "greatData.new.fasta"); ifstream in, in2; bool ableToOpen = testRename.util.openInputFile("greatData.new.fasta", in); in.close(); CAPTURE(ableToOpen); CHECK(ableToOpen == false); 
testRename.filenames[0] = testRename.getNewName(testRename.filenames[0], "fasta"); //for teardown } }*/ /**************************************************************************************************/ mothur-1.48.0/TestMothur/testcommands/testrenamefilecommand.h000066400000000000000000000014701424121717000244610ustar00rootroot00000000000000// // testrenamefilecommand.h // Mothur // // Created by Sarah Westcott on 5/4/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #ifndef __Mothur__testrenamefilecommand__ #define __Mothur__testrenamefilecommand__ #include "renamefilecommand.h" class TestRenameFileCommand : public RenameFileCommand { public: TestRenameFileCommand(); ~TestRenameFileCommand(); MothurOut* m; vector filenames; //private functions using RenameFileCommand::getNewName; using RenameFileCommand::renameOrCopy; //private variables using RenameFileCommand::prefix; using RenameFileCommand::mothurGenerated; using RenameFileCommand::outputfile; using RenameFileCommand::deleteOld; }; #endif /* defined(__Mothur__testrenamefilecommand__) */ mothur-1.48.0/TestMothur/testcommands/testrenameseqscommand.cpp000066400000000000000000000024051424121717000250470ustar00rootroot00000000000000// // testrenameseqscommand.cpp // Mothur // // Created by Sarah Westcott on 9/29/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #include "testrenameseqscommand.h" /* TEST_CASE("Testing Rename.seqs Command") { TestRenameSeqsCommand tRename; MothurOut* m = MothurOut::getInstance(); //setup environment tRename.fastaFile = m->testDirectory + "final.fasta"; tRename.nameFile = m->testDirectory + "final.names"; tRename.groupfile = m->testDirectory + "final.groups"; //tRename.countfile = m->testDirectory + "final.count_table"; //tRename.qualfile = m->testDirectory + "final.qual"; //tRename.contigsfile = m->testDirectory + "final.contigs"; //tRename.fileFile = m->testDirectory + "final.file"; //tRename.mapFile = m->testDirectory + "final.map"; SECTION("Test execute") { INFO("Using fasta, name and group files") // Only appears on a FAIL tRename.fastaFile = m->testDirectory + "final.fasta"; tRename.nameFile = m->testDirectory + "final.names"; tRename.groupfile = m->testDirectory + "final.groups"; CAPTURE(tRename.execute()); // Displays this variable on a FAIL CHECK(m->getNumErrors() == 0); } //add more tests... } */ mothur-1.48.0/TestMothur/testcommands/testrenameseqscommand.h000066400000000000000000000020231424121717000245100ustar00rootroot00000000000000// // testrenameseqscommand.h // Mothur // // Created by Sarah Westcott on 9/29/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
// #ifndef __Mothur__testrenameseqscommand__ #define __Mothur__testrenameseqscommand__ #include "renameseqscommand.h" class TestRenameSeqsCommand : public RenameSeqsCommand { public: MothurOut* m; //private functions using RenameSeqsCommand::readQual; using RenameSeqsCommand::readContigs; using RenameSeqsCommand::readFasta; using RenameSeqsCommand::processFile; using RenameSeqsCommand::readMapFile; using RenameSeqsCommand::readFiles; //private variables using RenameSeqsCommand::fastaFile; using RenameSeqsCommand::nameFile; using RenameSeqsCommand::groupfile; using RenameSeqsCommand::countfile; using RenameSeqsCommand::qualfile; using RenameSeqsCommand::contigsfile; using RenameSeqsCommand::fileFile; using RenameSeqsCommand::mapFile; }; #endif /* defined(__Mothur__testrenameseqscommand__) */ mothur-1.48.0/TestMothur/testcommands/testsetseedcommand.cpp000066400000000000000000000011051424121717000243340ustar00rootroot00000000000000// // testsetseedcommand.cpp // Mothur // // Created by Sarah Westcott on 3/24/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #include "setseedcommand.h" /* TEST_CASE("Testing set.seed command") { string optionString = "seed=12345"; Command* setseed = new SetSeedCommand(optionString); SECTION("Testing random seed") { INFO("Using seed=12345") // Only appears on a FAIL setseed->execute(); int randValue = rand()%100 + 1; CHECK(randValue == 16); } delete setseed; }*/ mothur-1.48.0/TestMothur/testcontainers/000077500000000000000000000000001424121717000203045ustar00rootroot00000000000000mothur-1.48.0/TestMothur/testcontainers/testOligos.cpp000066400000000000000000000106551424121717000231530ustar00rootroot00000000000000// // testOligos.cpp // Mothur // // Created by Sarah Westcott on 7/30/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
// #include "testOligos.hpp" /**************************************************************************************************/ TestOligos::TestOligos() { //setup //m = MothurOut::getInstance(); TestDataSet data; oligosfiles = data.getOligosFiles(); //single, paired, indexes, comboNamesTest } /**************************************************************************************************/ TestOligos::~TestOligos() { //teardown } /**************************************************************************************************/ TEST(Test_Container_Oligos, Constructors) { TestOligos test; Oligos oligos; EXPECT_EQ(oligos.hasPairedPrimers(), false); EXPECT_EQ(oligos.hasPairedBarcodes(), false); Oligos singleOligos(test.oligosfiles[0]); EXPECT_EQ(singleOligos.hasPairedPrimers(), false); EXPECT_EQ(singleOligos.hasPairedBarcodes(), false); //did it read properly map singleBarcodes = singleOligos.getBarcodes(); int F003D000Index = singleBarcodes["AATGGTAC"]; EXPECT_EQ("F003D000", singleOligos.getBarcodeName(F003D000Index)); int MOCKGQY1XT001Index = singleBarcodes["AACCGTGTC"]; EXPECT_EQ("MOCK.GQY1XT001", singleOligos.getBarcodeName(MOCKGQY1XT001Index)); //read with reverseCompliment of reverse primer or barcode Oligos pairedOligos(test.oligosfiles[1]); EXPECT_EQ(pairedOligos.hasPairedPrimers(), true); EXPECT_EQ(pairedOligos.hasPairedBarcodes(), true); map pairedBarcodes = pairedOligos.getPairedBarcodes(); map pairedPrimers = pairedOligos.getPairedPrimers(); oligosPair F01R2A = pairedBarcodes[0]; EXPECT_EQ("F01R2A", pairedOligos.getBarcodeName(0)); EXPECT_EQ("CCAAC", F01R2A.forward); EXPECT_EQ("CAGTG", F01R2A.reverse); oligosPair V3 = pairedPrimers[0]; EXPECT_EQ("V3", pairedOligos.getPrimerName(0)); EXPECT_EQ("CCTACGGGAGGCAGCAG", V3.forward); EXPECT_EQ("CCAGCAGCCGCGGTAAT", V3.reverse); //read WITHOUT reverseCompliment of reverse primer or barcode Oligos pairedOligosNoReverse; pairedOligosNoReverse.read(test.oligosfiles[1], false); EXPECT_EQ(pairedOligosNoReverse.hasPairedPrimers(), true); EXPECT_EQ(pairedOligosNoReverse.hasPairedBarcodes(), true); pairedBarcodes = pairedOligosNoReverse.getPairedBarcodes(); pairedPrimers = pairedOligosNoReverse.getPairedPrimers(); F01R2A = pairedBarcodes[0]; EXPECT_EQ("F01R2A", pairedOligosNoReverse.getBarcodeName(0)); EXPECT_EQ("CCAAC", F01R2A.forward); EXPECT_EQ("CACTG", F01R2A.reverse); V3 = pairedPrimers[0]; EXPECT_EQ("V3", pairedOligosNoReverse.getPrimerName(0)); EXPECT_EQ("CCTACGGGAGGCAGCAG", V3.forward); EXPECT_EQ("ATTACCGCGGCTGCTGG", V3.reverse); //oligos for indexed barcode files Oligos indexedOligos(test.oligosfiles[2]); EXPECT_EQ(indexedOligos.hasPairedPrimers(), true); EXPECT_EQ(indexedOligos.hasPairedBarcodes(), true); pairedBarcodes = indexedOligos.getPairedBarcodes(); pairedPrimers = indexedOligos.getPairedPrimers(); oligosPair Mock3 = pairedBarcodes[0]; EXPECT_EQ("Mock3", indexedOligos.getBarcodeName(0)); EXPECT_EQ("NONE", Mock3.forward); EXPECT_EQ("CAGCTCATCAGC", Mock3.reverse); oligosPair testPrimer = pairedPrimers[0]; EXPECT_EQ("testPrimer", indexedOligos.getPrimerName(0)); EXPECT_EQ("NONE", testPrimer.forward); EXPECT_EQ("ACTYAAAKGAATTGACGG", testPrimer.reverse); } TEST(Test_Container_Oligos, testComboNames) { TestOligos test; Oligos pairedOligos(test.oligosfiles[1]); EXPECT_EQ("F01R2A.V3", pairedOligos.getGroupName(0,0)); EXPECT_EQ("F01R2D.V5", pairedOligos.getGroupName(3,1)); Oligos singleOligos(test.oligosfiles[0]); EXPECT_EQ("F003D000", singleOligos.getGroupName(0,0)); EXPECT_EQ("F003D006", singleOligos.getGroupName(3,1)); Oligos 
    indexedOligos(test.oligosfiles[2]);
    EXPECT_EQ("Mock3.testPrimer", indexedOligos.getGroupName(0,0));
    EXPECT_EQ("CKD_f31.testPrimer2", indexedOligos.getGroupName(1,1));
    EXPECT_EQ("CKD_f31.testPrimer", indexedOligos.getGroupName(1,0));
    EXPECT_EQ("Mock3.testPrimer2", indexedOligos.getGroupName(0,1));
}
/**************************************************************************************************/
mothur-1.48.0/TestMothur/testcontainers/testOligos.hpp000066400000000000000000000007331424121717000231540ustar00rootroot00000000000000//
//  testOligos.hpp
//  Mothur
//
//  Created by Sarah Westcott on 7/30/18.
//  Copyright © 2018 Schloss Lab. All rights reserved.
//

#ifndef testOligos_hpp
#define testOligos_hpp

#include "gtest/gtest.h"
#include "oligos.h"
#include "dataset.h"

class TestOligos : public Oligos {

public:
    TestOligos();
    ~TestOligos();

    //MothurOut* m;
    vector<string> oligosfiles; //single, paired, indexes, comboNamesTest
};

#endif /* testOligos_hpp */
mothur-1.48.0/TestMothur/testcontainers/testcounttable.cpp000066400000000000000000000162501424121717000240540ustar00rootroot00000000000000//
//  testcounttable.cpp
//  Mothur
//
//  Created by Sarah Westcott on 10/25/18.
//  Copyright © 2018 Schloss Lab. All rights reserved.
//

#include "testcounttable.hpp"

/**************************************************************************************************/
TestCountTable::TestCountTable() {  //setup
    m = MothurOut::getInstance();

    TestDataSet data;
    vector<string> filenames = data.getSubsetFNGFiles();
    fastafile = filenames[0];
    namefile = filenames[1];
    groupfile = filenames[2];
    countfile = data.getCountTableFile();
}
/**************************************************************************************************/
TestCountTable::~TestCountTable() {}//teardown
/**************************************************************************************************/
//Testing createTable functions
TEST(Test_Container_CountTable, createTables) {
    //int createTable(string, string, bool);  //namefile, groupfile, createGroup
    TestCountTable testData;
    CountTable ct;
    ct.createTable(testData.namefile, testData.groupfile, false);
    EXPECT_EQ(ct.getNumGroups(), 10);
    EXPECT_EQ(ct.getNumSeqs(), 200);

    //int createTable(set<string>&, map<string, string>&, set<string>&); //seqNames, seqName->group, groupNames
    set<string> seqNames;
    seqNames.insert("seq1"); seqNames.insert("seq2"); seqNames.insert("seq3"); seqNames.insert("seq4"); seqNames.insert("seq5");
    set<string> groupNames;
    groupNames.insert("group1"); groupNames.insert("group2");
    map<string, string> groupMap;
    groupMap["seq1"] = "group1"; groupMap["seq2"] = "group1"; groupMap["seq3"] = "group1";
    groupMap["seq4"] = "group2"; groupMap["seq5"] = "group2";
    ct.clearTable();
    ct.createTable(seqNames, groupMap, groupNames);
    EXPECT_EQ(ct.getNumGroups(), 2);
    EXPECT_EQ(ct.getNumSeqs(), 5);

    //int readTable(string, bool, bool); //filename, readGroups, mothurRunning
    ct.clearTable();
    ct.readTable(testData.countfile, true, true);
    EXPECT_EQ(ct.getNumGroups(), 10);
    EXPECT_EQ(ct.getNumSeqs(), 200);

    ct.clearTable();
    ct.readTable(testData.countfile, false, true);
    EXPECT_EQ(ct.getNumGroups(), 0);
    EXPECT_EQ(ct.getNumSeqs(), 200);

    //int readTable(string, string); //filename, format - if format=fasta, read fasta file and create unique table
    ct.clearTable();
    ct.readTable(testData.fastafile, "fasta");
    EXPECT_EQ(ct.getNumGroups(), 0);
    EXPECT_EQ(ct.getNumSeqs(), 93);
    EXPECT_EQ(ct.getHardCodedHeaders()[0], "Representative_Sequence");
    EXPECT_EQ(ct.getHardCodedHeaders()[1], "total");
}
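/**************************************************************************************************/
//The count_table files read above are plain tab-delimited text: the first two columns carry the
//hard-coded "Representative_Sequence" and "total" headers checked in createTables, followed by one
//column of counts per group. A minimal sketch of that layout (hypothetical sequence names, groups,
//and counts for illustration only, not copied from test.count_table):
//
//  Representative_Sequence total   groupA  groupB
//  seq1                    3       1       2
//  seq2                    5       0       5
//
/**************************************************************************************************/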
/**************************************************************************************************/
//Testing testGroups functions
TEST(Test_Container_CountTable, testGroups) {
    TestCountTable testData;
    CountTable ct;
    EXPECT_EQ(ct.testGroups(testData.countfile), true);

    vector<string> groups;
    ct.testGroups(testData.countfile, groups);
    EXPECT_EQ(groups[0], "F003D000");
    EXPECT_EQ(groups[1], "F003D002");
    EXPECT_EQ(groups[2], "F003D004");
    EXPECT_EQ(groups[3], "F003D006");
    EXPECT_EQ(groups[4], "F003D008");
    EXPECT_EQ(groups[5], "F003D142");

    ct.createTable(testData.namefile, testData.groupfile, false);
    CountTable ct2;
    ct2.copy(&ct);
    EXPECT_EQ(ct2.getNumGroups(), 10);
    EXPECT_EQ(ct2.getNumSeqs(), 200);
    EXPECT_EQ(ct2.hasGroupInfo(), true);

    groups = ct2.getNamesOfGroups();
    EXPECT_EQ(groups[0], "F003D000");
    EXPECT_EQ(groups[1], "F003D002");
    EXPECT_EQ(groups[2], "F003D004");
    EXPECT_EQ(groups[3], "F003D006");
    EXPECT_EQ(groups[4], "F003D008");
    EXPECT_EQ(groups[5], "F003D142");

    groups.clear();
    groups.push_back("group1"); groups.push_back("group2"); groups.push_back("group3");
    ct2.setNamesOfGroups(groups);
    groups = ct2.getNamesOfGroups();
    EXPECT_EQ(groups[0], "group1");
    EXPECT_EQ(groups[1], "group2");
    EXPECT_EQ(groups[2], "group3");

    ct2.addGroup("group4");
    ct2.removeGroup("group2");
    groups = ct2.getNamesOfGroups();
    EXPECT_EQ(groups[0], "group1");
    EXPECT_EQ(groups[1], "group3");
    EXPECT_EQ(groups[2], "group4");
}
/**************************************************************************************************/
//Testing push_back functions
TEST(Test_Container_CountTable, push_backs) {
    TestCountTable testData;
    CountTable ct;
    ct.push_back("seq1");
    EXPECT_EQ(ct.getNamesOfSeqs()[0], "seq1");
    ct.push_back("seq2", 15);
    EXPECT_EQ(ct.getNumSeqs(), 16);
    EXPECT_EQ(ct.size(), 2);

    ct.renameSeq("seq1", "mySeq");
    EXPECT_EQ(ct.getNamesOfSeqs()[0], "mySeq");
    ct.remove("mySeq");
    EXPECT_EQ(ct.getNamesOfSeqs()[0], "seq2");

    ct.push_back("seq3", 10);
    EXPECT_EQ(ct.get("seq2"), 0);
    EXPECT_EQ(ct.get("seq3"), 1);
    ct.setNumSeqs("seq3", 100);
    EXPECT_EQ(ct.getNumSeqs("seq3"), 100);
}
/**************************************************************************************************/
//Testing push_back with group abundances
TEST(Test_Container_CountTable, push_backGroups) {
    TestCountTable testData;
    CountTable ct;
    ct.createTable(testData.namefile, testData.groupfile, false);
    ct.setAbund("GQY1XT001B1CEF", "F003D000", 50);
    EXPECT_EQ(ct.getGroupCount("GQY1XT001B1CEF", "F003D000"), 50);

    vector<int> abunds; abunds.resize(10, 100);
    ct.push_back("mySeq", abunds);
    EXPECT_EQ(ct.getGroupCount("mySeq", "F003D000"), 100);
}
/**************************************************************************************************/
TEST(Test_Container_CountTable, getSets) {
    TestCountTable testData;
    CountTable ct;
    ct.createTable(testData.namefile, testData.groupfile, false);

    vector<string> thisSeqsGroups = ct.getGroups("GQY1XT001B1CEF");
    EXPECT_EQ(thisSeqsGroups[0], "F003D148");
    vector<int> thisSeqsAbunds = ct.getGroupCounts("GQY1XT001B1CEF");
    EXPECT_EQ(thisSeqsAbunds[0], 0);
    EXPECT_EQ(thisSeqsAbunds[8], 1);
    EXPECT_EQ(ct.getGroupCount("GQY1XT001B1CEF", "F003D148"), 1);
    EXPECT_EQ(ct.getGroupCount("F003D148"), 21);
    //EXPECT_EQ(ct.getNumSeqs("fakeSeq"), 0);
    EXPECT_EQ(ct.getNumSeqs("GQY1XT001CO8VD"), 16);
    EXPECT_EQ(ct.getNumUniqueSeqs(), 93);
}
/**************************************************************************************************/
TEST(Test_Container_CountTable, dataStructures) {
    TestCountTable testData;
    CountTable ct;
    ct.createTable(testData.namefile, testData.groupfile, false);
EXPECT_EQ(ct.getNamesOfSeqs().size(), 93); EXPECT_EQ(ct.getNamesOfSeqs("F003D148").size(), 19); EXPECT_EQ(ct.getNamesOfSeqs("F003D004").size(), 11); EXPECT_EQ(ct.getNumSeqs("GQY1XT001B1CEF"), 1); EXPECT_EQ(ct.getNumSeqs("GQY1XT001CO8VD"), 16); ct.mergeCounts("GQY1XT001B1CEF", "GQY1XT001CO8VD"); EXPECT_EQ(ct.getNumSeqs("GQY1XT001B1CEF"), 17); } /**************************************************************************************************/ //ListVector getListVector(); //SharedRAbundVectors* getShared(); //SharedRAbundVectors* getShared(vector); //set of groups selected //map getNameMap(); //sequenceName -> total number of sequences it represents mothur-1.48.0/TestMothur/testcontainers/testcounttable.hpp000066400000000000000000000007511424121717000240600ustar00rootroot00000000000000// // testcounttable.hpp // Mothur // // Created by Sarah Westcott on 10/25/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef testcounttable_hpp #define testcounttable_hpp #include "gtest/gtest.h" #include "counttable.h" #include "dataset.h" class TestCountTable : public CountTable { public: TestCountTable(); ~TestCountTable(); MothurOut* m; string namefile, groupfile, fastafile, countfile; }; #endif /* testcounttable_hpp */ mothur-1.48.0/TestMothur/testcontainers/testfastqread.cpp000066400000000000000000000033371424121717000236700ustar00rootroot00000000000000// // testfastqread.cpp // Mothur // // Created by Sarah Westcott on 3/29/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #include "testfastqread.h" #include "dataset.h" /**************************************************************************************************/ TestFastqRead::TestFastqRead() { //setup m = MothurOut::getInstance(); TestFastqDataSet data; reads = data.getForwardFastq(); filenames = data.getSubsetFRFastq(100); } /**************************************************************************************************/ TestFastqRead::~TestFastqRead() { for (int i = 0; i < filenames.size(); i++) { util.mothurRemove(filenames[i]); } //teardown } /**************************************************************************************************/ //Using ATGCGTCATC & 40 39 38 37 36 35 34 33 32 31 TEST(Test_Container_FastqRead, FastqReadConstructor) { TestFastqRead testFastq; vector scores; for (int i = 31; i < 41; i++) { scores.push_back(i); } Sequence seq("testSeq", "ATGCGTCATC"); QualityScores qual("testSeq", scores); FastqRead read(seq, qual); EXPECT_EQ(read.getSeq(), "ATGCGTCATC"); EXPECT_EQ(read.getScores()[0], 31); } TEST(Test_Container_FastqRead, FastqReadFromFileConstructor) { TestFastqRead testFastq; ifstream in; bool ignore; string format = "illumina1.8+"; Utils util; util.openInputFile(testFastq.filenames[0], in); FastqRead read(in, ignore, format); EXPECT_EQ(read.getSeq(), (testFastq.reads[0]).getSeq()); EXPECT_EQ(read.getScores()[0], testFastq.reads[0].getScores()[0]); } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testcontainers/testfastqread.h000066400000000000000000000010411424121717000233230ustar00rootroot00000000000000// // testfastqread.h // Mothur // // Created by Sarah Westcott on 3/29/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. 
// #ifndef __Mothur__testfastqread__ #define __Mothur__testfastqread__ #include "fastqread.h" #include "gtest/gtest.h" class TestFastqRead : public FastqRead { public: TestFastqRead(); ~TestFastqRead(); using FastqRead::convertQual; MothurOut* m; vector reads; vector filenames; }; #endif /* defined(__Mothur__testfastqread__) */ mothur-1.48.0/TestMothur/testcontainers/testoptimatrix.cpp000066400000000000000000000146261424121717000241210ustar00rootroot00000000000000// // testoptimatrix.cpp // Mothur // // Created by Sarah Westcott on 6/6/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #include "testoptimatrix.h" #include "dataset.h" /**************************************************************************************************/ TestOptiMatrix::TestOptiMatrix() { //setup TestDataSet data; filenames = data.getSubsetFNGFiles(); //Fasta, name, group returned columnFile = data.getSubsetFNGDistFile(); phylipFile = data.getSubsetFNGPhylipDistFile(); } /**************************************************************************************************/ TestOptiMatrix::~TestOptiMatrix() {} /**************************************************************************************************/ ////distfile, dupsFile, dupsFormat, distFormat, cutoff, sim TEST(Test_Container_OptiMatrix, readColumn) { TestOptiMatrix testOMatrix; OptiMatrix matrix(testOMatrix.columnFile, testOMatrix.filenames[1], "name", "column", 0.03, false); //EXPECT_EQ(160,(matrix.print(cout))); EXPECT_EQ(160,(matrix.getNumDists())); } TEST(Test_Container_OptiMatrix, readPhylip) { TestOptiMatrix testOMatrix; OptiMatrix pmatrix(testOMatrix.phylipFile, "", "", "phylip", 0.03, false); //EXPECT_EQ(160,(pmatrix.print(cout))); EXPECT_EQ(160,(pmatrix.getNumDists())); } TEST(Test_Container_OptiMatrix, getNumCLose) { TestOptiMatrix testOMatrix; OptiMatrix matrix(testOMatrix.columnFile, testOMatrix.filenames[1], "name", "column", 0.03, false); EXPECT_EQ(1,(matrix.getNumClose(0))); EXPECT_EQ(2,(matrix.getNumClose(5))); EXPECT_EQ(3,(matrix.getNumClose(10))); EXPECT_EQ(7,(matrix.getNumClose(15))); EXPECT_EQ(2,(matrix.getNumClose(20))); } TEST(Test_Container_OptiMatrix, isClose) { TestOptiMatrix testOMatrix; OptiMatrix matrix(testOMatrix.columnFile, testOMatrix.filenames[1], "name", "column", 0.03, false); //check closeness EXPECT_EQ(true,(matrix.isClose(0, 8))); EXPECT_EQ(true,(matrix.isClose(1, 28))); EXPECT_EQ(true,(matrix.isClose(2, 44))); EXPECT_EQ(true,(matrix.isClose(15, 42))); EXPECT_EQ(true,(matrix.isClose(35, 36))); //check not close EXPECT_EQ(false,(matrix.isClose(57, 8))); EXPECT_EQ(false,(matrix.isClose(47, 28))); EXPECT_EQ(false,(matrix.isClose(32, 44))); EXPECT_EQ(false,(matrix.isClose(23, 42))); EXPECT_EQ(false,(matrix.isClose(12, 36))); } TEST(Test_Container_OptiMatrix, getCloseSeqs) { TestOptiMatrix testOMatrix; OptiMatrix matrix(testOMatrix.columnFile, testOMatrix.filenames[1], "name", "column", 0.03, false); //11 GQY1XT001B04KZ,GQY1XT001EBRFH 17 32 52 55 57 string Expected_ReturnResults = ""; Expected_ReturnResults += "17"; Expected_ReturnResults += "32"; Expected_ReturnResults += "52"; Expected_ReturnResults += "55"; Expected_ReturnResults += "57"; set temp = matrix.getCloseSeqs(11); string ReturnResults = ""; for (set::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); //21 GQY1XT001BUMO0 26 42 46 Expected_ReturnResults = ""; Expected_ReturnResults += "26"; Expected_ReturnResults += "42"; Expected_ReturnResults += "46"; temp = 
matrix.getCloseSeqs(21); ReturnResults = ""; for (set::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); //31 GQY1XT001CVCKG,GQY1XT001BO8Z9 20 27 Expected_ReturnResults = ""; Expected_ReturnResults += "20"; Expected_ReturnResults += "27"; temp = matrix.getCloseSeqs(31); ReturnResults = ""; for (set::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); //41 GQY1XT001DY3E7 19 29 Expected_ReturnResults = ""; Expected_ReturnResults += "19"; Expected_ReturnResults += "29"; temp = matrix.getCloseSeqs(41); ReturnResults = ""; for (set::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); //51 GQY1XT001EN363,GQY1XT001B0ZKY,GQY1XT001BCPXE,GQY1XT001BEKE1,GQY1XT001D25E1,GQY1XT001EWORZ,GQY1XT001AQB9P,GQY1XT001CEFI4 49 Expected_ReturnResults = ""; Expected_ReturnResults += "49"; temp = matrix.getCloseSeqs(51); ReturnResults = ""; for (set::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); } TEST(Test_Container_OptiMatrix, getNameIndexMap) { TestOptiMatrix testOMatrix; OptiMatrix matrix(testOMatrix.columnFile, testOMatrix.filenames[1], "name", "column", 0.03, false); //maps names to index in closeness matrix map nameIndexMap = matrix.getNameIndexMap(); //check nameMap EXPECT_EQ(0,nameIndexMap["GQY1XT001A4DGI"]); EXPECT_EQ(39,nameIndexMap["GQY1XT001DRYVA"]); EXPECT_EQ(44,nameIndexMap["GQY1XT001E23UK"]); EXPECT_EQ(52,nameIndexMap["GQY1XT001ENMKV"]); EXPECT_EQ(48,nameIndexMap["GQY1XT001EJAUJ"]); EXPECT_EQ("GQY1XT001A4DGI",(matrix.getName(0))); EXPECT_EQ("GQY1XT001DRYVA",(matrix.getName(39))); EXPECT_EQ("GQY1XT001E23UK",(matrix.getName(44))); EXPECT_EQ("GQY1XT001ENMKV",(matrix.getName(52))); EXPECT_EQ("GQY1XT001EJAUJ",(matrix.getName(48))); } TEST(Test_Container_OptiMatrix, getListSingle) { TestOptiMatrix testOMatrix; OptiMatrix matrix(testOMatrix.columnFile, testOMatrix.filenames[1], "name", "column", 0.03, false); //maps names to index in closeness matrix ListVector* listSingle = matrix.getListSingle(); //check bin 0 string bin = listSingle->get(0); EXPECT_EQ("GQY1XT001AD34Z", bin); bin = listSingle->get(10); EXPECT_EQ("GQY1XT001BRLCO", bin); bin = listSingle->get(18); EXPECT_EQ("GQY1XT001CKAUI", bin); bin = listSingle->get(3); EXPECT_EQ("GQY1XT001AOSH9,GQY1XT001BLJ4I,GQY1XT001BNIJQ,GQY1XT001CT9JB,GQY1XT001DCPGQ,GQY1XT001DY88Y,GQY1XT001AHO0L,GQY1XT001DRMZK,GQY1XT001DIXY7,GQY1XT001CDBZ1,GQY1XT001B8C47,GQY1XT001A71WZ,GQY1XT001D41QJ,GQY1XT001BAMTS", bin); bin = listSingle->get(7); EXPECT_EQ("GQY1XT001B8UKY", bin); } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testcontainers/testoptimatrix.h000066400000000000000000000012611424121717000235550ustar00rootroot00000000000000// // testoptimatrix.h // Mothur // // Created by Sarah Westcott on 6/6/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. 
// #ifndef __Mothur__testoptimatrix__ #define __Mothur__testoptimatrix__ #include "optimatrix.h" #include "gtest/gtest.h" #include "gmock/gmock.h" class TestOptiMatrix : public OptiMatrix { public: TestOptiMatrix(); ~TestOptiMatrix(); using OptiData::getCloseSeqs; using OptiMatrix::readPhylip; using OptiMatrix::readColumn; using OptiData::print; using OptiData::getNumClose; string columnFile, phylipFile; vector filenames; }; #endif /* defined(__Mothur__testoptimatrix__) */ mothur-1.48.0/TestMothur/testcontainers/testsequence.cpp000066400000000000000000000066361424121717000235330ustar00rootroot00000000000000// // testsequence.cpp // Mothur // // Created by Sarah Westcott on 3/23/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #include "testsequence.h" #include "dataset.h" /**************************************************************************************************/ TestSequence::TestSequence() { //setup m = MothurOut::getInstance(); TestDataSet data; vector filenames = data.getSubsetFNGFiles(); fastafile = filenames[0]; } /**************************************************************************************************/ TestSequence::~TestSequence() {}//teardown /**************************************************************************************************/ TEST(Test_Container_Sequence, SequenceConstructors) { Sequence seq("testSeq", "ATGCGTCATC"); EXPECT_EQ(seq.getAligned(), "ATGCGTCATC"); EXPECT_EQ(seq.getName(), "testSeq"); EXPECT_EQ(seq.getUnaligned(), "ATGCGTCATC"); Sequence seq1; EXPECT_EQ(seq1.getAligned(), ""); EXPECT_EQ(seq1.getName(), ""); EXPECT_EQ(seq1.getUnaligned(), ""); } TEST(Test_Container_Sequence, setGets) { Sequence seq; seq.setName("mothurSeq"); EXPECT_EQ("mothurSeq", seq.getName()); seq.setAligned("A--TGC-G-TCA--TC"); EXPECT_EQ("A--TGC-G-TCA--TC", seq.getAligned()); seq.setUnaligned("A--TGC-G-TCA--TC"); EXPECT_EQ("ATGCGTCATC", seq.getUnaligned()); seq.reverseComplement(); EXPECT_EQ("GATGACGCAT", seq.getUnaligned()); seq.setComment("This is my sequence comment. It can contain anything I want including numbers and symbols & % -- 234"); EXPECT_EQ("This is my sequence comment. 
It can contain anything I want including numbers and symbols & % -- 234", seq.getComment()); Sequence seq2("testSeq", "ATNNGTCATC"); EXPECT_EQ("testSeq\tATNNGTCATC", seq2.getInlineSeq()); EXPECT_EQ(2, seq2.getNumNs()); EXPECT_EQ(0, seq.getNumNs()); Sequence seq3("testSeq", "ATGCGTCATC"); EXPECT_EQ(10, seq3.getNumBases()); EXPECT_EQ(1, seq3.getStartPos()); EXPECT_EQ(10, seq3.getEndPos()); Sequence seq4("testSeq", "..A--TGC-G-TCA--TC.."); EXPECT_EQ(3, seq4.getStartPos()); EXPECT_EQ(18, seq4.getEndPos()); seq3.trim(5); EXPECT_EQ("ATGCG", seq3.getAligned()); seq4.trim(5); EXPECT_EQ("ATGCG", seq4.getAligned()); seq4.padToPos(3); EXPECT_EQ("..GCG", seq4.getAligned()); seq4.padFromPos(3); EXPECT_EQ("..G..", seq4.getAligned()); seq.setAligned("ATGCG"); seq.filterToPos(2); EXPECT_EQ("..GCG", seq.getAligned()); seq.setAligned("ATGCG"); seq.filterFromPos(2); EXPECT_EQ("AT...", seq.getAligned()); seq.setAligned("..GCG"); seq.filterFromPos(2); EXPECT_EQ(".....", seq.getAligned()); seq.setAligned("..A--TGC-G-TCA--TC.."); EXPECT_EQ(20, seq.getAlignLength()); EXPECT_EQ(0, seq.getAmbigBases()); seq.setAligned("..A--MGC-G-TCA--TC.."); EXPECT_EQ(1, seq.getAmbigBases()); seq.removeAmbigBases(); EXPECT_EQ(0, seq.getAmbigBases()); EXPECT_EQ("..A---GC-G-TCA--TC..", seq.getAligned()); EXPECT_EQ(1, seq.getLongHomoPolymer()); seq.setAligned("..A--AAA-G-TCA--TC.."); EXPECT_EQ(4, seq.getLongHomoPolymer()); EXPECT_EQ("0000231031", seq.convert2ints()); } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testcontainers/testsequence.h000066400000000000000000000007101424121717000231630ustar00rootroot00000000000000// // testsequence.h // Mothur // // Created by Sarah Westcott on 7/27/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #ifndef Mothur_testsequence_h #define Mothur_testsequence_h #include "gtest/gtest.h" #include "sequence.hpp" class TestSequence : public Sequence { public: TestSequence(); ~TestSequence(); using Sequence::initialize; MothurOut* m; string fastafile; }; #endif mothur-1.48.0/TestMothur/testcontainers/testsharedrabundfloatvector.cpp000066400000000000000000000164731424121717000266360ustar00rootroot00000000000000// // testsharedrabundfloatvector.cpp // Mothur // // Created by Sarah Westcott on 8/9/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
// #include "testsharedrabundfloatvector.hpp" #include "dataset.h" /**************************************************************************************************/ TestSharedRabundFloatVector::TestSharedRabundFloatVector() : SharedRAbundFloatVector() { //setup m = MothurOut::getInstance(); TestDataSet data; relabundFile = data.getRelabundFile(); } /**************************************************************************************************/ TestSharedRabundFloatVector::~TestSharedRabundFloatVector() {}//teardown /**************************************************************************************************/ TEST(Test_Container_SharedRabundFloatVector, Constructors) { TestSharedRabundFloatVector test; SharedRAbundFloatVector noGroup; EXPECT_EQ(noGroup.getNumBins(), 0.0); EXPECT_EQ(noGroup.getNumSeqs(), 0.0); EXPECT_EQ(noGroup.getMaxRank(), 0.0); EXPECT_EQ(noGroup.getGroup(), ""); SharedRAbundFloatVector noGroup2(10); EXPECT_EQ(noGroup2.getNumBins(), 10); EXPECT_EQ(noGroup2.getNumSeqs(), 0); EXPECT_EQ(noGroup2.getMaxRank(), 0); EXPECT_EQ(noGroup2.getGroup(), ""); vector abunds(10, 4.5); SharedRAbundFloatVector noGroup3(abunds, 4.5, 10, 45.0); EXPECT_EQ(noGroup3.getNumBins(), 10); EXPECT_EQ(noGroup3.getNumSeqs(), 45.0); EXPECT_EQ(noGroup3.getMaxRank(), 4.5); EXPECT_EQ(noGroup3.getGroup(), ""); SharedRAbundFloatVector noGroup4(abunds); EXPECT_EQ(noGroup3.getNumBins(), 10); EXPECT_EQ(noGroup3.getNumSeqs(), 45.0); EXPECT_EQ(noGroup3.getMaxRank(), 4.5); EXPECT_EQ(noGroup3.getGroup(), ""); SharedRAbundFloatVector noGroup5(noGroup4); EXPECT_EQ(noGroup3.getNumBins(), 10); EXPECT_EQ(noGroup3.getNumSeqs(), 45.0); EXPECT_EQ(noGroup3.getMaxRank(), 4.5); EXPECT_EQ(noGroup3.getGroup(), ""); ifstream in; Utils util; util.openInputFile(test.relabundFile, in); util.getline(in); //gobble headers SharedRAbundFloatVector temp(in); EXPECT_EQ(temp.getNumBins(), 58); ASSERT_NEAR(temp.getNumSeqs(), 1, 0.001); ASSERT_NEAR(temp.getMaxRank(), 0.25, 0.001); EXPECT_EQ(temp.getGroup(), "F003D000"); SharedRAbundFloatVector temp2(in); EXPECT_EQ(temp2.getNumBins(), 58); ASSERT_NEAR(temp2.getNumSeqs(), 1, 0.001); ASSERT_NEAR(temp2.getMaxRank(), 0.16667, 0.001); EXPECT_EQ(temp2.getGroup(), "F003D002"); int numBins; string label, groupN; in >> label >> groupN >> numBins; SharedRAbundFloatVector temp3(in, label, groupN, numBins); EXPECT_EQ(temp3.getNumBins(), 58); ASSERT_NEAR(temp3.getNumSeqs(), 1, 0.001); ASSERT_NEAR(temp3.getMaxRank(), 0.277778, 0.001); EXPECT_EQ(temp3.getGroup(), "F003D004"); in.close(); } TEST(Test_Container_SharedRabundFloatVector, GetsSets) { vector abunds(10, 4.5); SharedRAbundFloatVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 45.0); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.getGroup(), ""); temp.set(5, 0.5); EXPECT_EQ(temp.getNumSeqs(), 41.5); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.get()[5], 0.5); EXPECT_EQ(temp.get(5), 0.5); EXPECT_EQ(temp.getGroup(), ""); temp.setGroup("myGroup"); EXPECT_EQ(temp.getGroup(), "myGroup"); } TEST(Test_Container_SharedRabundFloatVector, PushBack) { vector abunds(10, 4.5); SharedRAbundFloatVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 45.0); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.getGroup(), ""); temp.push_back(0.25); EXPECT_EQ(temp.getNumSeqs(), 45.25); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.getNumBins(), 11); } TEST(Test_Container_SharedRabundFloatVector, ResizeSize) { vector abunds(10, 4.5); SharedRAbundFloatVector temp(abunds); 
EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 45.0); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.getGroup(), ""); temp.resize(20); EXPECT_EQ(temp.getNumSeqs(), 45.0); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.getNumBins(), 20); temp.resize(5); EXPECT_EQ(temp.getNumSeqs(), 22.5); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.getNumBins(), 5); EXPECT_EQ(temp.size(), 5); } TEST(Test_Container_SharedRabundFloatVector, ClearRemove) { vector abunds(10, 4.5); SharedRAbundFloatVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 45.0); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.getGroup(), ""); temp.remove(2); EXPECT_EQ(temp.getNumSeqs(), 40.5); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.getNumBins(), 9); temp.clear(); EXPECT_EQ(temp.getNumSeqs(), 0); EXPECT_EQ(temp.getMaxRank(), 0); EXPECT_EQ(temp.getNumBins(), 0); EXPECT_EQ(temp.size(), 0); } TEST(Test_Container_SharedRabundFloatVector, GetRabundVector) { vector abunds(10, 4.5); SharedRAbundFloatVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 45.0); EXPECT_EQ(temp.getMaxRank(), 4.5); EXPECT_EQ(temp.getGroup(), ""); RAbundVector rabund = temp.getRAbundVector(); EXPECT_EQ(rabund.getNumBins(), 10); EXPECT_EQ(rabund.getNumSeqs(), 40); EXPECT_EQ(rabund.getMaxRank(), 4); EXPECT_EQ(rabund.get(5), 4); } TEST(Test_Container_SharedRabundFloatVector, GetSabundVector) { vector abunds(10, 4.5); SharedRAbundFloatVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 45.0); EXPECT_EQ(temp.getMaxRank(), 4.5); SAbundVector sabund = temp.getSAbundVector(); EXPECT_EQ(sabund.getNumBins(), 10); EXPECT_EQ(sabund.getNumSeqs(), 40); EXPECT_EQ(sabund.getMaxRank(), 4); EXPECT_EQ(sabund.get(4), 10); } TEST(Test_Container_SharedRabundFloatVector, RAbundFloatVector) { vector abunds(10, 4.5); SharedRAbundFloatVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 45.0); EXPECT_EQ(temp.getMaxRank(), 4.5); RAbundFloatVector rabundFloat = temp.getRAbundFloatVector(); EXPECT_EQ(rabundFloat.getNumBins(), 10.0); EXPECT_EQ(rabundFloat.getNumSeqs(), 45.0); EXPECT_EQ(rabundFloat.getMaxRank(), 4.5); rabundFloat.set(5, 12.5); EXPECT_EQ(rabundFloat.getNumBins(), 10.0); EXPECT_EQ(rabundFloat.getNumSeqs(), 53.0); EXPECT_EQ(rabundFloat.getMaxRank(), 12.5); } TEST(Test_Container_SharedRabundFloatVector, SharedRAbundVector) { vector abunds(10, 4.5); SharedRAbundFloatVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 45.0); EXPECT_EQ(temp.getMaxRank(), 4.5); SharedRAbundVector rabund = temp.getSharedRAbundVector(); EXPECT_EQ(rabund.getNumBins(), 10); EXPECT_EQ(rabund.getNumSeqs(), 40); EXPECT_EQ(rabund.getMaxRank(), 4); rabund.set(5, 12); EXPECT_EQ(rabund.getNumBins(), 10); EXPECT_EQ(rabund.getNumSeqs(), 48); EXPECT_EQ(rabund.getMaxRank(), 12); } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testcontainers/testsharedrabundfloatvector.hpp000066400000000000000000000010561424121717000266320ustar00rootroot00000000000000// // testsharedrabundfloatvector.hpp // Mothur // // Created by Sarah Westcott on 8/9/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
// #ifndef testsharedrabundfloatvector_hpp #define testsharedrabundfloatvector_hpp #include "sharedrabundfloatvector.hpp" #include "gtest/gtest.h" class TestSharedRabundFloatVector : public SharedRAbundFloatVector { public: TestSharedRabundFloatVector(); ~TestSharedRabundFloatVector(); MothurOut* m; string relabundFile; }; #endif /* testsharedrabundfloatvector_hpp */ mothur-1.48.0/TestMothur/testcontainers/testsharedrabundfloatvectors.cpp000066400000000000000000000204231424121717000270070ustar00rootroot00000000000000// // testsharedrabundfloatvectors.cpp // Mothur // // Created by Sarah Westcott on 8/14/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #include "testsharedrabundfloatvectors.hpp" #include "dataset.h" /**************************************************************************************************/ TestSharedRabundFloatVectors::TestSharedRabundFloatVectors() : SharedRAbundFloatVectors() { //setup m = MothurOut::getInstance(); TestDataSet data; relabundFile = data.getRelabundFile(); } /**************************************************************************************************/ TestSharedRabundFloatVectors::~TestSharedRabundFloatVectors() {}//teardown /**************************************************************************************************/ TEST(Test_Container_SharedRabundFloatVectors, Constructors) { TestSharedRabundFloatVectors test; SharedRAbundFloatVectors temp; EXPECT_EQ(temp.getNumBins(), 0); ifstream in; Utils util; util.openInputFile(test.relabundFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag); EXPECT_EQ(nextLabel, ""); EXPECT_EQ(labelTag, "Otu"); EXPECT_EQ(userGroups[0], "F003D000"); SharedRAbundFloatVectors copy(fileRead); EXPECT_EQ(copy.getLabel(), "0.03"); EXPECT_EQ(labelTag, "Otu"); EXPECT_EQ(userGroups[0], "F003D000"); } TEST(Test_Container_SharedRabundFloatVectors, GetsSets) { TestSharedRabundFloatVectors test; ifstream in; Utils util; util.openInputFile(test.relabundFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag); EXPECT_EQ(nextLabel, ""); EXPECT_EQ(labelTag, "Otu"); EXPECT_EQ(userGroups[0], "F003D000"); //setLabels fileRead.setLabels("0.99"); EXPECT_EQ(fileRead.getLabel(), "0.99"); //getOTUTotal ASSERT_NEAR(fileRead.getOTUTotal(4), 0.615171, 0.001); //getOTU vector otu5 = fileRead.getOTU(4); ASSERT_NEAR(otu5[0], 0.0, 0.001); ASSERT_NEAR(otu5[2], 0.055556, 0.001); //get ASSERT_NEAR(fileRead.get(4, "F003D000"), 0.0, 0.001); ASSERT_NEAR(fileRead.get(4, "F003D002"), 0.111111, 0.001); ASSERT_NEAR(fileRead.get(0, "F003D142"), 0.222222, 0.001); //set fileRead.set(4, 0.25, "F003D000"); ASSERT_NEAR(fileRead.get(4, "F003D000"), 0.25, 0.001); fileRead.set(4, 0.1234, "F003D002"); ASSERT_NEAR(fileRead.get(4, "F003D002"), 0.1234, 0.001); fileRead.set(0, 0.456, "F003D142"); ASSERT_NEAR(fileRead.get(0, "F003D142"), 0.456, 0.001); //getOTUNames vector otuNames = fileRead.getOTUNames(); EXPECT_EQ(otuNames[10], "Otu11"); //setOTUNames otuNames[5] = "Otu99"; fileRead.setOTUNames(otuNames); EXPECT_EQ(fileRead.getOTUNames()[5], "Otu99"); EXPECT_EQ(fileRead.getOTUName(5), "Otu99"); fileRead.setOTUName(5, "Otu6"); EXPECT_EQ(fileRead.getOTUName(5), "Otu6"); //getNumBins EXPECT_EQ(fileRead.getNumBins(), 58); //getNumSeqsSmallestGroup ASSERT_NEAR(fileRead.getNumSeqsSmallestGroup(), 1.0, 0.001); //getNamesGroups EXPECT_EQ(fileRead.getNamesGroups()[1], "F003D002"); //getNumGroups 
EXPECT_EQ(fileRead.getNumGroups(), 10);
    
    //getNumSeqs
    ASSERT_NEAR(fileRead.getNumSeqs("F003D002"), 1.1234, 0.001); //1.0 + 0.1234 (set from above)
}

TEST(Test_Container_SharedRabundFloatVectors, PushBack) {
    TestSharedRabundFloatVectors test;
    ifstream in; Utils util; util.openInputFile(test.relabundFile, in);
    vector<string> userGroups; string nextLabel, labelTag;
    SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag);
    
    //getNumGroups
    EXPECT_EQ(fileRead.getNumGroups(), 10);
    
    vector<float> abunds(58, 0.5);
    SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(abunds);
    temp->setGroup("myabunds");
    fileRead.push_back(temp);
    
    //getNumGroups
    EXPECT_EQ(fileRead.getNumGroups(), 11);
}

TEST(Test_Container_SharedRabundFloatVectors, eliminateZeroOTUS) {
    TestSharedRabundFloatVectors test;
    ifstream in; Utils util; util.openInputFile(test.relabundFile, in);
    vector<string> userGroups; string nextLabel, labelTag;
    SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag);
    
    EXPECT_EQ(fileRead.getNumBins(), 58);
    fileRead.set(16, 0, "F003D142");
    EXPECT_EQ(fileRead.get(16, "F003D142"), 0); //zero out bin
    fileRead.eliminateZeroOTUS(); //remove zeroed out bin
    EXPECT_EQ(fileRead.getNumBins(), 57);
}

TEST(Test_Container_SharedRabundFloatVectors, Removes) {
    TestSharedRabundFloatVectors test;
    ifstream in; Utils util; util.openInputFile(test.relabundFile, in);
    vector<string> userGroups; string nextLabel, labelTag;
    SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag);
    
    EXPECT_EQ(fileRead.getNumBins(), 58);
    ASSERT_NEAR(fileRead.removeOTU(16), 0.074074, 0.001);
    EXPECT_EQ(fileRead.getNumBins(), 57);
    
    vector<string> groups;
    groups.push_back("F003D142"); groups.push_back("F003D002"); groups.push_back("F003D004"); groups.push_back("F003D006");
    fileRead.removeGroups(groups);
    EXPECT_EQ(fileRead.getNumBins(), 41);
    ASSERT_NEAR(fileRead.getNumSeqsSmallestGroup(), 0.999999, 0.001);
    
    fileRead.removeGroups(20); //remove groups with abundance less than 20
    EXPECT_EQ(fileRead.getNumGroups(), 0);
}

TEST(Test_Container_SharedRabundFloatVectors, SizeClear) {
    TestSharedRabundFloatVectors test;
    ifstream in; Utils util; util.openInputFile(test.relabundFile, in);
    vector<string> userGroups; string nextLabel, labelTag;
    SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag);
    
    EXPECT_EQ(fileRead.size(), 10);
    fileRead.clear();
    EXPECT_EQ(fileRead.getNumBins(), 0);
}

TEST(Test_Container_SharedRabundFloatVectors, GetRabundVector) {
    TestSharedRabundFloatVectors test;
    ifstream in; Utils util; util.openInputFile(test.relabundFile, in);
    vector<string> userGroups; string nextLabel, labelTag;
    SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag);
    
    RAbundVector temp = fileRead.getRAbundVector();
    EXPECT_EQ(temp.get(0), 1);
}

TEST(Test_Container_SharedRabundFloatVectors, GetSabundVector) {
    TestSharedRabundFloatVectors test;
    ifstream in; Utils util; util.openInputFile(test.relabundFile, in);
    vector<string> userGroups; string nextLabel, labelTag;
    SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag);
    
    SAbundVector temp = fileRead.getSAbundVector();
    EXPECT_EQ(temp.get(0), 0); //number of OTUs with abundance of 5
    EXPECT_EQ(temp.get(1), 1); //number of OTUs with abundance of 1
    EXPECT_EQ(temp.getMaxRank(), 1);
}

TEST(Test_Container_SharedRabundFloatVectors, GetSharedRAbundVectors) {
    TestSharedRabundFloatVectors test;
    ifstream in; Utils util; util.openInputFile(test.relabundFile, in);
    vector<string> userGroups; string nextLabel, labelTag;
    SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag);
    
    vector<SharedRAbundVector*> temp =
fileRead.getSharedRAbundVectors(); EXPECT_EQ(temp[0]->get(5), 0); //first groups abundance of OTU5 EXPECT_EQ(temp[1]->get(5), 0); //first groups abundance of OTU5 EXPECT_EQ(temp[0]->get(0), 0); //first groups abundance of OTU1 } TEST(Test_Container_SharedRabundFloatVectors, GetSharedRAbundFloatVectors) { TestSharedRabundFloatVectors test; ifstream in; Utils util; util.openInputFile(test.relabundFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundFloatVectors fileRead(in, userGroups, nextLabel, labelTag); vector temp = fileRead.getSharedRAbundFloatVectors(); EXPECT_EQ(temp[0]->get(5), 0.0); //first groups abundance of OTU5 as a float. ASSERT_NEAR(temp[1]->get(5), 0.111111, 0.001); //first groups abundance of OTU5 as a float. ASSERT_NEAR(temp[0]->get(0), 0.05, 0.001); //first groups abundance of OTU1 as a float. } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testcontainers/testsharedrabundfloatvectors.hpp000066400000000000000000000010701424121717000270110ustar00rootroot00000000000000// // testsharedrabundfloatvectors.hpp // Mothur // // Created by Sarah Westcott on 8/14/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef testsharedrabundfloatvectors_hpp #define testsharedrabundfloatvectors_hpp #include "sharedrabundfloatvectors.hpp" #include "gtest/gtest.h" class TestSharedRabundFloatVectors : public SharedRAbundFloatVectors { public: TestSharedRabundFloatVectors(); ~TestSharedRabundFloatVectors(); MothurOut* m; string relabundFile; }; #endif /* testsharedrabundfloatvectors_hpp */ mothur-1.48.0/TestMothur/testcontainers/testsharedrabundvector.cpp000066400000000000000000000150661424121717000256050ustar00rootroot00000000000000// // testsharedrabundvector.cpp // Mothur // // Created by Sarah Westcott on 8/9/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
//
#include "testsharedrabundvector.hpp"
#include "dataset.h"

/**************************************************************************************************/
TestSharedRabundVector::TestSharedRabundVector() : SharedRAbundVector() {  //setup
    m = MothurOut::getInstance();
    TestDataSet data;
    sharedFile = data.getSharedFile();
}
/**************************************************************************************************/
TestSharedRabundVector::~TestSharedRabundVector() {}//teardown
/**************************************************************************************************/

TEST(Test_Container_SharedRabundVector, Constructors) {
    TestSharedRabundVector test;
    
    SharedRAbundVector noGroup;
    EXPECT_EQ(noGroup.getNumBins(), 0);
    EXPECT_EQ(noGroup.getNumSeqs(), 0);
    EXPECT_EQ(noGroup.getMaxRank(), 0);
    EXPECT_EQ(noGroup.getGroup(), "");
    
    SharedRAbundVector noGroup2(10);
    EXPECT_EQ(noGroup2.getNumBins(), 10);
    EXPECT_EQ(noGroup2.getNumSeqs(), 0);
    EXPECT_EQ(noGroup2.getMaxRank(), 0);
    EXPECT_EQ(noGroup2.getGroup(), "");
    
    vector<int> abunds(10, 5);
    SharedRAbundVector noGroup3(abunds, 5, 10, 50);
    EXPECT_EQ(noGroup3.getNumBins(), 10);
    EXPECT_EQ(noGroup3.getNumSeqs(), 50);
    EXPECT_EQ(noGroup3.getMaxRank(), 5);
    EXPECT_EQ(noGroup3.getGroup(), "");
    
    SharedRAbundVector noGroup4(abunds);
    EXPECT_EQ(noGroup4.getNumBins(), 10);
    EXPECT_EQ(noGroup4.getNumSeqs(), 50);
    EXPECT_EQ(noGroup4.getMaxRank(), 5);
    EXPECT_EQ(noGroup4.getGroup(), "");
    
    SharedRAbundVector noGroup5(noGroup4);
    EXPECT_EQ(noGroup5.getNumBins(), 10);
    EXPECT_EQ(noGroup5.getNumSeqs(), 50);
    EXPECT_EQ(noGroup5.getMaxRank(), 5);
    EXPECT_EQ(noGroup5.getGroup(), "");
    
    ifstream in; Utils util; util.openInputFile(test.sharedFile, in);
    util.getline(in); //gobble headers
    
    SharedRAbundVector temp(in);
    EXPECT_EQ(temp.getNumBins(), 58);
    EXPECT_EQ(temp.getNumSeqs(), 20);
    EXPECT_EQ(temp.getMaxRank(), 5);
    EXPECT_EQ(temp.getGroup(), "F003D000");
    
    SharedRAbundVector temp2(in);
    EXPECT_EQ(temp2.getNumBins(), 58);
    EXPECT_EQ(temp2.getNumSeqs(), 18);
    EXPECT_EQ(temp2.getMaxRank(), 3);
    EXPECT_EQ(temp2.getGroup(), "F003D002");
    
    int numBins; string label, groupN;
    in >> label >> groupN >> numBins;
    SharedRAbundVector temp3(in, label, groupN, numBins);
    EXPECT_EQ(temp3.getNumBins(), 58);
    EXPECT_EQ(temp3.getNumSeqs(), 18);
    EXPECT_EQ(temp3.getMaxRank(), 5);
    EXPECT_EQ(temp3.getGroup(), "F003D004");
    
    in.close();
}

TEST(Test_Container_SharedRabundVector, GetsSets) {
    vector<int> abunds(10, 5);
    SharedRAbundVector temp(abunds);
    EXPECT_EQ(temp.getNumBins(), 10);
    EXPECT_EQ(temp.getNumSeqs(), 50);
    EXPECT_EQ(temp.getMaxRank(), 5);
    EXPECT_EQ(temp.getGroup(), "");
    
    temp.set(5, 20);
    EXPECT_EQ(temp.getNumSeqs(), 65);
    EXPECT_EQ(temp.getMaxRank(), 20);
    EXPECT_EQ(temp.get()[5], 20);
    EXPECT_EQ(temp.get(5), 20);
    EXPECT_EQ(temp.getGroup(), "");
    
    temp.setGroup("myGroup");
    EXPECT_EQ(temp.getGroup(), "myGroup");
}

TEST(Test_Container_SharedRabundVector, Increment) {
    vector<int> abunds(10, 5);
    SharedRAbundVector temp(abunds);
    EXPECT_EQ(temp.getNumBins(), 10);
    EXPECT_EQ(temp.getNumSeqs(), 50);
    EXPECT_EQ(temp.getMaxRank(), 5);
    
    temp.increment(5);
    EXPECT_EQ(temp.getNumSeqs(), 51);
    EXPECT_EQ(temp.getMaxRank(), 6);
}

TEST(Test_Container_SharedRabundVector, PushBack) {
    vector<int> abunds(10, 5);
    SharedRAbundVector temp(abunds);
    EXPECT_EQ(temp.getNumBins(), 10);
    EXPECT_EQ(temp.getNumSeqs(), 50);
    EXPECT_EQ(temp.getMaxRank(), 5);
    
    temp.push_back(20);
    EXPECT_EQ(temp.getNumSeqs(), 70);
    EXPECT_EQ(temp.getMaxRank(), 20);
    EXPECT_EQ(temp.getNumBins(), 11);
}

TEST(Test_Container_SharedRabundVector, ResizeSize) {
    vector<int> abunds(10, 5);
SharedRAbundVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 50); EXPECT_EQ(temp.getMaxRank(), 5); temp.resize(20); EXPECT_EQ(temp.getNumSeqs(), 50); EXPECT_EQ(temp.getMaxRank(), 5); EXPECT_EQ(temp.getNumBins(), 20); temp.resize(5); EXPECT_EQ(temp.getNumSeqs(), 25); EXPECT_EQ(temp.getMaxRank(), 5); EXPECT_EQ(temp.getNumBins(), 5); EXPECT_EQ(temp.size(), 5); } TEST(Test_Container_SharedRabundVector, ClearRemove) { vector abunds(10, 5); SharedRAbundVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 50); EXPECT_EQ(temp.getMaxRank(), 5); temp.remove(2); EXPECT_EQ(temp.getNumSeqs(), 45); EXPECT_EQ(temp.getMaxRank(), 5); EXPECT_EQ(temp.getNumBins(), 9); temp.clear(); EXPECT_EQ(temp.getNumSeqs(), 0); EXPECT_EQ(temp.getMaxRank(), 0); EXPECT_EQ(temp.getNumBins(), 0); EXPECT_EQ(temp.size(), 0); } TEST(Test_Container_SharedRabundVector, GetRabundVector) { vector abunds(10, 10); SharedRAbundVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 100); EXPECT_EQ(temp.getMaxRank(), 10); RAbundVector rabund = temp.getRAbundVector(); EXPECT_EQ(rabund.getNumBins(), 10); EXPECT_EQ(rabund.getNumSeqs(), 100); EXPECT_EQ(rabund.getMaxRank(), 10); } TEST(Test_Container_SharedRabundVector, GetSabundVector) { vector abunds(10, 5); SharedRAbundVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 50); EXPECT_EQ(temp.getMaxRank(), 5); SAbundVector sabund = temp.getSAbundVector(); EXPECT_EQ(sabund.getNumBins(), 10); EXPECT_EQ(sabund.getNumSeqs(), 50); EXPECT_EQ(sabund.getMaxRank(), 5); EXPECT_EQ(sabund.get(5), 10); } TEST(Test_Container_SharedRabundVector, RAbundFloatVector) { vector abunds(10, 5); SharedRAbundVector temp(abunds); EXPECT_EQ(temp.getNumBins(), 10); EXPECT_EQ(temp.getNumSeqs(), 50); EXPECT_EQ(temp.getMaxRank(), 5); RAbundFloatVector rabundFloat = temp.getRAbundFloatVector(); EXPECT_EQ(rabundFloat.getNumBins(), 10.0); EXPECT_EQ(rabundFloat.getNumSeqs(), 50.0); EXPECT_EQ(rabundFloat.getMaxRank(), 5.0); rabundFloat.set(5, 12.5); EXPECT_EQ(rabundFloat.getNumBins(), 10.0); EXPECT_EQ(rabundFloat.getNumSeqs(), 57.5); EXPECT_EQ(rabundFloat.getMaxRank(), 12.5); } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testcontainers/testsharedrabundvector.hpp000066400000000000000000000007761424121717000256140ustar00rootroot00000000000000// // testsharedrabundvector.hpp // Mothur // // Created by Sarah Westcott on 8/9/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef testsharedrabundvector_hpp #define testsharedrabundvector_hpp #include "sharedrabundvector.hpp" #include "gtest/gtest.h" class TestSharedRabundVector : public SharedRAbundVector { public: TestSharedRabundVector(); ~TestSharedRabundVector(); MothurOut* m; string sharedFile; }; #endif /* testsharedrabundvector_hpp */ mothur-1.48.0/TestMothur/testcontainers/testsharedrabundvectors.cpp000066400000000000000000000203671424121717000257700ustar00rootroot00000000000000// // testsharedrabundvectors.cpp // Mothur // // Created by Sarah Westcott on 8/10/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
// #include "testsharedrabundvectors.hpp" #include "dataset.h" /**************************************************************************************************/ TestSharedRabundVectors::TestSharedRabundVectors() : SharedRAbundVectors() { //setup m = MothurOut::getInstance(); TestDataSet data; sharedFile = data.getSharedFile(); } /**************************************************************************************************/ TestSharedRabundVectors::~TestSharedRabundVectors() {}//teardown /**************************************************************************************************/ TEST(Test_Container_SharedRabundVectors, Constructors) { TestSharedRabundVectors test; SharedRAbundVectors temp; EXPECT_EQ(temp.getNumBins(), 0); ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); EXPECT_EQ(nextLabel, ""); EXPECT_EQ(labelTag, "Otu"); EXPECT_EQ(userGroups[0], "F003D000"); SharedRAbundVectors copy(fileRead); EXPECT_EQ(copy.getLabel(), "0.03"); EXPECT_EQ(labelTag, "Otu"); EXPECT_EQ(userGroups[0], "F003D000"); } TEST(Test_Container_SharedRabundVectors, GetsSets) { TestSharedRabundVectors test; ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); EXPECT_EQ(nextLabel, ""); EXPECT_EQ(labelTag, "Otu"); EXPECT_EQ(userGroups[0], "F003D000"); //setLabels fileRead.setLabels("0.99"); EXPECT_EQ(fileRead.getLabel(), "0.99"); //getOTUTotal EXPECT_EQ(fileRead.getOTUTotal(4), 12); //getOTU vector otu5 = fileRead.getOTU(4); EXPECT_EQ(otu5[0], 0); EXPECT_EQ(otu5[1], 2); //get EXPECT_EQ(fileRead.get(4, "F003D000"), 0); EXPECT_EQ(fileRead.get(4, "F003D002"), 2); EXPECT_EQ(fileRead.get(0, "F003D142"), 6); //set fileRead.set(4, 10, "F003D000"); EXPECT_EQ(fileRead.get(4, "F003D000"), 10); fileRead.set(4, 15, "F003D002"); EXPECT_EQ(fileRead.get(4, "F003D002"), 15); fileRead.set(0, 3, "F003D142"); EXPECT_EQ(fileRead.get(0, "F003D142"), 3); //getOTUNames vector otuNames = fileRead.getOTUNames(); EXPECT_EQ(otuNames[10], "Otu11"); //setOTUNames otuNames[5] = "Otu99"; fileRead.setOTUNames(otuNames); EXPECT_EQ(fileRead.getOTUNames()[5], "Otu99"); EXPECT_EQ(fileRead.getOTUName(5), "Otu99"); fileRead.setOTUName(5, "Otu6"); EXPECT_EQ(fileRead.getOTUName(5), "Otu6"); //getNumBins EXPECT_EQ(fileRead.getNumBins(), 58); //getNumSeqsSmallestGroup EXPECT_EQ(fileRead.getNumSeqsSmallestGroup(), 15); //getNamesGroups EXPECT_EQ(fileRead.getNamesGroups()[1], "F003D002"); //getNumGroups EXPECT_EQ(fileRead.getNumGroups(), 10); //getNumSeqs EXPECT_EQ(fileRead.getNumSeqs("F003D002"), 31); } TEST(Test_Container_SharedRabundVectors, PushBack) { TestSharedRabundVectors test; ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); //getNumGroups EXPECT_EQ(fileRead.getNumGroups(), 10); vector abunds(58, 5); SharedRAbundVector* temp = new SharedRAbundVector(abunds); temp->setGroup("myabunds"); fileRead.push_back(temp); //getNumGroups EXPECT_EQ(fileRead.getNumGroups(), 11); vector otuAbunds(11, 2); string otuLabel = "Otu59"; fileRead.push_back(otuAbunds, otuLabel); EXPECT_EQ(fileRead.getNumBins(), 59); } TEST(Test_Container_SharedRabundVectors, eliminateZeroOTUS) { TestSharedRabundVectors test; ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector 
userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); EXPECT_EQ(fileRead.getNumBins(), 58); fileRead.set(16, 0, "F003D142"); EXPECT_EQ(fileRead.get(16, "F003D142"), 0); //zero out bin fileRead.eliminateZeroOTUS(); //remove zeroed out bin EXPECT_EQ(fileRead.getNumBins(), 57); } TEST(Test_Container_SharedRabundVectors, Removes) { TestSharedRabundVectors test; ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); EXPECT_EQ(fileRead.getNumBins(), 58); EXPECT_EQ(fileRead.removeOTU(16), 2); EXPECT_EQ(fileRead.getNumBins(), 57); vector groups; groups.push_back("F003D142"); groups.push_back("F003D002"); groups.push_back("F003D004"); groups.push_back("F003D006"); fileRead.removeGroups(groups); EXPECT_EQ(fileRead.getNumBins(), 41); EXPECT_EQ(fileRead.getNumSeqsSmallestGroup(), 15); fileRead.removeGroups(20); //remove groups with abundance less than 20 EXPECT_EQ(fileRead.getNumGroups(), 4); } TEST(Test_Container_SharedRabundVectors, SizeClear) { TestSharedRabundVectors test; ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); EXPECT_EQ(fileRead.size(), 10); fileRead.clear(); EXPECT_EQ(fileRead.getNumBins(), 0); } TEST(Test_Container_SharedRabundVectors, GetRabundVector) { TestSharedRabundVectors test; ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); RAbundVector temp = fileRead.getRAbundVector(); EXPECT_EQ(temp.get(0), 24); RAbundVector temp2 = fileRead.getRAbundVector("F003D142"); EXPECT_EQ(temp2.get(0), 6); } TEST(Test_Container_SharedRabundVectors, GetSabundVector) { TestSharedRabundVectors test; ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); SAbundVector temp = fileRead.getSAbundVector(); EXPECT_EQ(temp.get(5), 1); //number of OTUs with abundance of 5 EXPECT_EQ(temp.get(1), 35); //number of OTUs with abundance of 1 EXPECT_EQ(temp.getMaxRank(), 24); } TEST(Test_Container_SharedRabundVectors, GetSharedRAbundVectors) { TestSharedRabundVectors test; ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); vector temp = fileRead.getSharedRAbundVectors(); EXPECT_EQ(temp[0]->get(5), 0); //first groups abundance of OTU5 EXPECT_EQ(temp[1]->get(5), 2); //first groups abundance of OTU5 EXPECT_EQ(temp[0]->get(0), 1); //first groups abundance of OTU1 } TEST(Test_Container_SharedRabundVectors, GetSharedRAbundFloatVectors) { TestSharedRabundVectors test; ifstream in; Utils util; util.openInputFile(test.sharedFile, in); vector userGroups; string nextLabel, labelTag; SharedRAbundVectors fileRead(in, userGroups, nextLabel, labelTag); vector temp = fileRead.getSharedRAbundFloatVectors(); EXPECT_EQ(temp[0]->get(5), 0.0); //first groups abundance of OTU5 as a float. This is not the same as if we ran get.relabund() command EXPECT_EQ(temp[1]->get(5), 2.0); //first groups abundance of OTU5 as a float. This is not the same as if we ran get.relabund() command EXPECT_EQ(temp[0]->get(0), 1.0); //first groups abundance of OTU1 as a float. 
This is not the same as if we ran get.relabund() command } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testcontainers/testsharedrabundvectors.hpp000066400000000000000000000010111424121717000257560ustar00rootroot00000000000000// // testsharedrabundvectors.hpp // Mothur // // Created by Sarah Westcott on 8/10/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef testsharedrabundvectors_hpp #define testsharedrabundvectors_hpp #include "sharedrabundvectors.hpp" #include "gtest/gtest.h" class TestSharedRabundVectors : public SharedRAbundVectors { public: TestSharedRabundVectors(); ~TestSharedRabundVectors(); MothurOut* m; string sharedFile; }; #endif /* testsharedrabundvectors_hpp */ mothur-1.48.0/TestMothur/testopticluster.cpp000066400000000000000000000035621424121717000212260ustar00rootroot00000000000000// // testopticluster.cpp // Mothur // // Created by Sarah Westcott on 6/15/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #include "testopticluster.h" #include "distancecommand.h" #include "dataset.h" /**************************************************************************************************/ TestOptiCluster::TestOptiCluster() { //setup m = MothurOut::getInstance(); metric = new MCC(); setVariables(&testMatrix, metric); } /**************************************************************************************************/ TestOptiCluster::~TestOptiCluster() { delete metric; } /**************************************************************************************************/ TEST(Test_Cluster_OptiCluster, myInitialize) { TestOptiCluster test; double initialMetricValue; EXPECT_EQ(0,(test.initialize(initialMetricValue, true, "singleton"))); } TEST(Test_Cluster_OptiCluster, myUpdate) { TestOptiCluster test; double initialMetricValue; test.initialize(initialMetricValue, false, "singleton"); //no randomization test.update(initialMetricValue); //first round ASSERT_NEAR(1, initialMetricValue, 0.00001); //metric value test.update(initialMetricValue); //first round ASSERT_NEAR(1, initialMetricValue, 0.00001); //metric value } TEST(Test_Cluster_OptiCluster, getCloseFarCounts) { TestOptiCluster test; double initialMetricValue; test.initialize(initialMetricValue, false, "singleton"); //no randomization test.update(initialMetricValue); vector results = test.getCloseFarCounts(0, 31); ASSERT_EQ(results[0], 0); //number of close sequences in bin 31 to seq 0 ASSERT_EQ(results[1], 10); //number of far sequences in bin 31 to seq 0 } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testopticluster.h000066400000000000000000000012701424121717000206650ustar00rootroot00000000000000// // testopticluster.h // Mothur // // Created by Sarah Westcott on 6/15/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. 
// #ifndef __Mothur__testopticluster__ #define __Mothur__testopticluster__ #include "gtest/gtest.h" #include "opticluster.h" #include "fakeoptimatrix.hpp" class TestOptiCluster : public OptiCluster { public: TestOptiCluster(); ~TestOptiCluster(); MothurOut* m; ClusterMetric* metric; FakeOptiMatrix testMatrix; using OptiCluster::setVariables; using OptiCluster::initialize; using OptiCluster::update; using OptiCluster::getCloseFarCounts; }; #endif /* defined(__Mothur__testopticluster__) */ mothur-1.48.0/TestMothur/testoptirefmatrix.cpp000066400000000000000000000313301424121717000215400ustar00rootroot00000000000000// // testoptirefmatrix.cpp // Mothur // // Created by Sarah Westcott on 7/24/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #include "testoptirefmatrix.hpp" #include "dataset.h" /**************************************************************************************************/ TestOptiRefMatrix::TestOptiRefMatrix() { //setup m = MothurOut::getInstance(); TestDataSet data; filenames = data.getSubsetFNGFiles(); //Fasta, name, group returned columnFile = data.getSubsetFNGDistFile(); phylipFile = data.getSubsetFNGPhylipDistFile(); reffilenames = data.getOptiRefFiles(); //fasta, count, column, phylip, list, betweendist returned } /**************************************************************************************************/ TestOptiRefMatrix::~TestOptiRefMatrix() {} /**************************************************************************************************/ //distfile, distFormat, dupsFile, dupsFormat, cutoff, percentage to be fitseqs - will randomly assign as fit TEST(Test_Container_OptiRefMatrix, readColumnDenovo) { MothurOut* m; m = MothurOut::getInstance(); m->setRandomSeed(123456); //stabilize radomization TestOptiRefMatrix testOMatrix; OptiRefMatrix matrix(testOMatrix.columnFile, "column", testOMatrix.filenames[1], "name", 0.03, 50, "none"); //EXPECT_EQ(160,(matrix.print(cout))); EXPECT_EQ(80,(matrix.getNumDists())); EXPECT_EQ(17,(matrix.getNumFitDists())); EXPECT_EQ(27,(matrix.getNumRefDists())); vector refSeqs = matrix.getRefSeqs(); string Expected_ReturnResults = "1357111516181920222527283132333435373840414445474851535556"; string ReturnResults = ""; for (long long i = 0; i < refSeqs.size(); i++) { ReturnResults += toString(refSeqs[i]); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); long long sanityCheck = matrix.getNumDists() - (matrix.getNumFitDists() + matrix.getNumRefDists()); EXPECT_EQ(36,sanityCheck); //number of inbetween dists } //distfile, distFormat, dupsFile, dupsFormat, cutoff, percentage to be fitseqs - will randomly assign as fit TEST(Test_Container_OptiRefMatrix, readPhylipDenovo) { MothurOut* m; m = MothurOut::getInstance(); m->setRandomSeed(123456); //stabilize radomization TestOptiRefMatrix testOMatrix; OptiRefMatrix matrix(testOMatrix.phylipFile, "phylip", testOMatrix.filenames[1], "name", 0.03, 50, "none"); //EXPECT_EQ(160,(matrix.print(cout))); EXPECT_EQ(80,(matrix.getNumDists())); EXPECT_EQ(17,(matrix.getNumFitDists())); EXPECT_EQ(22,(matrix.getNumRefDists())); vector refSeqs = matrix.getRefSeqs(); string Expected_ReturnResults = "136791014161819212224282930323335363839414344455152535556"; string ReturnResults = ""; for (long long i = 0; i < refSeqs.size(); i++) { ReturnResults += toString(refSeqs[i]); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); long long sanityCheck = matrix.getNumDists() - (matrix.getNumFitDists() + matrix.getNumRefDists()); EXPECT_EQ(41,sanityCheck); //number of inbetween dists } //refdistfile, 
refname or refcount, refformat, refdistformat, cutoff, fitdistfile, fitname or fitcount, fitformat, fitdistformat, betweendistfile, betweendistformat - files for reference TEST(Test_Container_OptiRefMatrix, readColumnReference) { TestOptiRefMatrix testOMatrix; OptiRefMatrix matrix(testOMatrix.reffilenames[2], testOMatrix.reffilenames[1], "count", "column", 0.03, testOMatrix.columnFile, testOMatrix.filenames[1], "name", "column", testOMatrix.reffilenames[5], "column"); //EXPECT_EQ(113772,(matrix.print(cout))); EXPECT_EQ(56886,(matrix.getNumDists())); //unique dists 56886*2=113772 EXPECT_EQ(80,(matrix.getNumFitDists())); EXPECT_EQ(56675,(matrix.getNumRefDists())); long long sanityCheck = matrix.getNumDists() - (matrix.getNumFitDists() + matrix.getNumRefDists()); EXPECT_EQ(131,sanityCheck); //number of inbetween dists } //refdistfile, refname or refcount, refformat, refdistformat, cutoff, fitdistfile, fitname or fitcount, fitformat, fitdistformat, betweendistfile, betweendistformat - files for reference TEST(Test_Container_OptiRefMatrix, readPhylipReference) { TestOptiRefMatrix testOMatrix; OptiRefMatrix matrix(testOMatrix.reffilenames[3], testOMatrix.reffilenames[1], "count", "phylip", 0.03, testOMatrix.columnFile, testOMatrix.filenames[1], "name", "column", testOMatrix.reffilenames[5], "column"); //EXPECT_EQ(113772,(matrix.print(cout))); EXPECT_EQ(56893,(matrix.getNumDists())); //unique dists 56886*2=113772 EXPECT_EQ(80,(matrix.getNumFitDists())); EXPECT_EQ(56682,(matrix.getNumRefDists())); long long sanityCheck = matrix.getNumDists() - (matrix.getNumFitDists() + matrix.getNumRefDists()); EXPECT_EQ(131,sanityCheck); //number of inbetween dists } TEST(Test_Container_OptiRefMatrix, getNumCLose) { MothurOut* m; m = MothurOut::getInstance(); m->setRandomSeed(123456); //stabilize radomization TestOptiRefMatrix testOMatrix; OptiRefMatrix matrix(testOMatrix.columnFile, "column", testOMatrix.filenames[1], "name", 0.03, 50, "none"); EXPECT_EQ(1,(matrix.getNumClose(0))); EXPECT_EQ(2,(matrix.getNumClose(5))); EXPECT_EQ(3,(matrix.getNumClose(10))); EXPECT_EQ(7,(matrix.getNumClose(15))); EXPECT_EQ(2,(matrix.getNumClose(20))); EXPECT_EQ(1,(matrix.getNumFitClose(0))); EXPECT_EQ(0,(matrix.getNumFitClose(5))); EXPECT_EQ(1,(matrix.getNumFitClose(10))); EXPECT_EQ(4,(matrix.getNumFitClose(15))); EXPECT_EQ(0,(matrix.getNumFitClose(20))); EXPECT_EQ(0,(matrix.getNumRefClose(0))); EXPECT_EQ(2,(matrix.getNumRefClose(5))); EXPECT_EQ(2,(matrix.getNumRefClose(10))); EXPECT_EQ(3,(matrix.getNumRefClose(15))); EXPECT_EQ(2,(matrix.getNumRefClose(20))); } TEST(Test_Container_OptiRefMatrix, isCloseFit) { MothurOut* m; m = MothurOut::getInstance(); m->setRandomSeed(123456); //stabilize radomization TestOptiRefMatrix testOMatrix; OptiRefMatrix matrix(testOMatrix.columnFile, "column", testOMatrix.filenames[1], "name", 0.03, 50, "none"); bool isFit; vector fitSeqs = matrix.getFitSeqs(); string Expected_ReturnResults = "024689101213141721232426293036394243464950525457"; string ReturnResults = ""; for (long long i = 0; i < fitSeqs.size(); i++) { ReturnResults += toString(fitSeqs[i]); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); //check closeness EXPECT_EQ(true,(matrix.isClose(0, 8))); EXPECT_EQ(true,(matrix.isClose(1, 28))); EXPECT_EQ(true,(matrix.isClose(2, 44))); EXPECT_EQ(true,(matrix.isClose(15, 42))); EXPECT_EQ(true,(matrix.isClose(35, 36))); //check not close EXPECT_EQ(false,(matrix.isClose(57, 8))); EXPECT_EQ(false,(matrix.isClose(47, 28))); EXPECT_EQ(false,(matrix.isClose(32, 44))); 
EXPECT_EQ(false,(matrix.isClose(23, 42)));
    EXPECT_EQ(false,(matrix.isClose(12, 36)));
    
    //assumes first value is a fitSeq
    EXPECT_EQ(true,(matrix.isCloseFit(0, 8, isFit)));    //both fit and close
    EXPECT_EQ(true, isFit);
    EXPECT_EQ(false,(matrix.isCloseFit(2, 28, isFit)));  //not fit
    EXPECT_EQ(false, isFit);
    EXPECT_EQ(false,(matrix.isCloseFit(3, 20, isFit)));  //not fit, but close
    EXPECT_EQ(false, isFit);
    EXPECT_EQ(false,(matrix.isCloseFit(13, 42, isFit))); //both fit, not close
    EXPECT_EQ(true, isFit);
    EXPECT_EQ(false,(matrix.isCloseFit(30, 36, isFit))); //both fit, not close
    EXPECT_EQ(true, isFit);
}

TEST(Test_Container_OptiRefMatrix, getCloseFitSeqs) {
    MothurOut* m; m = MothurOut::getInstance();
    m->setRandomSeed(123456); //stabilize randomization
    TestOptiRefMatrix testOMatrix;
    OptiRefMatrix matrix(testOMatrix.columnFile, "column", testOMatrix.filenames[1], "name", 0.03, 50, "none");
    
    //"024689101213141721232426293036394243464950525457";
    
    //17 GQY1XT001BJ4H6,..,GQY1XT001CW8RQ 11 32 52 55 57
    string Expected_ReturnResults = "";
    Expected_ReturnResults += "52"; Expected_ReturnResults += "57";
    set<long long> temp = matrix.getCloseFitSeqs(17);
    string ReturnResults = "";
    for (set<long long>::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); }
    EXPECT_EQ(Expected_ReturnResults, ReturnResults);
    
    //50 GQY1XT001EK1FO 13
    Expected_ReturnResults = "";
    Expected_ReturnResults += "13";
    temp = matrix.getCloseFitSeqs(50);
    ReturnResults = "";
    for (set<long long>::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); }
    EXPECT_EQ(Expected_ReturnResults, ReturnResults);
    
    //52 GQY1XT001ENMKV 11 17 32 55 57
    Expected_ReturnResults = "";
    Expected_ReturnResults += "17"; Expected_ReturnResults += "57";
    temp = matrix.getCloseFitSeqs(52);
    ReturnResults = "";
    for (set<long long>::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); }
    EXPECT_EQ(Expected_ReturnResults, ReturnResults);
    
    //36 GQY1XT001DHDV0,GQY1XT001B0UFF 15 24 35 38
    Expected_ReturnResults = "";
    Expected_ReturnResults += "24";
    temp = matrix.getCloseFitSeqs(36);
    ReturnResults = "";
    for (set<long long>::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); }
    EXPECT_EQ(Expected_ReturnResults, ReturnResults);
    
    //21 GQY1XT001BUMO0 26 42 46
    Expected_ReturnResults = "";
    Expected_ReturnResults += "26"; Expected_ReturnResults += "42"; Expected_ReturnResults += "46";
    temp = matrix.getCloseFitSeqs(21);
    ReturnResults = "";
    for (set<long long>::iterator it = temp.begin(); it != temp.end(); it++) { ReturnResults += toString(*it); }
    EXPECT_EQ(Expected_ReturnResults, ReturnResults);
}

TEST(Test_Container_OptiRefMatrix, extractRefMatrix) {
    MothurOut* m; m = MothurOut::getInstance();
    m->setRandomSeed(123456); //stabilize randomization
    TestOptiRefMatrix testOMatrix;
    OptiRefMatrix matrix(testOMatrix.columnFile, "column", testOMatrix.filenames[1], "name", 0.03, 50, "none");
    
    OptiData* refMatrix = matrix.extractRefMatrix();
    
    //EXPECT_EQ(54,(refMatrix->print(cout)));
    EXPECT_EQ(54,(refMatrix->getNumDists()));
}

TEST(Test_Container_OptiRefMatrix, extractMatrixSubset) {
    MothurOut* m; m = MothurOut::getInstance();
    m->setRandomSeed(123456); //stabilize randomization
    TestOptiRefMatrix testOMatrix;
    OptiRefMatrix matrix(testOMatrix.columnFile, "column", testOMatrix.filenames[1], "name", 0.03, 50, "none");
    
    vector<long long> temp = matrix.getFitSeqs();
    set<long long> fitSeqs;
    for (long long i = 0; i < temp.size(); i++) { fitSeqs.insert(temp[i]); }
    
    OptiData* fitMatrix = matrix.extractMatrixSubset(fitSeqs);
    
    //EXPECT_EQ(34,(fitMatrix->print(cout)));
EXPECT_EQ(34,(fitMatrix->getNumDists())); } TEST(Test_Container_OptiRefMatrix, getFitListSingle) { MothurOut* m; m = MothurOut::getInstance(); m->setRandomSeed(123456); //stabilize radomization TestOptiRefMatrix testOMatrix; OptiRefMatrix matrix(testOMatrix.columnFile, "column", testOMatrix.filenames[1], "name", 0.03, 50, "none"); //maps names to index in closeness matrix ListVector* fitListSingle = matrix.getFitListSingle(); //check bin 0 string bin = fitListSingle->get(0); EXPECT_EQ("GQY1XT001AD34Z", bin); bin = fitListSingle->get(10); EXPECT_EQ("GQY1XT001CH9UX,GQY1XT001C80OT,GQY1XT001BEIF2,GQY1XT001DFU9M,GQY1XT001DNJRS", bin); bin = fitListSingle->get(16); EXPECT_EQ("GQY1XT001EACH9", bin); bin = fitListSingle->get(3); EXPECT_EQ("GQY1XT001B8C4W,GQY1XT001DBTGA,GQY1XT001B4VQ6", bin); bin = fitListSingle->get(7); EXPECT_EQ("GQY1XT001C4UVG", bin); } TEST(Test_Container_OptiRefMatrix, randomizeRefs) { MothurOut* m; m = MothurOut::getInstance(); m->setRandomSeed(123456); //stabilize radomization TestOptiRefMatrix testOMatrix; OptiRefMatrix matrix(testOMatrix.columnFile, "column", testOMatrix.filenames[1], "name", 0.03, 50, "none"); matrix.randomizeRefs(); vector refSeqs = matrix.getRefSeqs(); string Expected_ReturnResults = "0135678112023242629313236394041434547525356"; string ReturnResults = ""; for (long long i = 0; i < refSeqs.size(); i++) { ReturnResults += toString(refSeqs[i]); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); matrix.randomizeRefs(); refSeqs = matrix.getRefSeqs(); Expected_ReturnResults = "3467891315161718202122232426273132333536373841474950"; ReturnResults = ""; for (long long i = 0; i < refSeqs.size(); i++) { ReturnResults += toString(refSeqs[i]); } EXPECT_EQ(Expected_ReturnResults, ReturnResults); } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testoptirefmatrix.hpp000066400000000000000000000010741424121717000215470ustar00rootroot00000000000000// // testoptirefmatrix.hpp // Mothur // // Created by Sarah Westcott on 7/24/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef testoptirefmatrix_hpp #define testoptirefmatrix_hpp #include "optirefmatrix.hpp" #include "gtest/gtest.h" #include "gmock/gmock.h" class TestOptiRefMatrix : public OptiRefMatrix { public: TestOptiRefMatrix(); ~TestOptiRefMatrix(); string columnFile, phylipFile; vector filenames; vector reffilenames; private: }; #endif /* testoptirefmatrix_hpp */ mothur-1.48.0/TestMothur/testsubsample.cpp000066400000000000000000000025031424121717000206360ustar00rootroot00000000000000// // testsubsample.cpp // Mothur // // Created by Sarah Westcott on 11/15/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
// #include "testsubsample.hpp" /**************************************************************************************************/ TestSubSample::TestSubSample() { //setup m = MothurOut::getInstance(); } /**************************************************************************************************/ TestSubSample::~TestSubSample() { } /**************************************************************************************************/ TEST(Test_SubSample, getWeightedSample) { TestSubSample test; map weights; weights[1] = 1; weights[2] = 5; weights[3] = 10; weights[4] = 15; weights[5] = 20; weights[6] = 25; weights[7] = 30; weights[8] = 35; weights[9] = 40; weights[10] = 45; //226 total reads represented set names = test.getWeightedSample(weights, 10); //select all the reads EXPECT_EQ(1,*names.begin()); names = test.getWeightedSample(weights, 5); //select 1 the read set::iterator it = names.find(10); EXPECT_EQ((it != names.end()),true); } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testsubsample.hpp000066400000000000000000000006261424121717000206470ustar00rootroot00000000000000// // testsubsample.hpp // Mothur // // Created by Sarah Westcott on 11/15/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef testsubsample_hpp #define testsubsample_hpp #include "gtest/gtest.h" #include "subsample.h" class TestSubSample : public SubSample { public: TestSubSample(); ~TestSubSample(); MothurOut* m; }; #endif /* testsubsample_hpp */ mothur-1.48.0/TestMothur/testtrimoligos.cpp000066400000000000000000001437671424121717000210550ustar00rootroot00000000000000// // testtrimoligos.cpp // Mothur // // Created by Sarah Westcott on 7/14/16. // Copyright © 2016 Schloss Lab. All rights reserved. 
// #include "testtrimoligos.hpp" /**************************************************************************************************/ TestTrimOligos::TestTrimOligos() { //setup } /**************************************************************************************************/ TestTrimOligos::~TestTrimOligos() { } /**************************************************************************************************/ TEST(Test_TrimOligos, SingleDirectionStripBarcodes) { TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed TrimOligos noDiffSingleTrim(0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers Sequence F003D150("GQY1XT001ASWK1", "TGGTGAACCCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT"); int groupIndex; vector results = noDiffSingleTrim.stripBarcode(F003D150, groupIndex); EXPECT_EQ(9, groupIndex); EXPECT_EQ(0, results[0]); EXPECT_EQ("match", noDiffSingleTrim.getCodeValue(results[1], 0)); EXPECT_EQ("CCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT", F003D150.getUnaligned()); //TGGTGAAC barcode removed //1 barcode diff, 2 primer diffs TrimOligos someDiffsSingleTrim(2,0,1,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers F003D150.setAligned("TGGTGTACCCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT"); results = someDiffsSingleTrim.stripBarcode(F003D150, groupIndex); EXPECT_EQ(9, groupIndex); EXPECT_EQ(1, results[0]); EXPECT_EQ("match", someDiffsSingleTrim.getCodeValue(results[1], 1)); EXPECT_EQ("CCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT", F003D150.getUnaligned()); //TGGTGAAC barcode removed //nomatch option F003D150.setAligned("GTACCCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT"); results = someDiffsSingleTrim.stripBarcode(F003D150, groupIndex); EXPECT_EQ(3, results[0]); EXPECT_EQ("noMatch", someDiffsSingleTrim.getCodeValue(results[1], 1)); //4 barcode diff, 2 primer diffs - force multiMatch //barcode TTC GT G G C F003D004 //barcode TTC TT G A C F003D006 TrimOligos lotsOfDiffsSingleTrim(2,0,4,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers Sequence F003D006("GQY1XT001CAJGU", 
"TTCAATACCCGTCAATTCCTTTAAGTTTCAACCTTGCGATCGTACTCCCCAGGTGGGATACTTATTGCGTTAGCTGCGGCACGCAGGGGGTCAGTCCCCGCACACCTAGTATCCATCGTTTACAGCGTGGACTACCAGGGTATCTAATCCTGTTTGCTCCCCACGCTTTCGCGCCTCACCGTCAGTTGTCGTCCAGCAGGCCGCCTTCGCCACTGGTGTTCCTCCTAATATCTACGCATTTCACCGCTACACTAGGAATTCCGCCTGCCTCTCCGATACTCAAGACCTACAGTTTCAAATGCA"); results = lotsOfDiffsSingleTrim.stripBarcode(F003D006, groupIndex); EXPECT_EQ(3, results[0]); //three diffs to find match, but matches F003D004 and F003D006 EXPECT_EQ("multipleMatches", lotsOfDiffsSingleTrim.getCodeValue(results[1], 4)); } TEST(Test_TrimOligos, SingleDirectionStripQualBarcodes) { TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed TrimOligos noDiffSingleTrim(0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers Sequence F003D150("GQY1XT001ASWK1", "TGGTGAACCCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGG"); //fragment vector qualScores(53, 38); QualityScores F003D150Q("GQY1XT001ASWK1", qualScores); //fragment int groupIndex; vector results = noDiffSingleTrim.stripBarcode(F003D150, F003D150Q, groupIndex); EXPECT_EQ(9, groupIndex); EXPECT_EQ(0, results[0]); EXPECT_EQ("match", noDiffSingleTrim.getCodeValue(results[1], 0)); EXPECT_EQ("CCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGG", F003D150.getUnaligned());//TGGTGAAC barcode removed EXPECT_EQ(45, F003D150Q.getLength()); //barcode removed //1 barcode diff, 2 primer diffs TrimOligos someDiffsSingleTrim(2,0,1,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers F003D150.setAligned("TGGTGCACCCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGG"); F003D150Q.setScores(qualScores); results = someDiffsSingleTrim.stripBarcode(F003D150, F003D150Q, groupIndex); EXPECT_EQ(9, groupIndex); EXPECT_EQ(1, results[0]); EXPECT_EQ("match", someDiffsSingleTrim.getCodeValue(results[1], 1)); EXPECT_EQ("CCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGG", F003D150.getUnaligned()); //TGGTGAAC barcode removed EXPECT_EQ(45, F003D150Q.getLength()); //barcode removed //nomatch option F003D150.setAligned("TCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGG"); results = someDiffsSingleTrim.stripBarcode(F003D150, groupIndex); EXPECT_EQ(5, results[0]); EXPECT_EQ("noMatch", someDiffsSingleTrim.getCodeValue(results[1], 1)); //4 barcode diff, 2 primer diffs - force multiMatch TTCTTGAC //barcode TTC G TG G C F003D004 //barcode TTC T TG A C F003D006 TrimOligos lotsOfDiffsSingleTrim(2,0,2,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers Sequence F003D006("GQY1XT001CAJGU", "TTCATTGCCCGTCAATTCCTTTAAGTTTCAACCTTGCGATCGTACTCCCCAGG"); F003D150Q.setScores(qualScores); results = lotsOfDiffsSingleTrim.stripBarcode(F003D006, F003D150Q, groupIndex); EXPECT_EQ(2, results[0]); //three diffs to find match, but matches F003D004 and F003D006 EXPECT_EQ("multipleMatches", lotsOfDiffsSingleTrim.getCodeValue(results[1], 2)); } TEST(Test_TrimOligos, SingleDirectionStripPrimers) { TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed TrimOligos noDiffSingleTrim(0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers Sequence F003D150("GQY1XT001ASWK1", 
"CCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT"); int groupIndex; vector results = noDiffSingleTrim.stripForward(F003D150, groupIndex); EXPECT_EQ(0, groupIndex); EXPECT_EQ(0, results[0]); EXPECT_EQ("match", noDiffSingleTrim.getCodeValue(results[1], 0)); //CCGTCAATTCMTTTRAGT primer removed EXPECT_EQ("TTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT", F003D150.getUnaligned()); //primer removed //1 barcode diff, 2 primer diffs TrimOligos someDiffsSingleTrim(2,0,1,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers F003D150.setAligned("CCTTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT"); results = someDiffsSingleTrim.stripForward(F003D150, groupIndex); EXPECT_EQ(0, groupIndex); EXPECT_EQ(1, results[0]); EXPECT_EQ("match", someDiffsSingleTrim.getCodeValue(results[1], 1)); //TGGTGAAC barcode removed EXPECT_EQ("TTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT", F003D150.getUnaligned()); //barcode removed //nomatch option F003D150.setAligned("CCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTGCCACCCAGGGGTCAATCCCCCCGGACAGCTAGCATTCATCGTTTACTGTGCGGACTACCAGGGTATCTAATCCTGTTTGATCCCCGCACTTTCGTGCCTCAGCGTCAGTAGGGCGCCGGTATGCTGCCTTCGCAATCGGGGTTCTGCGTGATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT"); results = someDiffsSingleTrim.stripForward(F003D150, groupIndex); EXPECT_EQ(9, results[0]); EXPECT_EQ("noMatch", someDiffsSingleTrim.getCodeValue(results[1], 1)); } TEST(Test_TrimOligos, SingleDirectionStripQualPrimers) { TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed TrimOligos noDiffSingleTrim(0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers Sequence F003D150("GQY1XT001ASWK1", "CCGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGG"); //fragment vector qualScores(45, 38); QualityScores F003D150Q("GQY1XT001ASWK1", qualScores); //fragment int groupIndex; vector results = noDiffSingleTrim.stripForward(F003D150, F003D150Q, groupIndex, false); EXPECT_EQ(0, results[0]); EXPECT_EQ("match", noDiffSingleTrim.getCodeValue(results[1], 0)); EXPECT_EQ("TTCACCGTTGCCGGCGTACTCCCCAGG", F003D150.getUnaligned());//TGGTGAAC barcode removed EXPECT_EQ(27, F003D150Q.getLength()); //barcode removed //1 barcode diff, 2 primer diffs TrimOligos someDiffsSingleTrim(2,0,1,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, bdiffs, primers, barcodes, revPrimers F003D150.setAligned("CGGTCAATTCCTTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGG"); F003D150Q.setScores(qualScores); results = someDiffsSingleTrim.stripForward(F003D150, F003D150Q, 
groupIndex, false); EXPECT_EQ(1, results[0]); EXPECT_EQ("match", someDiffsSingleTrim.getCodeValue(results[1], 1)); EXPECT_EQ("TTCACCGTTGCCGGCGTACTCCCCAGG", F003D150.getUnaligned()); //TGGTGAAC barcode removed EXPECT_EQ(27, F003D150Q.getLength()); //barcode removed //nomatch option F003D150.setAligned("TTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGG"); F003D150Q.setScores(qualScores); results = someDiffsSingleTrim.stripForward(F003D150, F003D150Q, groupIndex, false); EXPECT_EQ(11, results[0]); EXPECT_EQ("noMatch", someDiffsSingleTrim.getCodeValue(results[1], 1)); } TEST(Test_TrimOligos, SingleDirectionStripReversePrimers) { TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed TrimOligos noDiffSingleTrim(0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer); //pdiffs, bdiffs, primers, barcodes, revPrimers Sequence F003D150("GQY1XT001ASWK1", "TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); vector results = noDiffSingleTrim.stripReverse(F003D150); EXPECT_EQ(0, results[0]); EXPECT_EQ("match", noDiffSingleTrim.getCodeValue(results[1], 0)); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT", F003D150.getUnaligned()); //primer removed //1 barcode diff, 2 primer diffs TrimOligos someDiffsSingleTrim(2,0,1,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer); //pdiffs, bdiffs, primers, barcodes, revPrimers F003D150.setAligned("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTCC"); results = someDiffsSingleTrim.stripReverse(F003D150); EXPECT_EQ(2, results[0]); EXPECT_EQ("match", someDiffsSingleTrim.getCodeValue(results[1], 2)); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT", F003D150.getUnaligned()); //barcode removed //nomatch option F003D150.setAligned("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCCCC"); results = someDiffsSingleTrim.stripReverse(F003D150); EXPECT_EQ(3, results[0]); EXPECT_EQ("noMatch", someDiffsSingleTrim.getCodeValue(results[1], 2)); } TEST(Test_TrimOligos, SingleDirectionStripQualReversePrimers) { TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed TrimOligos noDiffSingleTrim(0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer); //pdiffs, bdiffs, primers, barcodes, revPrimers Sequence F003D150("GQY1XT001ASWK1", "TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); vector qualScores(84, 38); QualityScores F003D150Q("GQY1XT001ASWK1", qualScores); //fragment vector results = noDiffSingleTrim.stripReverse(F003D150, F003D150Q); EXPECT_EQ(0, results[0]); EXPECT_EQ("match", noDiffSingleTrim.getCodeValue(results[1], 0)); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT", F003D150.getUnaligned()); EXPECT_EQ(67, F003D150Q.getLength()); //reverse removed //1 barcode diff, 2 primer diffs TrimOligos someDiffsSingleTrim(2,0,1,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer); //pdiffs, bdiffs, primers, barcodes, revPrimers F003D150.setAligned("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTCC"); F003D150Q.setScores(qualScores); results = someDiffsSingleTrim.stripReverse(F003D150, F003D150Q); EXPECT_EQ(2, results[0]); EXPECT_EQ("match", someDiffsSingleTrim.getCodeValue(results[1], 2)); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTT", 
F003D150.getUnaligned()); EXPECT_EQ(67, F003D150Q.getLength()); //reverse removed //nomatch option F003D150.setAligned("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCCCC"); F003D150Q.setScores(qualScores); results = someDiffsSingleTrim.stripReverse(F003D150); EXPECT_EQ(3, results[0]); EXPECT_EQ("noMatch", someDiffsSingleTrim.getCodeValue(results[1], 2)); EXPECT_EQ(84, F003D150Q.getLength()); //reverse not removed } TEST(Test_TrimOligos, TwoDirectionStripBarcodes) { TestTrimOligos testTrim; testTrim.oligos.loadPaired(); //no diffs allowed TrimOligos noDiffTwoDirectionTrim(0,0,0,0,testTrim.oligos.ipprimers, testTrim.oligos.ipbarcodes, false); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex Sequence F05R2F_forward("F05R2F_forward" ,"CTTACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); Sequence F05R2F_reverse("F05R2F_reverse" ,"GGGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); int groupIndex; vector results = noDiffTwoDirectionTrim.stripBarcode(F05R2F_forward, F05R2F_reverse, groupIndex); //cttac gggtt EXPECT_EQ(8, groupIndex); EXPECT_EQ(0, (results[0]+results[2])); EXPECT_EQ("match", noDiffTwoDirectionTrim.getCodeValue(results[3], 0)); EXPECT_EQ("ATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); //cttac barcode removed EXPECT_EQ("CCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //gggtt barcode removed //diffs allowed TrimOligos someDiffTwoDirectionTrim(0,1,0,0,testTrim.oligos.ipprimers, testTrim.oligos.ipbarcodes, false); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex F05R2F_forward.setAligned("CTTACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("CGGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); results = someDiffTwoDirectionTrim.stripBarcode(F05R2F_forward, F05R2F_reverse, groupIndex); //cttac gggtt EXPECT_EQ(8, groupIndex); EXPECT_EQ(1, (results[0]+results[2])); EXPECT_EQ("match", someDiffTwoDirectionTrim.getCodeValue(results[3], 1)); EXPECT_EQ("ATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); //cttac barcode removed EXPECT_EQ("CCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //gggtt barcode removed F05R2F_forward.setAligned("CATACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("GGGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); results = someDiffTwoDirectionTrim.stripBarcode(F05R2F_forward, F05R2F_reverse, groupIndex); //cttac gggtt EXPECT_EQ(8, groupIndex); EXPECT_EQ(1, (results[0]+results[2])); EXPECT_EQ("match", someDiffTwoDirectionTrim.getCodeValue(results[3], 1)); EXPECT_EQ("ATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); //cttac barcode removed EXPECT_EQ("CCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //gggtt barcode removed F05R2F_forward.setAligned("GTTACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("CCGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); results = 
someDiffTwoDirectionTrim.stripBarcode(F05R2F_forward, F05R2F_reverse, groupIndex); //cttac gggtt EXPECT_EQ(3, (results[0]+results[2])); EXPECT_EQ("noMatch", someDiffTwoDirectionTrim.getCodeValue(results[3], 1)); EXPECT_EQ("GTTACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); //cttac barcode removed EXPECT_EQ("CCGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //gggtt barcode removed } TEST(Test_TrimOligos, TwoDirectionStripQualBarcodes) { TestTrimOligos testTrim; testTrim.oligos.loadPaired(); //no diffs allowed TrimOligos noDiffTwoDirectionTrim(0,0,0,0,testTrim.oligos.ipprimers, testTrim.oligos.ipbarcodes, false); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex Sequence F05R2F_forward("F05R2F_forward" ,"CTTACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); Sequence F05R2F_reverse("F05R2F_reverse" ,"GGGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); vector qualScores(82, 38); QualityScores F05R2F_forwardQual("F05R2F_forward", qualScores); //fragment QualityScores F05R2F_reverseQual("F05R2F_reverse", qualScores); //fragment int groupIndex; vector results = noDiffTwoDirectionTrim.stripBarcode(F05R2F_forward, F05R2F_reverse, F05R2F_forwardQual, F05R2F_reverseQual, groupIndex); //cttac gggtt EXPECT_EQ(8, groupIndex); EXPECT_EQ(0, (results[0]+results[2])); EXPECT_EQ("match", noDiffTwoDirectionTrim.getCodeValue(results[3], 0)); EXPECT_EQ("ATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); //cttac barcode removed EXPECT_EQ("CCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //gggtt barcode removed EXPECT_EQ(77, F05R2F_forwardQual.getLength()); //barcode removed EXPECT_EQ(77, F05R2F_reverseQual.getLength()); //barcode removed //diffs allowed TrimOligos someDiffTwoDirectionTrim(0,1,0,0,testTrim.oligos.ipprimers, testTrim.oligos.ipbarcodes, false); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex F05R2F_forward.setAligned("CTTACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("CGGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); F05R2F_forwardQual.setScores(qualScores); F05R2F_reverseQual.setScores(qualScores); results = someDiffTwoDirectionTrim.stripBarcode(F05R2F_forward, F05R2F_reverse, F05R2F_forwardQual, F05R2F_reverseQual, groupIndex); //cttac gggtt EXPECT_EQ(8, groupIndex); EXPECT_EQ(1, (results[0]+results[2])); EXPECT_EQ("match", someDiffTwoDirectionTrim.getCodeValue(results[3], 1)); EXPECT_EQ("ATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); //cttac barcode removed EXPECT_EQ("CCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //gggtt barcode removed EXPECT_EQ(77, F05R2F_forwardQual.getLength()); //barcode removed EXPECT_EQ(77, F05R2F_reverseQual.getLength()); //barcode removed F05R2F_forward.setAligned("CATACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("GGGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); F05R2F_forwardQual.setScores(qualScores); F05R2F_reverseQual.setScores(qualScores); results = 
someDiffTwoDirectionTrim.stripBarcode(F05R2F_forward, F05R2F_reverse, F05R2F_forwardQual, F05R2F_reverseQual, groupIndex); //cttac gggtt EXPECT_EQ(8, groupIndex); EXPECT_EQ(1, (results[0]+results[2])); EXPECT_EQ("match", someDiffTwoDirectionTrim.getCodeValue(results[3], 1)); EXPECT_EQ("ATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); //cttac barcode removed EXPECT_EQ("CCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //gggtt barcode removed EXPECT_EQ(77, F05R2F_forwardQual.getLength()); //barcode removed EXPECT_EQ(77, F05R2F_reverseQual.getLength()); //barcode removed F05R2F_forward.setAligned("GTTACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("CCGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); F05R2F_forwardQual.setScores(qualScores); F05R2F_reverseQual.setScores(qualScores); results = someDiffTwoDirectionTrim.stripBarcode(F05R2F_forward, F05R2F_reverse, F05R2F_forwardQual, F05R2F_reverseQual, groupIndex); //cttac gggtt EXPECT_EQ(3, (results[0]+results[2])); EXPECT_EQ("noMatch", someDiffTwoDirectionTrim.getCodeValue(results[3], 1)); EXPECT_EQ("GTTACATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); //cttac barcode removed EXPECT_EQ("CCGTTCCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //gggtt barcode removed EXPECT_EQ(82, F05R2F_forwardQual.getLength()); //barcode removed EXPECT_EQ(82, F05R2F_reverseQual.getLength()); //barcode removed } TEST(Test_TrimOligos, TwoDirectionStripPrimers) { TestTrimOligos testTrim; testTrim.oligos.loadPaired(); //no diffs allowed TrimOligos noDiffTwoDirectionTrim(0,0,0,0,testTrim.oligos.ipprimers, testTrim.oligos.ipbarcodes, false); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex Sequence F05R2F_forward("F05R2F_forward" ,"ATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); Sequence F05R2F_reverse("F05R2F_reverse" ,"CCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); int groupIndex; vector results = noDiffTwoDirectionTrim.stripForward(F05R2F_forward, F05R2F_reverse, groupIndex); EXPECT_EQ(1, groupIndex); EXPECT_EQ(0, (results[0]+results[2])); EXPECT_EQ("match", noDiffTwoDirectionTrim.getCodeValue(results[3], 0)); EXPECT_EQ("ACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); EXPECT_EQ("TTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //allows for 2 diffs on each end - the command code decides pass/fail TrimOligos someDiffTwoDirectionTrim(2,0,0,0,testTrim.oligos.ipprimers, testTrim.oligos.ipbarcodes, false); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex F05R2F_forward.setAligned("TTTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("GCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); results = someDiffTwoDirectionTrim.stripForward(F05R2F_forward, F05R2F_reverse, groupIndex); EXPECT_EQ(1, groupIndex); EXPECT_EQ(2, (results[0]+results[2])); EXPECT_EQ("match", someDiffTwoDirectionTrim.getCodeValue(results[3], 2)); EXPECT_EQ("match", someDiffTwoDirectionTrim.getCodeValue(results[1], 2)); EXPECT_EQ("ACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", 
F05R2F_forward.getUnaligned()); EXPECT_EQ("TTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); //allows for 2 diffs on each end - the command code decides pass/fail F05R2F_forward.setAligned("TTTTGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("GGCGTGAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); results = someDiffTwoDirectionTrim.stripForward(F05R2F_forward, F05R2F_reverse, groupIndex); EXPECT_EQ(1, groupIndex); EXPECT_EQ(5, (results[0]+results[2])); EXPECT_EQ("noMatch", someDiffTwoDirectionTrim.getCodeValue(results[3], 2)); EXPECT_EQ("noMatch", someDiffTwoDirectionTrim.getCodeValue(results[1], 2)); EXPECT_EQ("TTTTGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); EXPECT_EQ("GGCGTGAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); } TEST(Test_TrimOligos, TwoDirectionStripQualPrimers) { TestTrimOligos testTrim; testTrim.oligos.loadPaired(); //no diffs allowed TrimOligos noDiffTwoDirectionTrim(0,0,0,0,testTrim.oligos.ipprimers, testTrim.oligos.ipbarcodes, false); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex Sequence F05R2F_forward("F05R2F_forward" ,"ATTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); vector qualScores(77, 38); QualityScores F05R2F_forwardQual("F05R2F_forward", qualScores); //fragment Sequence F05R2F_reverse("F05R2F_reverse" ,"CCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); QualityScores F05R2F_reverseQual("F05R2F_reverse", qualScores); //fragment int groupIndex; vector results = noDiffTwoDirectionTrim.stripForward(F05R2F_forward, F05R2F_reverse, F05R2F_forwardQual, F05R2F_reverseQual, groupIndex); EXPECT_EQ(1, groupIndex); EXPECT_EQ(0, (results[0]+results[2])); EXPECT_EQ("match", noDiffTwoDirectionTrim.getCodeValue(results[3], 0)); EXPECT_EQ("ACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); EXPECT_EQ("TTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); EXPECT_EQ(57, F05R2F_forwardQual.getLength()); //primer removed EXPECT_EQ(58, F05R2F_reverseQual.getLength()); //primer removed //allows for 2 diffs on each end - the command code decides pass/fail TrimOligos someDiffTwoDirectionTrim(2,0,0,0,testTrim.oligos.ipprimers, testTrim.oligos.ipbarcodes, false); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex F05R2F_forward.setAligned("TTTAGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("GCCGTCAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); F05R2F_forwardQual.setScores(qualScores); F05R2F_reverseQual.setScores(qualScores); results = someDiffTwoDirectionTrim.stripForward(F05R2F_forward, F05R2F_reverse, F05R2F_forwardQual, F05R2F_reverseQual, groupIndex); EXPECT_EQ(1, groupIndex); EXPECT_EQ(2, (results[0]+results[2])); EXPECT_EQ("match", someDiffTwoDirectionTrim.getCodeValue(results[3], 2)); EXPECT_EQ("match", someDiffTwoDirectionTrim.getCodeValue(results[1], 2)); EXPECT_EQ("ACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); EXPECT_EQ("TTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); EXPECT_EQ(57, F05R2F_forwardQual.getLength()); //primer removed EXPECT_EQ(58, F05R2F_reverseQual.getLength()); //primer removed //allows 
for 2 diffs on each end - the command code decides pass/fail F05R2F_forward.setAligned("TTTTGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG"); F05R2F_reverse.setAligned("GGCGTGAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC"); F05R2F_forwardQual.setScores(qualScores); F05R2F_reverseQual.setScores(qualScores); results = someDiffTwoDirectionTrim.stripForward(F05R2F_forward, F05R2F_reverse, F05R2F_forwardQual, F05R2F_reverseQual, groupIndex); EXPECT_EQ(1, groupIndex); EXPECT_EQ(5, (results[0]+results[2])); EXPECT_EQ("noMatch", someDiffTwoDirectionTrim.getCodeValue(results[3], 2)); EXPECT_EQ("noMatch", someDiffTwoDirectionTrim.getCodeValue(results[1], 2)); EXPECT_EQ("TTTTGATACCCGGGTAGTCCACGCAGTAAACGATGCATGCTAACTGTCAGGTGCGTTGAGCGCGGTGCGATGCAGCG", F05R2F_forward.getUnaligned()); EXPECT_EQ("GGCGTGAATTCATTTGAGTTTCACCGTTGCCGGCGTACTCCCCAGGTGGAATGCTTAACGCTTTCGCTGTACCGCCC", F05R2F_reverse.getUnaligned()); EXPECT_EQ(77, F05R2F_forwardQual.getLength()); //primer removed EXPECT_EQ(77, F05R2F_reverseQual.getLength()); //primer removed } TEST(Test_TrimOligos, SingleDirectionStripLinkers) { TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed TrimOligos noDiffSingleTrim(0,0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer, testTrim.oligos.linker, testTrim.oligos.spacer); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimers, linker, spacer Sequence F003D150("GQY1XT001ASWK1", "TGACTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); int result = noDiffSingleTrim.stripLinker(F003D150); EXPECT_EQ(0, result); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", F003D150.getUnaligned()); //1 linker diff TrimOligos someDiffsSingleTrim(0,0,1,0,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer, testTrim.oligos.linker, testTrim.oligos.spacer); F003D150.setAligned("AGACTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); result = someDiffsSingleTrim.stripLinker(F003D150); EXPECT_EQ(1, result); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", F003D150.getUnaligned()); //nomatch option F003D150.setAligned("TTCATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); result = someDiffsSingleTrim.stripLinker(F003D150); EXPECT_EQ(7, result); EXPECT_EQ("TTCATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", F003D150.getUnaligned()); } TEST(Test_TrimOligos, SingleDirectionStripQualLinkers) { TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed TrimOligos noDiffSingleTrim(0,0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer, testTrim.oligos.linker, testTrim.oligos.spacer); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimers, linker, spacer Sequence F003D150("GQY1XT001ASWK1", "TGACTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); vector qualScores(88, 38); QualityScores F003D150Q("F003D150Q", qualScores); //fragment int result = noDiffSingleTrim.stripLinker(F003D150, F003D150Q); EXPECT_EQ(0, result); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", F003D150.getUnaligned()); EXPECT_EQ(84, F003D150Q.getLength()); //1 linker diff TrimOligos someDiffsSingleTrim(0,0,1,0,testTrim.oligos.primers, 
testTrim.oligos.barcodes, testTrim.oligos.revPrimer, testTrim.oligos.linker, testTrim.oligos.spacer); F003D150.setAligned("AGACTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); F003D150Q.setScores(qualScores); result = someDiffsSingleTrim.stripLinker(F003D150, F003D150Q); EXPECT_EQ(1, result); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", F003D150.getUnaligned()); EXPECT_EQ(84, F003D150Q.getLength()); //nomatch option F003D150.setAligned("TTCATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); F003D150Q.setScores(qualScores); result = someDiffsSingleTrim.stripLinker(F003D150, F003D150Q); EXPECT_EQ(7, result); EXPECT_EQ("TTCATATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", F003D150.getUnaligned()); EXPECT_EQ(88, F003D150Q.getLength()); } TEST(Test_TrimOligos, SingleDirectionStripSpacers) { //CACTG, CCAAC TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed TrimOligos noDiffSingleTrim(0,0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer, testTrim.oligos.linker, testTrim.oligos.spacer); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimers, linker, spacer Sequence F003D150("GQY1XT001ASWK1", "CCAACTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); int result = noDiffSingleTrim.stripSpacer(F003D150); EXPECT_EQ(0, result); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", F003D150.getUnaligned()); //1 linker diff TrimOligos someDiffsSingleTrim(0,0,0,1,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer, testTrim.oligos.linker, testTrim.oligos.spacer); F003D150.setAligned("CAGTGTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); result = someDiffsSingleTrim.stripSpacer(F003D150); EXPECT_EQ(1, result); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", F003D150.getUnaligned()); //nomatch option F003D150.setAligned("CGTACTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); result = someDiffsSingleTrim.stripSpacer(F003D150); EXPECT_EQ(2, result); EXPECT_EQ("CGTACTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", F003D150.getUnaligned()); } TEST(Test_TrimOligos, SingleDirectionFindForward) { //CCGTCAATTCMTTTRAGT TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed - unaligned TrimOligos noDiffSingleTrim(0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, rpdiffs, bdiffs, primers, barcodes, revPrimers Sequence F003D150("GQY1XT001ASWK1", "CCAACCCGTCAATTCMTTTRAGTTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG"); int primerStart, primerEnd; vector results = noDiffSingleTrim.findForward(F003D150, primerStart, primerEnd); EXPECT_EQ(5, primerStart); EXPECT_EQ(23, primerEnd); EXPECT_EQ("match", noDiffSingleTrim.getCodeValue(results[1], 0)); string primerRemoved = (F003D150.getUnaligned().substr(primerEnd)); string primerKept = (F003D150.getUnaligned().substr(primerStart)); EXPECT_EQ("CCGTCAATTCMTTTRAGTTATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", primerKept); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", primerRemoved); //no diffs allowed - aligned 
gap between primer and sequence F003D150.setAligned("......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); Sequence F003D150_1("GQY1XT001ASWK1", "......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); Sequence F003D150_2("GQY1XT001ASWK1", "......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); int countBases = 0; string temp = F003D150.getAligned(); map mapAligned; for (int i = 0; i < temp.length(); i++) { if (isalpha(temp[i])) { mapAligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned. } //ie. the 3rd base may be at spot 10 in the alignment results = noDiffSingleTrim.findForward(F003D150, primerStart, primerEnd); EXPECT_EQ(5, primerStart); EXPECT_EQ(23, primerEnd); F003D150_1.filterToPos(mapAligned[primerEnd-1]+1); string primerRemovedKeepDots = F003D150_1.getAligned(); primerRemoved = F003D150.getAligned().substr(mapAligned[primerEnd-1]+1); primerKept = F003D150.getAligned().substr(mapAligned[primerStart]); F003D150_2.filterToPos(mapAligned[primerStart]); string primerKeptKeepDots = F003D150_2.getAligned(); EXPECT_EQ("CCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerKept); EXPECT_EQ("-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerRemoved); EXPECT_EQ(".............CCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerKeptKeepDots); EXPECT_EQ("................................TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerRemovedKeepDots); //no diffs allowed - aligned no gap between primer and sequence F003D150.setAligned("......CC--AACCCGTCAATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); F003D150_1.setAligned("......CC--AACCCGTCAATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); F003D150_2.setAligned("......CC--AACCCGTCAATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); countBases = 0; temp = F003D150.getAligned(); mapAligned.clear(); for (int i = 0; i < temp.length(); i++) { if (isalpha(temp[i])) { mapAligned[countBases] = i; countBases++; } } results = noDiffSingleTrim.findForward(F003D150, primerStart, primerEnd); EXPECT_EQ(5, primerStart); EXPECT_EQ(23, primerEnd); F003D150_1.filterToPos(mapAligned[primerEnd-1]+1); primerRemovedKeepDots = F003D150_1.getAligned(); primerRemoved = F003D150.getAligned().substr(mapAligned[primerEnd-1]+1); primerKept = F003D150.getAligned().substr(mapAligned[primerStart]); F003D150_2.filterToPos(mapAligned[primerStart]); primerKeptKeepDots = F003D150_2.getAligned(); EXPECT_EQ("CCGTCAATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerKept); EXPECT_EQ("TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerRemoved); EXPECT_EQ(".............CCGTCAATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerKeptKeepDots); 
EXPECT_EQ("...............................TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerRemovedKeepDots); //2 diffs allowed //CCGTCAATTCMTTTRAGT TrimOligos someDiffSingleTrim(2,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, nullVector); //pdiffs, rpdiffs, bdiffs, primers, barcodes, revPrimers //CCGTCAATTCMTTTRAGT F003D150.setAligned("......CC--AACCCGTGTATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); F003D150_1.setAligned("......CC--AACCCGTGTATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); F003D150_2.setAligned("......CC--AACCCGTGTATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); results = someDiffSingleTrim.findForward(F003D150, primerStart, primerEnd); EXPECT_EQ(5, primerStart); EXPECT_EQ(23, primerEnd); F003D150_1.filterToPos(mapAligned[primerEnd-1]+1); primerRemovedKeepDots = F003D150_1.getAligned(); primerRemoved = F003D150.getAligned().substr(mapAligned[primerEnd-1]+1); primerKept = F003D150.getAligned().substr(mapAligned[primerStart]); F003D150_2.filterToPos(mapAligned[primerStart]); primerKeptKeepDots = F003D150_2.getAligned(); EXPECT_EQ("CCGTGTATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerKept); EXPECT_EQ("TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerRemoved); EXPECT_EQ(".............CCGTGTATTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerKeptKeepDots); EXPECT_EQ("...............................TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", primerRemovedKeepDots); //CCGTCAATTCMTTTRAGT F003D150.setAligned("......CC--AACCCGTGTTTTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); results = someDiffSingleTrim.findForward(F003D150, primerStart, primerEnd); EXPECT_EQ(0, primerStart); //indicates failure EXPECT_EQ(0, primerEnd); //indicates failure EXPECT_EQ("......CC--AACCCGTGTTTTCMTTTRAGTTATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC--GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG.....", F003D150.getAligned()); } TEST(Test_TrimOligos, SingleDirectionFindReverse) { //ATTACCGCGGCTGCTGG TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed - unaligned TrimOligos noDiffSingleTrim(0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer); //pdiffs, rpdiffs, bdiffs, primers, barcodes, revPrimers //ATTACCGCGGCTGCTGG Sequence F003D150("GQY1XT001ASWK1", "TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGGATTACCGCGGCTGCTGGTGGTCCAGT"); int primerStart, primerEnd; vector results = noDiffSingleTrim.findReverse(F003D150, primerStart, primerEnd); EXPECT_EQ(84, primerStart); EXPECT_EQ(101, primerEnd); EXPECT_EQ("match", noDiffSingleTrim.getCodeValue(results[1], 0)); string primerRemoved = (F003D150.getUnaligned().substr(0, primerStart)); string primerKept = (F003D150.getUnaligned().substr(0, primerEnd)); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGGATTACCGCGGCTGCTGG", primerKept); EXPECT_EQ("TATCTATGCATTTCACCGCTACACCACGCATTCCGCATACTTCTCGCCCACTCGAGCCCGGCAGTTTATTACCGCGGCTGCTGG", primerRemoved); 
//no diffs allowed - aligned gap between primer and sequence ATTACCGCGGCTGCTGG F003D150.setAligned("......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACCGCGGCTGCTGG-GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); Sequence F003D150_1("GQY1XT001ASWK1", "......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACCGCGGCTGCTGG-GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); Sequence F003D150_2("GQY1XT001ASWK1", "......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACCGCGGCTGCTGG-GAGCCCGGCAGTTTATTACC-GCGGC-GCTGG....."); int countBases = 0; string temp = F003D150.getAligned(); map mapAligned; for (int i = 0; i < temp.length(); i++) { if (isalpha(temp[i])) { mapAligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned. } //ie. the 3rd base may be at spot 10 in the alignment results = noDiffSingleTrim.findReverse(F003D150, primerStart, primerEnd); EXPECT_EQ(76, primerStart); EXPECT_EQ(93, primerEnd); F003D150_1.filterFromPos(mapAligned[primerStart]); string primerRemovedKeepDots = F003D150_1.getAligned(); primerRemoved = F003D150.getAligned().substr(0, mapAligned[primerStart]); primerKept = F003D150.getAligned().substr(0, mapAligned[primerEnd-1]+1); F003D150_2.filterFromPos(mapAligned[primerEnd-1]+1); string primerKeptKeepDots = F003D150_2.getAligned(); EXPECT_EQ("......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACCGCGGCTGCTGG", primerKept); EXPECT_EQ("......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACCGCGGCTGCTGG......................................", primerKeptKeepDots); EXPECT_EQ("......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-", primerRemoved); EXPECT_EQ("......CC--AACCCGTCAATTCMTTTRAGT-TATCTATGC--ATTTCACCG-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-.......................................................", primerRemovedKeepDots); //2 diffs allowed TrimOligos someDiffSingleTrim(0,2,0,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer); //pdiffs, bdiffs, primers, barcodes, revPrimers //ATTACCGCGGCTGCTGG F003D150.setAligned("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-ATTAGGGCGGCTGCTGG-TTACG....."); F003D150_1.setAligned("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-ATTAGGGCGGCTGCTGG-TTACG....."); F003D150_2.setAligned("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-ATTAGGGCGGCTGCTGG-TTACG....."); mapAligned.clear(); countBases=0; temp = F003D150.getAligned(); for (int i = 0; i < temp.length(); i++) { if (isalpha(temp[i])) { mapAligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned. 
} results = someDiffSingleTrim.findReverse(F003D150, primerStart, primerEnd); EXPECT_EQ(75, primerStart); EXPECT_EQ(92, primerEnd); F003D150_1.filterFromPos(mapAligned[primerStart]); primerRemovedKeepDots = F003D150_1.getAligned(); primerRemoved = F003D150.getAligned().substr(0, mapAligned[primerStart]); primerKept = F003D150.getAligned().substr(0, mapAligned[primerEnd-1]+1); F003D150_2.filterFromPos(mapAligned[primerEnd-1]+1); primerKeptKeepDots = F003D150_2.getAligned(); EXPECT_EQ("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-ATTAGGGCGGCTGCTGG", primerKept); EXPECT_EQ("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-ATTAGGGCGGCTGCTGG...........", primerKeptKeepDots); EXPECT_EQ("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-", primerRemoved); EXPECT_EQ("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-............................", primerRemovedKeepDots); //no match option F003D150.setAligned("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-ATTAGGGCGGCTGCTAA-TTACG....."); F003D150_1.setAligned("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-ATTAGGGCGGCTGCTAA-TTACG....."); F003D150_2.setAligned("......CC--AA-C-T-ACACCACGCATTCC--GCATACTTCTCGCCCACTC-ATTACC-TTGCS-GCGGCTGCTGG-GAGCCCGGCAGTTT-ATTAGGGCGGCTGCTAA-TTACG....."); mapAligned.clear(); countBases=0; temp = F003D150.getAligned(); for (int i = 0; i < temp.length(); i++) { if (isalpha(temp[i])) { mapAligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned. } results = someDiffSingleTrim.findReverse(F003D150, primerStart, primerEnd); EXPECT_EQ(0, primerStart); EXPECT_EQ(0, primerEnd); } TEST(Test_TrimOligos, SingleDirectionReverseOligos) { //ATTACCGCGGCTGCTGG TestTrimOligos testTrim; testTrim.oligos.loadSingle(); //no diffs allowed - unaligned TrimOligos noDiffSingleTrim(0,0,0,testTrim.oligos.primers, testTrim.oligos.barcodes, testTrim.oligos.revPrimer); //pdiffs, rpdiffs, bdiffs, primers, barcodes, revPrimers string testOligos = "ATTACCGCGGCTGCTGG"; EXPECT_EQ("CCAGCAGCCGCGGTAAT" , noDiffSingleTrim.reverseOligo(testOligos)); testOligos = "CCGTCAATTCMTTTRAGT"; EXPECT_EQ("ACTYAAAKGAATTGACGG" , noDiffSingleTrim.reverseOligo(testOligos)); } /**************************************************************************************************/ mothur-1.48.0/TestMothur/testtrimoligos.hpp000066400000000000000000000007111424121717000210370ustar00rootroot00000000000000// // testtrimoligos.hpp // Mothur // // Created by Sarah Westcott on 7/14/16. // Copyright © 2016 Schloss Lab. All rights reserved. // #ifndef testtrimoligos_hpp #define testtrimoligos_hpp #include "trimoligos.h" #include "gtest/gtest.h" #include "fakeoligos.h" class TestTrimOligos : public TrimOligos { public: TestTrimOligos(); ~TestTrimOligos(); FakeOligos oligos; }; #endif /* testtrimoligos_hpp */ mothur-1.48.0/TestMothur/testvsearchfileparser.cpp000066400000000000000000000105461424121717000223610ustar00rootroot00000000000000// // testvsearchfileparser.cpp // Mothur // // Created by Sarah Westcott on 3/24/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. 
// #include "testvsearchfileparser.h" /**************************************************************************************************/ TestVsearchFileParser::TestVsearchFileParser() { //setup m = MothurOut::getInstance(); TestDataSet data; filenames = data.getSubsetFNGFiles(); ct = data.getCountTable(); } /**************************************************************************************************/ TestVsearchFileParser::~TestVsearchFileParser() { delete ct; } /************************************************************************************************** TEST_CASE("Testing VsearchParser Class") { TestVsearchFileParser testVParser; VsearchFileParser vsearchParser(testVParser.filenames[0], testVParser.filenames[1], "name"); SECTION("CreateVsearchFasta") { INFO("Using First 100 sequences of final.fasta and final.names") // Only appears on a FAIL CAPTURE(vsearchParser.getVsearchFile()); // Displays this variable on a FAIL CHECK(vsearchParser.getVsearchFile() == "tempSeqs.txt.sorted.fasta.temp"); ifstream in; testVParser.util.openInputFile(vsearchParser.getVsearchFile(), in); while (!in.eof()) { Sequence seq(in); testVParser.util.gobble(in); vector pieces; string name = seq.getName(); testVParser.util.splitAtChar(name, pieces, '='); string abundString = pieces[1].substr(0, pieces[1].length()-1); int abund = 0; testVParser.util.mothurConvert(abundString, abund); int totalSeqs = testVParser.ct->getNumSeqs(testVParser.removeAbundances(name)); CHECK(abund == totalSeqs); } in.close(); testVParser.util.mothurRemove("tempSeqs.txt.sorted.fasta.temp"); } SECTION("Remove Abundances") { INFO("Using GQY1XT001C44N8/size=3677/") // Only appears on a FAIL string seqName = "GQY1XT001C44N8/size=3677/"; CAPTURE(testVParser.removeAbundances(seqName)); // Displays this variable on a FAIL CHECK(testVParser.removeAbundances(seqName) == "GQY1XT001C44N8"); } SECTION("Create List File") { INFO("Using lines like: S 1 275 * * * * * GQY1XT001C44N8/ab=3677/ *") // Only appears on a FAIL vsearchParser.getVsearchFile(); ifstream in; testVParser.util.openInputFile(vsearchParser.getVsearchFile(), in); vector seqNames; while (!in.eof()) { Sequence seq(in); testVParser.util.gobble(in); string name = seq.getName(); seqNames.push_back(name); } in.close(); testVParser.util.mothurRemove("tempSeqs.txt.sorted.fasta.temp"); ofstream out; testVParser.util.openOutputFile("temp.txt", out); map binNames; for (int i = 0; i < seqNames.size(); i++) { int bin = (i+1)%10; string name = testVParser.removeAbundances(seqNames[i]); //name = (testVParser.data.getNameMap())[name]; //dup names out << "S\t" + toString(bin) + "\t275\t*\t*\t*\t*\t*\t" + seqNames[i] + "\t*\n"; map::iterator it = binNames.find(bin); if (it != binNames.end()) { it->second += "," + name; } else { binNames[bin] = name; } } out.close(); int numBins = binNames.size(); testVParser.createListFile("temp.txt", "temp.list", "temp.rabund", "temp.sabund", numBins, "0.03"); ifstream in2; testVParser.util.openInputFile("temp.list", in2); ListVector list2(in2); in2.close(); testVParser.util.mothurRemove("temp.list"); testVParser.util.mothurRemove("temp.rabund"); testVParser.util.mothurRemove("temp.sabund"); //for each bin for (int i = 0; i < list2.getNumBins(); i++) { string binnames = list2.get(i); CAPTURE(binnames); CHECK(binnames == binNames[i]); } } }*/ /**************************************************************************************************/ 
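//a minimal Google Test sketch of the disabled "Remove Abundances" section above, written with
//the gtest macros used by the other TestMothur files; it assumes removeAbundances() strips the
//"/size=3677/" style abundance suffix, as the commented-out CHECK suggests, and that it is
//exposed by TestVsearchFileParser as declared in testvsearchfileparser.h
TEST(Test_VsearchFileParser, RemoveAbundances) {
    TestVsearchFileParser testVParser;

    string seqName = "GQY1XT001C44N8/size=3677/"; //read name with a vsearch-style abundance annotation

    EXPECT_EQ("GQY1XT001C44N8", testVParser.removeAbundances(seqName)); //abundance suffix removed
}
/**************************************************************************************************/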
mothur-1.48.0/TestMothur/testvsearchfileparser.h000066400000000000000000000012261424121717000220210ustar00rootroot00000000000000//
//  testvsearchfileparser.h
//  Mothur
//
//  Created by Sarah Westcott on 3/24/16.
//  Copyright (c) 2016 Schloss Lab. All rights reserved.
//

#ifndef __Mothur__testvsearchfileparser__
#define __Mothur__testvsearchfileparser__

#include "vsearchfileparser.h"
#include "dataset.h"

class TestVsearchFileParser : public VsearchFileParser {

public:
    TestVsearchFileParser();
    ~TestVsearchFileParser();

    MothurOut* m;
    vector<string> filenames;
    CountTable* ct;

    using VsearchFileParser::removeAbundances;
    using VsearchFileParser::createListFile;

};

#endif /* defined(__Mothur__testvsearchfileparser__) */
mothur-1.48.0/Ubuntu_20_Build.txt000066400000000000000000000012361424121717000165660ustar00rootroot00000000000000Ubuntu 20

Install dependencies

To use GSL: sudo apt-get install libgsl-dev
To use Boost: sudo apt-get install libboost-all-dev
To use Readline: sudo apt-get install libreadline-dev
To use HDF5: sudo apt-get install libhdf5-dev

Set locations in makefile

OPTIMIZE ?= yes
USEREADLINE ?= yes
USEBOOST ?= yes
USEHDF5 ?= yes
USEGSL ?= yes
LOGFILE_NAME ?= no

BOOST_LIBRARY_DIR ?= "/usr/lib/x86_64-linux-gnu/"
BOOST_INCLUDE_DIR ?= "/usr/include/"
HDF5_LIBRARY_DIR ?= "/usr/lib/x86_64-linux-gnu/hdf5/serial/"
HDF5_INCLUDE_DIR ?= "/usr/include/hdf5/serial/"
GSL_LIBRARY_DIR ?= "/usr/lib/x86_64-linux-gnu/"
GSL_INCLUDE_DIR ?= "/usr/include/"

make clean
make
mothur-1.48.0/bugs_issue_template.md000066400000000000000000000025251424121717000175120ustar00rootroot00000000000000**Have you checked out the frequently asked questions? https://mothur.org/wiki/Frequently_asked_questions**

To help us resolve issues quickly, please include the following:

* Please describe the expected behavior and the actual behavior. This is a detailed description of the error. The more specific you can be, the easier it is for us to solve the problem. There is no detail too small or insignificant.

* Please describe how to reproduce the issue. For example, the commands you ran within mothur as well as any outside program you used to format the data.

* Please tell us about your system. For example, the version of mothur, operating system details and hardware limitations. _"I am running version 1.39.5 on OSX 10.13.2 with 32G of RAM."_

**Additional information you are welcome to provide:**

* The log file - The log file includes important information for us about your hardware, version of mothur and release date. It also shows us the workflow and helps us spot simple errors. The log file helps us diagnose most issues quickly without the need for additional data.

* The input files used with the command causing the issue. If they are too large to attach, often a small sample of the file is enough. To select the first 500 lines from a file you can use the head system command. ex.
_head -n 500 yourInputFile > newFileNameForSampling_ mothur-1.48.0/makefile-internal000066400000000000000000000071301424121717000164340ustar00rootroot00000000000000#compress for release command #zip -r {filename.zip} {foldername} USEREADLINE ?= yes USEBOOST ?= yes USEHDF5 ?= yes USEGSL ?= yes LOGFILE_NAME ?= yes VERSION = "\"1.47.0\"" RELEASE_DATE = "\"1/21/22\"" # Optimize to level 3: CXXFLAGS += -O3 -std=c++11 LDFLAGS += -std=c++11 -pthreads ifeq ($(strip $(LOGFILE_NAME)),yes) LOGFILE_NAME="\"mothur.logfile\"" endif #if you are a mac user use the following line #TARGET_ARCH += -arch x86_64 #if you using cygwin to build Windows the following line #CXX = x86_64-w64-mingw32-g++ #CC = x86_64-w64-mingw32-g++ #TARGET_ARCH += -m64 -static #if you are a linux user use the following line #CXXFLAGS += -mtune=generic CXXFLAGS += -DRELEASE_DATE=${RELEASE_DATE} -DVERSION=${VERSION} # if you do not want to use the readline library, set this to no. # make sure you have the library installed ifeq ($(strip $(USEREADLINE)),yes) CXXFLAGS += -DUSE_READLINE LIBS += -lreadline endif #The boost libraries allow you to read gz files. ifeq ($(strip $(USEBOOST)),yes) #statically link so the boost install is not required on users machine BOOST_INCLUDE_DIR="/usr/local/include" BOOST_LIBRARY_DIR="/usr/local/lib" #windows paths #BOOST_INCLUDE_DIR="/usr/x86_64-w64-mingw32/sys-root/mingw/include" #BOOST_LIBRARY_DIR="/usr/x86_64-w64-mingw32/sys-root/mingw/lib" CXXFLAGS += -DUSE_BOOST -I ${BOOST_INCLUDE_DIR} LIBS += ${BOOST_LIBRARY_DIR}/libboost_system.a LIBS += ${BOOST_LIBRARY_DIR}/libboost_iostreams.a LIBS += ${BOOST_LIBRARY_DIR}/libboost_filesystem.a LIBS += ${BOOST_LIBRARY_DIR}/libz.a endif #User specified HDF5 library ifeq ($(strip $(USEHDF5)),yes) HDF5_INCLUDE_DIR="/usr/local/include" HDF5_LIBRARY_DIR="/usr/local/lib" LDFLAGS += -L ${HDF5_LIBRARY_DIR} LIBS += ${HDF5_LIBRARY_DIR}/libhdf5_hl_cpp.a LIBS += ${HDF5_LIBRARY_DIR}/libhdf5_cpp.a LIBS += ${HDF5_LIBRARY_DIR}/libhdf5_hl.a LIBS += ${HDF5_LIBRARY_DIR}/libhdf5.a CXXFLAGS += -DUSE_HDF5 -I ${HDF5_INCLUDE_DIR} endif #User specified GSL library ifeq ($(strip $(USEGSL)),yes) GSL_LIBRARY_DIR ?= "\"/usr/local/gsl/lib\"" GSL_INCLUDE_DIR ?= "\"/usr/local/gsl/include\"" #windows paths #GSL_INCLUDE_DIR="/usr/x86_64-w64-mingw32/sys-root/mingw/include" #GSL_LIBRARY_DIR="/usr/x86_64-w64-mingw32/sys-root/mingw/lib" LDFLAGS += -L ${GSL_LIBRARY_DIR} LIBS += ${GSL_LIBRARY_DIR}/libgsl.a LIBS += ${GSL_LIBRARY_DIR}/libgslcblas.a CXXFLAGS += -DUSE_GSL -I ${GSL_INCLUDE_DIR} endif # # INCLUDE directories for mothur # # VPATH=source/calculators:source/chimera:source/classifier:source/clearcut:source/commands:source/communitytype:source/datastructures:source/engines:source/metastats:source/read:source/svm:source/ skipUchime := source/uchime_src/ subdirs := $(sort $(dir $(filter-out $(skipUchime), source/, $(wildcard source/*/)))) subDirIncludes = $(patsubst %, -I %, $(subdirs)) subDirLinking = $(patsubst %, -L%, $(subdirs)) CXXFLAGS += -I. 
$(subDirIncludes) LDFLAGS += $(subDirLinking) # # Get the list of all .cpp files, rename to .o files # OBJECTS=$(patsubst %.cpp,%.o,$(wildcard $(addsuffix *.cpp,$(subdirs)))) OBJECTS+=$(patsubst %.c,%.o,$(wildcard $(addsuffix *.c,$(subdirs)))) OBJECTS+=$(patsubst %.cpp,%.o,$(wildcard *.cpp)) OBJECTS+=$(patsubst %.c,%.o,$(wildcard *.c)) mothur : $(OBJECTS) $(CXX) $(LDFLAGS) $(TARGET_ARCH) -o $@ $(OBJECTS) $(LIBS) strip mothur %.o : %.c %.h $(COMPILE.c) $(OUTPUT_OPTION) $< %.o : %.cpp %.h $(COMPILE.cpp) $(OUTPUT_OPTION) $< %.o : %.cpp %.hpp $(COMPILE.cpp) $(OUTPUT_OPTION) $< clean : @rm -f $(OBJECTS) mothur-1.48.0/source/000077500000000000000000000000001424121717000144215ustar00rootroot00000000000000mothur-1.48.0/source/alignreport.cpp000066400000000000000000000155011424121717000174550ustar00rootroot00000000000000/* * nastreport.cpp * * * Created by Pat Schloss on 12/19/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "sequence.hpp" #include "nast.hpp" #include "alignment.hpp" #include "alignreport.hpp" /******************************************************************************************************************/ AlignReport::AlignReport() : Report() { try { fillHeaders(); } catch(exception& e) { m->errorOut(e, "AlignReport", "AlignReport"); exit(1); } } /**************************************************************************************************/ void AlignReport::read(ifstream& repFile){ try { repFile >> queryName; repFile >> queryLength; repFile >> templateName; repFile >> templateLength; repFile >> searchMethod; repFile >> dummySearchScore; repFile >> alignmentMethod; repFile >> queryStart; repFile >> queryEnd; repFile >> templateStart; repFile >> templateEnd; repFile >> pairwiseAlignmentLength; repFile >> gapsInQuery; repFile >> gapsInTemplate; repFile >> longestInsert; repFile >> simBtwnQueryAndTemplate; gobble(repFile); searchScore = 0; if(dummySearchScore != "nan"){ util.mothurConvert(dummySearchScore, searchScore); } } catch(exception& e) { m->errorOut(e, "AlignReport", "read"); exit(1); } } /******************************************************************************************************************/ void AlignReport::fillHeaders() { try { reportHeaders.push_back("QueryName"); reportHeaders.push_back("QueryLength"); reportHeaders.push_back("TemplateName"); reportHeaders.push_back("TemplateLength"); reportHeaders.push_back("SearchMethod"); reportHeaders.push_back("SearchScore"); reportHeaders.push_back("AlignmentMethod"); reportHeaders.push_back("QueryStart"); reportHeaders.push_back("QueryEnd"); reportHeaders.push_back("TemplateStart"); reportHeaders.push_back("TemplateEnd"); reportHeaders.push_back("PairwiseAlignmentLength"); reportHeaders.push_back("GapsInQuery"); reportHeaders.push_back("GapsInTemplate"); reportHeaders.push_back("LongestInsert"); reportHeaders.push_back("SimBtwnQuery&Template"); } catch(exception& e) { m->errorOut(e, "AlignReport", "fillHeaders"); exit(1); } } /******************************************************************************************************************/ void AlignReport::print(ofstream& candidateReportFile){ try { candidateReportFile << queryName << '\t' << queryLength << '\t' << templateName << '\t' << templateLength << '\t'; candidateReportFile << searchMethod << '\t' << setprecision(2) << fixed << searchScore << '\t'; candidateReportFile << alignmentMethod << '\t' << queryStart << "\t" << queryEnd << '\t'; candidateReportFile << templateStart << "\t" << templateEnd << '\t'; candidateReportFile 
<< pairwiseAlignmentLength << '\t' << gapsInQuery << '\t' << gapsInTemplate << '\t'; candidateReportFile << longestInsert << '\t'; candidateReportFile << setprecision(2) << simBtwnQueryAndTemplate; candidateReportFile << endl; candidateReportFile.flush(); } catch(exception& e) { m->errorOut(e, "AlignReport", "print"); exit(1); } } /******************************************************************************************************************/ string AlignReport::getSeqReport(){ try { string output = ""; output += queryName + '\t' + toString(queryLength) + '\t' + templateName + '\t' + toString(templateLength) + '\t'; string temp = toString(searchScore); int pos = temp.find_last_of('.'); //find decimal point if there is one //if there is a decimal if (pos != -1) { temp = temp.substr(0, pos+3); } //set precision to 2 places else{ temp += ".00"; } output += searchMethod + '\t' + temp + '\t'; output += alignmentMethod + '\t' + toString(queryStart) + "\t" + toString(queryEnd) + '\t'; output += toString(templateStart) + "\t" + toString(templateEnd) + '\t'; output += toString(pairwiseAlignmentLength) + '\t' + toString(gapsInQuery) + '\t' + toString(gapsInTemplate) + '\t'; output += toString(longestInsert) + '\t'; temp = toString(simBtwnQueryAndTemplate); pos = temp.find_last_of('.'); //find decimal point if there is one //if there is a decimal if (pos != -1) { temp = temp.substr(0, pos+3); } //set precision to 2 places else{ temp += ".00"; } output += temp + '\n'; return output; } catch(exception& e) { m->errorOut(e, "AlignReport", "getSeqReport"); exit(1); } } /******************************************************************************************************************/ void AlignReport::setCandidate(Sequence* candSeq){ try { queryName = candSeq->getName(); queryLength = candSeq->getNumBases(); } catch(exception& e) { m->errorOut(e, "AlignReport", "setCandidate"); exit(1); } } /******************************************************************************************************************/ void AlignReport::setTemplate(Sequence* tempSeq){ try { templateName = tempSeq->getName(); templateLength = tempSeq->getNumBases(); } catch(exception& e) { m->errorOut(e, "AlignReport", "setTemplate"); exit(1); } } /******************************************************************************************************************/ void AlignReport::setSearchParameters(string method, float score){ try { searchMethod = method; searchScore = score; } catch(exception& e) { m->errorOut(e, "AlignReport", "setSearchParameters"); exit(1); } } /******************************************************************************************************************/ void AlignReport::setAlignmentParameters(string method, Alignment* align){ try { alignmentMethod = method; queryStart = align->getCandidateStartPos(); queryEnd = align->getCandidateEndPos(); templateStart = align->getTemplateStartPos(); templateEnd = align->getTemplateEndPos(); pairwiseAlignmentLength = align->getPairwiseLength(); gapsInQuery = pairwiseAlignmentLength - (queryEnd - queryStart + 1); gapsInTemplate = pairwiseAlignmentLength - (templateEnd - templateStart + 1); } catch(exception& e) { m->errorOut(e, "AlignReport", "setAlignmentParameters"); exit(1); } } /******************************************************************************************************************/ void AlignReport::setNastParameters(Nast nast){ try { longestInsert = nast.getMaxInsertLength(); simBtwnQueryAndTemplate = nast.getSimilarityScore(); } catch(exception& e) {
m->errorOut(e, "AlignReport", "setNastParameters"); exit(1); } } /******************************************************************************************************************/ mothur-1.48.0/source/alignreport.hpp000066400000000000000000000103021424121717000174540ustar00rootroot00000000000000#ifndef NASTREPORT_HPP #define NASTREPORT_HPP /* * nastreport.hpp * * * Created by Pat Schloss on 12/19/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "report.hpp" #include "nast.hpp" #include "alignment.hpp" /******************************************************************************************************************/ class AlignReport : public Report { public: AlignReport(); ~AlignReport() = default; //io functions, note - printHeaders / readHeaders / getHeaders in Report parent class void read(ifstream&); //read line in report file void print(ofstream&); //print line in report file string getSeqReport(); //return string containing line from report file //set values from objects void setCandidate(Sequence*); //sets query name and length void setTemplate(Sequence*); //sets template name and length void setSearchParameters(string, float); //sets searchMethod, searchScore void setAlignmentParameters(string, Alignment*); //sets queryStart, queryEnd, templateStart, templateEnd, gapsInQuery, gapsInTemplate, alignmentMethod, pairwiseAlignmentLength void setNastParameters(Nast); //sets longestInsert and simBtwnQueryAndTemplate //set values void setQueryName(string n) { queryName = n; } void setTemplateName(string n) { templateName = n; } void setSearchMethod(string n) { searchMethod = n; } void setAlignmentMethod(string n) { alignmentMethod = n; } void setQueryLength(int n) { queryLength = n; } void setTemplateLength(int n) { templateLength = n; } void setQueryStart(int n) { queryStart = n; } void setQueryEnd(int n) { queryEnd = n; } void setTemplateStart(int n) { templateStart = n; } void setTemplateEnd(int n) { templateEnd = n; } void setPairwiseAlignmentLength(int n) { pairwiseAlignmentLength = n; } void setGapsInQuery(int n) { gapsInQuery = n; } void setGapsInTemplate(int n) { gapsInTemplate = n; } void setLongestInsert(int i) { longestInsert = i; } void setSearchScore(float i) { searchScore = i; } void setSimBtwnQueryAndTemplate(float i) { simBtwnQueryAndTemplate = i; } //get values string getQueryName() { return queryName; } string getTemplateName() { return templateName; } string getSearchMethod() { return searchMethod; } string getAlignmentMethod() { return alignmentMethod; } int getQueryLength() { return queryLength; } int getTemplateLength() { return templateLength; } int getQueryStart() { return queryStart; } int getQueryEnd() { return queryEnd; } int getTemplateStart() { return templateStart; } int getTemplateEnd() { return templateEnd; } int getPairwiseAlignmentLength() { return pairwiseAlignmentLength; } int getGapsInQuery() { return gapsInQuery; } int getGapsInTemplate() { return gapsInTemplate; } int getLongestInsert() { return longestInsert; } float getSearchScore() { return searchScore; } float getSimBtwnQueryAndTemplate() { return simBtwnQueryAndTemplate; } private: void fillHeaders(); string queryName, templateName, searchMethod, alignmentMethod, dummySearchScore; int queryLength, templateLength, queryStart, queryEnd, templateStart, templateEnd, pairwiseAlignmentLength, gapsInQuery, gapsInTemplate, longestInsert; float searchScore, simBtwnQueryAndTemplate; }; 
/******************************************************************************************************************/ #endif mothur-1.48.0/source/averagelinkage.cpp000077500000000000000000000025571424121717000201060ustar00rootroot00000000000000#ifndef AVERAGE_H #define AVERAGE_H //test #include "cluster.hpp" /* This class implements the average UPGMA, average neighbor clustering algorithm */ /***********************************************************************/ AverageLinkage::AverageLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) : Cluster(rav, lv, dm, c, s, a) { saveRow = -1; saveCol = -1; } /***********************************************************************/ //This function returns the tag of the method. string AverageLinkage::getTag() { return("an"); } /***********************************************************************/ //This function updates the distance based on the average linkage method. bool AverageLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) { try { if ((saveRow != smallRow) || (saveCol != smallCol)) { rowBin = rabund->get(smallRow); colBin = rabund->get(smallCol); totalBin = rowBin + colBin; saveRow = smallRow; saveCol = smallCol; } colCell.dist = (colBin * colCell.dist + rowBin * rowCell.dist) / totalBin; return(true); } catch(exception& e) { m->errorOut(e, "AverageLinkage", "updateDistance"); exit(1); } } /***********************************************************************/ /***********************************************************************/ #endif mothur-1.48.0/source/calcsparcc.cpp000077500000000000000000000247641424121717000172430ustar00rootroot00000000000000// // runSparcc.cpp // PDSSparCC // // Created by Patrick Schloss on 10/31/12. // Copyright (c) 2012 University of Michigan. All rights reserved. // #include "calcsparcc.h" #include "linearalgebra.h" /**************************************************************************************************/ CalcSparcc::CalcSparcc(vector > sharedVector, int maxIterations, int numSamplings, string method){ try { m = MothurOut::getInstance(); numOTUs = (int)sharedVector[0].size(); numGroups = (int)sharedVector.size(); normalizationMethod = method; int numOTUs = (int)sharedVector[0].size(); addPseudoCount(sharedVector); vector > > allCorrelations(numSamplings); // float cycClockStart = clock(); // unsigned long long cycTimeStart = time(nullptr); for(int i=0;igetControl_pressed()) { break; } vector logFractions = getLogFractions(sharedVector, method); getT_Matrix(logFractions); //this step is slow... getT_Vector(); getD_Matrix(); vector basisVariances = getBasisVariances(); //this step is slow... 
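//
// In outline, the procedure implemented below (the SparCC approach of Friedman and Alm, 2012):
// counts are converted to Dirichlet-sampled fractions, and for every OTU pair i,j the variance
// of the log-ratio of those fractions, t_ij = var(log(x_i/x_j)), is estimated. Basis variances
// w_i are obtained by solving the linear system assembled in getD_Matrix()/getT_Vector(), and
// the basis correlations are
//
//     rho_ij = (w_i + w_j - t_ij) / (2 * sqrt(w_i * w_j)),   clamped to [-1, 1]
//
// The most strongly correlated pair is excluded repeatedly until the largest remaining |rho|
// drops to 0.1 or below, or maxIterations is reached, and the reported matrix is the
// element-wise median over the requested number of samplings. (The notation t_ij, w_i and
// rho_ij is introduced here only to summarize the code that follows.)
//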
vector > correlation = getBasisCorrelations(basisVariances); excluded.resize(numOTUs); for(int j=0;j 0.10 && iter < maxIterations){ maxRho = getExcludedPairs(correlation, excludeRow, excludeColumn); excludeValues(excludeRow, excludeColumn); vector excludedBasisVariances = getBasisVariances(); correlation = getBasisCorrelations(excludedBasisVariances); iter++; } allCorrelations[i] = correlation; } if (!m->getControl_pressed()) { if(numSamplings > 1){ getMedian(allCorrelations); } else{ median = allCorrelations[0]; } } } catch(exception& e) { m->errorOut(e, "CalcSparcc", "CalcSparcc"); exit(1); } } /**************************************************************************************************/ void CalcSparcc::addPseudoCount(vector >& sharedVector){ try { for(int i=0;igetControl_pressed()) { return; } for(int j=0;jerrorOut(e, "CalcSparcc", "addPseudoCount"); exit(1); } } /**************************************************************************************************/ vector CalcSparcc::getLogFractions(vector > sharedVector, string method){ //dirichlet by default try { vector logSharedFractions(numGroups * numOTUs, 0); if(method == "dirichlet"){ vector alphas(numGroups); for(int i=0;igetControl_pressed()) { return logSharedFractions; } alphas = util.randomDirichlet(sharedVector[i]); for(int j=0;jgetControl_pressed()) { return logSharedFractions; } float total = 0.0; for(int j=0;jerrorOut(e, "CalcSparcc", "addPseudoCount"); exit(1); } } /**************************************************************************************************/ void CalcSparcc::getT_Matrix(vector sharedFractions){ try { tMatrix.resize(numOTUs * numOTUs, 0); vector diff(numGroups); for(int j1=0;j1getControl_pressed()) { return; } float mean = 0.0; for(int i=0;ierrorOut(e, "CalcSparcc", "getT_Matrix"); exit(1); } } /**************************************************************************************************/ void CalcSparcc::getT_Vector(){ try { tVector.assign(numOTUs, 0); for(int j1=0;j1getControl_pressed()) { return; } for(int j2=0;j2errorOut(e, "CalcSparcc", "getT_Vector"); exit(1); } } /**************************************************************************************************/ void CalcSparcc::getD_Matrix(){ try { float d = numOTUs - 1.0; dMatrix.resize(numOTUs); for(int i=0;igetControl_pressed()) { return; } dMatrix[i].resize(numOTUs, 1); dMatrix[i][i] = d; } } catch(exception& e) { m->errorOut(e, "CalcSparcc", "getD_Matrix"); exit(1); } } /**************************************************************************************************/ vector CalcSparcc::getBasisVariances(){ try { LinearAlgebra LA; vector variances = LA.solveEquations(dMatrix, tVector); for(int i=0;igetControl_pressed()) { return variances; } if(variances[i] < 0){ variances[i] = 1e-4; } } return variances; } catch(exception& e) { m->errorOut(e, "CalcSparcc", "getBasisVariances"); exit(1); } } /**************************************************************************************************/ vector > CalcSparcc::getBasisCorrelations(vector basisVariance){ try { vector > rho(numOTUs); for(int i=0;igetControl_pressed()) { return rho; } float var_j = basisVariance[j]; rho[i][j] = (var_i + var_j - tMatrix[i * numOTUs + j]) / (2.0 * sqrt_var_i * sqrt(var_j)); if(rho[i][j] > 1.0) { rho[i][j] = 1.0; } else if(rho[i][j] < -1.0) { rho[i][j] = -1.0; } rho[j][i] = rho[i][j]; } } return rho; } catch(exception& e) { m->errorOut(e, "CalcSparcc", "getBasisCorrelations"); exit(1); } } 
/**************************************************************************************************/ float CalcSparcc::getExcludedPairs(vector > rho, int& maxRow, int& maxColumn){ try { float maxRho = 0; maxRow = -1; maxColumn = -1; for(int i=0;igetControl_pressed()) { return maxRho; } float tester = abs(rho[i][j]); if(tester > maxRho && excluded[i][j] != 1){ maxRho = tester; maxRow = i; maxColumn = j; } } } return maxRho; } catch(exception& e) { m->errorOut(e, "CalcSparcc", "getExcludedPairs"); exit(1); } } /**************************************************************************************************/ void CalcSparcc::excludeValues(int excludeRow, int excludeColumn){ try { tVector[excludeRow] -= tMatrix[excludeRow * numOTUs + excludeColumn]; tVector[excludeColumn] -= tMatrix[excludeRow * numOTUs + excludeColumn]; dMatrix[excludeRow][excludeColumn] = 0; dMatrix[excludeColumn][excludeRow] = 0; dMatrix[excludeRow][excludeRow]--; dMatrix[excludeColumn][excludeColumn]--; excluded[excludeRow][excludeColumn] = 1; excluded[excludeColumn][excludeRow] = 1; } catch(exception& e) { m->errorOut(e, "CalcSparcc", "excludeValues"); exit(1); } } /**************************************************************************************************/ void CalcSparcc::getMedian(vector > > allCorrelations){ try { int numSamples = (int)allCorrelations.size(); median.resize(numOTUs); for(int i=0;i hold(numSamples); for(int i=0;igetControl_pressed()) { return; } for(int k=0;kerrorOut(e, "CalcSparcc", "getMedian"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/calcsparcc.h000077500000000000000000000027201424121717000166740ustar00rootroot00000000000000 #ifndef PDSSparCC_runSparcc_h #define PDSSparCC_runSparcc_h // // runSparcc.h // PDSSparCC // // Created by Patrick Schloss on 10/31/12. // Copyright (c) 2012 University of Michigan. All rights reserved. // /**************************************************************************************************/ //#include "sparcc.h" #include "mothurout.h" #include "utils.hpp" /**************************************************************************************************/ class CalcSparcc { public: CalcSparcc(vector >, int, int, string); vector > getRho() { return median; } private: MothurOut* m; Utils util; void addPseudoCount(vector >&); vector getLogFractions(vector >, string); void getT_Matrix(vector); void getT_Vector(); void getD_Matrix(); vector getBasisVariances(); vector > getBasisCorrelations(vector); float getExcludedPairs(vector >, int&, int&); void excludeValues(int, int); void getMedian(vector > >); vector tMatrix; vector > dMatrix; vector tVector; vector > excluded; vector > median; int numOTUs; int numGroups; string normalizationMethod; }; #endif /**************************************************************************************************/ mothur-1.48.0/source/calculators/000077500000000000000000000000001424121717000167355ustar00rootroot00000000000000mothur-1.48.0/source/calculators/README.txt000066400000000000000000000031001424121717000204300ustar00rootroot00000000000000There are several different types of calculators used by mothur. All are stored in this grouping, and broken down into smaller groups for each type. Below is a brief description of the types and the commands that use them. /********************************************************************/ Calculator class is parent to all the otucalcs.
OtuCalcs are used by: collect.single collect.shared rarefaction.single rarefaction.shared summary.single summary.shared dist.shared tree.shared get.communitytype heatmap.sim venn /********************************************************************/ ClusterMetric class is parent to all the clustercalcs. The clustermetrics are used in the opti method of clustering. ClusterCalcs are used by: cluster cluster.split cluster.fit mgcluster /********************************************************************/ DistCalc class is parent to all distcalcs. The distcalcs are used for finding the distance between sequences. DistCalcs are used by: dist.seqs pairwise.seqs /********************************************************************/ TreeCalculator is parent to the unifraccalcs. unifraccalcs are used by: parsimony unifrac.weighted unifrac.unweighted /********************************************************************/ DiversityCalculator class is parent to all the diversity calculators. The diversity calculators are used by the estimator.single command. https://github.com/chrisquince/DiversityEstimates https://www.ncbi.nlm.nih.gov/pubmed/18650928 /********************************************************************/ mothur-1.48.0/source/calculators/accuracy.cpp000077500000000000000000000014101424121717000212320ustar00rootroot00000000000000// // accuracy.cpp // Mothur // // Created by Sarah Westcott on 4/11/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "accuracy.hpp" /***********************************************************************/ double Accuracy::getValue(double tp, double tn, double fp, double fn) { try { long long p = tp + fn; long long n = fp + tn; double accuracy = (tp + tn) / (double) (p + n); if(p + n == 0) { accuracy = 0; } if (isnan(accuracy) || isinf(accuracy)) { accuracy = 0; } return accuracy; } catch(exception& e) { m->errorOut(e, "Accuracy", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/accuracy.hpp000077500000000000000000000012101424121717000212350ustar00rootroot00000000000000// // accuracy.hpp // Mothur // // Created by Sarah Westcott on 4/11/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef accuracy_hpp #define accuracy_hpp #include "calculator.h" /***********************************************************************/ class Accuracy : public ClusterMetric { public: Accuracy() : ClusterMetric("accuracy") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/Accuracy"; } private: }; /***********************************************************************/ #endif /* accuracy_hpp */ mothur-1.48.0/source/calculators/ace.cpp000066400000000000000000000100571424121717000201740ustar00rootroot00000000000000/* * ace.cpp * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
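 *
 * For reference, the quantity computed below is the abundance-based coverage
 * estimator (ACE). Writing F_i for the number of OTUs observed i times, with
 * "rare" meaning abundance <= abund (the threshold passed to the constructor),
 * the code evaluates
 *
 *    C_ace   = 1 - F_1 / N_rare
 *    gamma^2 = max( S_rare * sum_i i(i-1)F_i / (C_ace * N_rare * (N_rare - 1)) - 1, 0 )
 *    S_ace   = S_abund + (S_rare + F_1 * gamma^2) / C_ace
 *
 * with a further adjustment to gamma^2 when it is >= 0.64 (the highly
 * heterogeneous case), followed by Anne Chao's standard error and 95%
 * confidence interval calculations. (This notation is introduced here only to
 * summarize the code that follows.)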
* */ #include "ace.h" /***********************************************************************/ EstOutput Ace::getValues(SAbundVector* rank) { try { data.resize(3,0); double ace, acelci, acehci; double nrare = 0; double srare = 0; double sabund = 0; double Cace, term1, gamace; double numsum = 0; double maxRank = (double)rank->getMaxRank(); for(int i=1;i<=maxRank;i++){ if(i<=abund){ srare += rank->get(i); nrare += i*rank->get(i); numsum += (i-1)*i*rank->get(i); } else if(i>abund) {sabund += rank->get(i);} } double sobs = srare + sabund; if (util.isEqual(nrare,0.0)){ Cace = 0.0000; } else { Cace = 1.0000 -(double)rank->get(1)/(double)nrare; } double denom = Cace * (double)(nrare * (nrare-1)); if(denom <= 0.0){ term1=0.0000; } else { term1 = (double)(srare * numsum)/(double)denom - 1.0; } if(term1 >= 0.0){ gamace = term1; } else { gamace = 0.0; } if(gamace >= 0.64){ gamace = gamace * (1 + (nrare * (1 - Cace) * numsum) / denom); if(gamace<0){ gamace = 0; } } if(util.isEqual(Cace, 0.0)){ ace = 0.00;}//ace else{ ace = (double)sabund+((double)srare+(double)rank->get(1)*gamace)/Cace;//ace } /* The following code was obtained from Anne Chao for calculating the SE for her ACE estimator My modification was to reset the frequencies so that a singleton is found in rank[1] insted of in rank[0], etc. I have also added the forumlae to calculate the 95% confidence intervals. */ double j,D_s=0,nn=0,ww=0; int Max_Index=rank->getMaxRank()+1; double pp, temp1, temp2; vector Part_N_Part_F(Max_Index+1,0.0); for (j=1; jget(j); for (j=1; jget(j) * j; ww += rank->get(j) * j * ( j - 1); } } double C_hat = 1.-rank->get(1)/double(nn); double Gamma = ( D_s * ww) / ( C_hat * nn * ( nn - 1.)) - 1.; temp1 = double(nn - rank->get(1)); temp2 = double(nn - 1.); if ( Gamma > 0.){ Part_N_Part_F[1] = ( D_s + nn) * ( 1. + rank->get(1) * ww / temp1 / temp2) / temp1 + nn * D_s * ww * ( temp1 - 1.) / ( temp1 * temp1 * temp2 * temp2) - ( nn + rank->get(1)) / temp1; for ( j=2; j<=Max_Index; j++){ if(j<=abund){ Part_N_Part_F[j] = ( nn * temp1 - j * rank->get(1) * D_s) / temp1 / temp1 * ( 1. + rank->get(1) * ww / temp1 / temp2) + j * rank->get(1) * D_s * nn * ( ( j - 1.) 
* temp1 * temp2 - ww * ( temp1 + temp2)) / temp1 / temp1 / temp1 / temp2 / temp2 + j * rank->get(1) * rank->get(1) / temp1 / temp1; } } } else{ Part_N_Part_F[1] = ( nn + D_s ) / temp1; for ( j=2; j<=Max_Index; j++){ if(j<=abund){ Part_N_Part_F[j-1] = ( nn * temp1 - j * rank->get(1) * D_s ) / temp1 / temp1; } } } if(Max_Index>abund){ for ( j=abund+1; j<=Max_Index; j++){ Part_N_Part_F[j-1] = 1.; } } for ( temp1=0., temp2=0., j=0; jget(j); temp2 += pp * pp * rank->get(j); } double se = temp2 - temp1 * temp1 / ace; if(toString(se) == "nan"){ acelci = ace; acehci = ace; } else if(util.isEqual(ace, 0.000)){ acelci = ace; acehci = ace; } else if(util.isEqual(ace, sobs)){ double ci = 1.96*pow(se,0.5); acelci = ace-ci; //ace lci acehci = ace+ci; //ace hci }else{ double denom = pow(ace-sobs,2); double c = exp(1.96*pow((log(1+se/denom)),0.5)); acelci = sobs+(ace-sobs)/c; //ace lci acehci = sobs+(ace-sobs)*c; //ace hci } data[0] = ace; data[1] = acelci; data[2] = acehci; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Ace", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/ace.h000066400000000000000000000013611424121717000176370ustar00rootroot00000000000000#ifndef ACE_H #define ACE_H /* * ace.h * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the Ace estimator on a single group. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class Ace : public Calculator { public: Ace(int n) : abund(n), Calculator("ace", 3, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Ace"; } private: int abund; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/bergerparker.cpp000077500000000000000000000012411424121717000221150ustar00rootroot00000000000000/* * ssbp.cpp * Mothur * * Created by Thomas Ryabin on 3/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "bergerparker.h" /***************************************************************/ EstOutput BergerParker::getValues(SAbundVector* rank){ try { data.resize(1,0); //Berger-Parker index double BP = (double)rank->getMaxRank() / (double)rank->getNumSeqs(); data[0] = BP; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "BergerParker", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/bergerparker.h000077500000000000000000000014241424121717000215650ustar00rootroot00000000000000#ifndef BERGERPARKER_H #define BERGERPARKER_H /* * bergerparker.h * Mothur * * Created by Thomas Ryabin on 3/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /*This class implements the SSBP estimator on single group. 
It is a child of the calculator class.*/ /***********************************************************************/ class BergerParker : public Calculator { public: BergerParker() : Calculator("bergerparker", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Bergerparker"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/boneh.cpp000077500000000000000000000033411424121717000205400ustar00rootroot00000000000000/* * boneh.cpp * Mothur * * Created by Thomas Ryabin on 5/13/09. * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ #include "boneh.h" #include /***********************************************************************/ //This solves for the value of 'v' using a binary search. double Boneh::getV(double f1, double n, double rs) { if(util.isEqual(rs, 0.0)) { return 0; } double accuracy = .0001; double v = 100000.0; double step = v/2; double ls = v * (1 - pow((1 - f1/(n*v)), n)); while(abs(ls - rs) > accuracy) { if(ls > rs) { v -= step; } else { v += step; } ls = v * (1 - pow((1 - f1/(n * v)), n)); step /= 2; } return v; } /***********************************************************************/ EstOutput Boneh::getValues(SAbundVector* sabund){ try { data.resize(1,0); bool valid = false; double sum = 0; double n = (double)sabund->getNumSeqs(); if(util.isEqual(f,0.0)){ f=n; } double f1 = (double)sabund->get(1); for(int i = 1; i < sabund->size(); i++){ sum += (double)sabund->get(i) * exp(-i); } if(sabund->get(1) > sum) valid = true; sum = 0; if(valid) { for(int j = 1; j < sabund->size(); j++){ sum += sabund->get(j) * pow((1 - (double)j / n), n); } double v = getV(f1, n, sum); sum = 0; for(int j = 1; j < sabund->size(); j++) { for (int i = 0; i < sabund->get(j); i++) { sum += pow(1 - j / n, n) * (1 - pow(1 - j / n, f)); } } sum += v * pow(1 - f1/(n*v), n) * (1 - pow(1 - f1/(n*v), f)); } data[0] = sum; return data; } catch(exception& e) { m->errorOut(e, "Boneh", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/boneh.h000077500000000000000000000014501424121717000202040ustar00rootroot00000000000000#ifndef BONEH_H #define BONEH_H /* * boneh.h * Mothur * * Created by Thomas Ryabin on 5/13/09. * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /* This class implements the boneh calculator on single group. It is a child of the calculator class. */ /***********************************************************************/ class Boneh : public Calculator { public: Boneh(int size) : f(size), Calculator("boneh", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Boneh"; } private: double getV(double, double, double); int f; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/bootstrap.cpp000077500000000000000000000015701424121717000214640ustar00rootroot00000000000000/* * bootstrap.cpp * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
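 *
 * For reference, the bootstrap richness estimate computed below is
 *
 *    S_boot = S_obs + sum_i F_i * (1 - i/N)^N
 *
 * where F_i is the number of OTUs observed i times and N is the total number
 * of sequences sampled (notation introduced here only to summarize the code).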
* */ #include "bootstrap.h" /***********************************************************************/ EstOutput Bootstrap::getValues(SAbundVector* rank){ try { //vector bootData(3,0); data.resize(1,0); double maxRank = (double)rank->getMaxRank(); double sampled = rank->getNumSeqs(); double sobs = rank->getNumBins(); double boot = (double)sobs; for(int i=1;i<=maxRank;i++){ boot += (double)rank->get(i)*pow((1.0-(double)i/(double)sampled),sampled); } data[0] = boot; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Bootstrap", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/bootstrap.h000077500000000000000000000013751424121717000211340ustar00rootroot00000000000000#ifndef BOOTSTRAP_H #define BOOTSTRAP_H /* * bootstrap.h * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the Bootstrap estimator on single group. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class Bootstrap : public Calculator { public: Bootstrap() : Calculator("bootstrap", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Bootstrap"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/bstick.cpp000077500000000000000000000036771424121717000207400ustar00rootroot00000000000000/* * bstick.cpp * Mothur * * Created by Thomas Ryabin on 3/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
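 *
 * For reference, the broken-stick (MacArthur) model used below predicts that
 * the i-th most abundant of S OTUs, in a sample of J individuals, has expected
 * abundance
 *
 *    E[n_i] = (J/S) * sum_{k=i..S} 1/k
 *
 * The calculator reports the largest difference between the cumulative
 * observed and cumulative expected abundances, divided by J, along with the
 * 0.886/sqrt(S) and 1.031/sqrt(S) reference values computed in getValues().
 * (This notation is introduced here only to summarize the code that follows.)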
* */ #include "bstick.h" /***********************************************************************/ double BStick::invSum(int index, double numSpec) { double sum = 0; for(int i = index; i <= numSpec; i++) sum += 1/(double)i; return sum; } /***********************************************************************/ RAbundVector BStick::getRAbundVector(SAbundVector* rank){ vector rData; int mr = 1; int nb = 0; int ns = 0; for(int i = rank->size()-1; i > 0; i--) { double cur = rank->get(i); if(mr == 1 && cur > 0) mr = i; nb += cur; ns += i*cur; for(int j = 0; j < cur; j++) rData.push_back(i); } RAbundVector rav = RAbundVector(rData, mr, nb, ns); rav.setLabel(rank->getLabel()); return rav; } /***************************************************************************/ /***************************************************************************/ EstOutput BStick::getValues(SAbundVector* rank){ try { data.resize(3,0); rdata = getRAbundVector(rank); double numInd = (double)rdata.getNumSeqs(); double numSpec = (double)rdata.getNumBins(); double sumExp = 0; double sumObs = 0; double maxDiff = 0; for(int i = 0; i < rdata.size(); i++) { sumObs += rdata.get(i); sumExp += numInd/numSpec*invSum(i+1,numSpec); double diff = fabs(sumObs-sumExp); if(diff > maxDiff) maxDiff = diff; } data[0] = maxDiff/numInd; data[1] = 0.886/sqrt(rdata.size()); data[2] = 1.031/sqrt(rdata.size()); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } catch(exception& e) { m->errorOut(e, "BStick", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/bstick.h000077500000000000000000000015111424121717000203660ustar00rootroot00000000000000#ifndef BSTICK_H #define BSTICK_H /* * bstick.h * Mothur * * Created by Thomas Ryabin on 3/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /*This class implements the BStick estimator on single group. It is a child of the calculator class.*/ /***********************************************************************/ class BStick : public Calculator { public: BStick() : Calculator("bstick", 3, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Bstick"; } private: double invSum(int, double); RAbundVector getRAbundVector(SAbundVector*); RAbundVector rdata; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/calculator.cpp000066400000000000000000000446141424121717000216030ustar00rootroot00000000000000// // calculator.cpp // Mothur // // Created by Sarah Westcott on 4/21/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "calculator.h" /***********************************************************************/ int DistCalc::setStart(string seqA, string seqB) { try { int start = 0; int alignLength = seqA.length(); for(int i=0;ierrorOut(e, "DistCalc", "setStart"); exit(1); } } /***********************************************************************/ int DistCalc::setEnd(string seqA, string seqB) { try { int end = 0; int alignLength = seqA.length(); for(int i=alignLength-1;i>=0;i--){ if((seqA[i] != '.' 
|| seqB[i] != '.')){ //one of you is not a terminal gap end = i; break; } } return end; } catch(exception& e) { m->errorOut(e, "DistCalc", "setEnd"); exit(1); } } /***********************************************************************/ // this assumes that sequences start and end with '.'s instead of'-'s. int DistCalc::setStartIgnoreTermGap(string seqA, string seqB, bool& overlap) { try { int start = 0; int alignLength = seqA.length(); for(int i=0;ierrorOut(e, "DistCalc", "setStartIgnoreTermGap"); exit(1); } } /***********************************************************************/ // this assumes that sequences start and end with '.'s instead of'-'s. int DistCalc::setEndIgnoreTermGap(string seqA, string seqB, bool& overlap) { try { int end = 0; int alignLength = seqA.length(); for(int i=alignLength-1;i>=0;i--){ if(seqA[i] != '.' && seqB[i] != '.' && seqA[i] != '-' && seqB[i] != '-' ){ //ignore terminal gaps end = i; overlap = true; break; } } return end; } catch(exception& e) { m->errorOut(e, "DistCalc", "setEndIgnoreTermGap"); exit(1); } } /***********************************************************************/ vector DistCalc::setStartsIgnoreTermGap(classifierOTU seqA, classifierOTU otu, vector cols){ try { vector starts; starts.resize(otu.numSeqs, -1); int alignLength = cols.size(); int seqAStart = 0; for(int i=0;i thisColumn = otu.otuData[cols[i]]; if (thisColumn.size() != otu.numSeqs) { //all seqs at this spot are identical char thisChar = thisColumn[0]; if ((thisChar == '.') || (thisChar == '-')) { } //every seq in otu is a '.' or '-' at this location, move to next column else { //this is a base in all locations, you are done for (int k = 0; k < starts.size(); k++) { if ((starts[k] == -1) && (seqA.otuData[cols[i]][0] != '.') && (seqA.otuData[cols[i]][0] != '-')) { starts[k] = i; numset++; } //any unset starts are set to this location } break; } }else{ for(int j=0;jerrorOut(e, "DistCalc", "setStartsIgnoreTermGap"); exit(1); } } /***********************************************************************/ vector DistCalc::setEndsIgnoreTermGap(classifierOTU seqA, classifierOTU otu, vector cols){ try { vector ends; ends.resize(otu.numSeqs, -1); int alignLength = cols.size(); int seqAEnd = 0; for(int i=alignLength-1;i>=0;i--){//for each column we want to include if ((seqA.otuData[cols[i]][0] != '.') && (seqA.otuData[cols[i]][0] != '-')) { seqAEnd = i; break; } } //set start positions int numset = 0; for(int i=seqAEnd;i>=0;i--){ //for each column we want to include if(numset == otu.numSeqs) { break; } vector thisColumn = otu.otuData[cols[i]]; if (thisColumn.size() != otu.numSeqs) { //all seqs at this spot are identical char thisChar = thisColumn[0]; if ((thisChar == '.') || (thisChar == '-')){ } //every seq in otu is a '.' 
at this location, move to next column else { //this is a base in all locations, you are done for (int k = 0; k < ends.size(); k++) { if ((ends[k] == -1) && (seqA.otuData[cols[i]][0] != '.') && (seqA.otuData[cols[i]][0] != '-')){ ends[k] = i; numset++; } //any unset starts are set to this location } break; } }else{ for(int j=0;jerrorOut(e, "DistCalc", "setEndsIgnoreTermGap"); exit(1); } } /***********************************************************************/ vector DistCalc::setStarts(classifierOTU seqA, classifierOTU otu, vector cols){ try { vector starts; starts.resize(otu.numSeqs, 0); int alignLength = cols.size(); int seqAStart = 0; for(int i=0;i thisColumn = otu.otuData[cols[i]]; if (thisColumn.size() != otu.numSeqs) { //all seqs at this spot are identical char thisChar = thisColumn[0]; if (thisChar == '.') { } //every seq in otu is a '.' at this location, move to next column else { //this is a base in all locations, you are done for (int k = 0; k < starts.size(); k++) { if (starts[k] == 0) { starts[k] = i; numset++; } //any unset starts are set to this location } break; } }else{ for(int j=0;jerrorOut(e, "DistCalc", "setStarts"); exit(1); } } /***********************************************************************/ vector DistCalc::setEnds(classifierOTU seqA, classifierOTU otu, vector cols){ try { vector ends; ends.resize(otu.numSeqs, 0); int alignLength = cols.size(); int seqAEnd = 0; for(int i=alignLength-1;i>=0;i--){//for each column we want to include if (seqA.otuData[cols[i]][0] != '.') { seqAEnd = i; break; } } //set start positions int numset = 0; for(int i=alignLength-1;i>=0;i--){ //for each column we want to include if (seqAEnd <= i) { //our query seq starts before this point so set rest of unset starts to query start for (int k = 0; k < ends.size(); k++) { if (ends[k] == 0) { ends[k] = seqAEnd; numset++; } } break; }else if(numset == otu.numSeqs) { break; } vector thisColumn = otu.otuData[cols[i]]; if (thisColumn.size() != otu.numSeqs) { //all seqs at this spot are identical char thisChar = thisColumn[0]; if (thisChar == '.') { } //every seq in otu is a '.' 
at this location, move to next column else { //this is a base in all locations, you are done for (int k = 0; k < ends.size(); k++) { if (ends[k] == 0) { ends[k] = i; numset++; } //any unset starts are set to this location } break; } }else{ for(int j=0;jerrorOut(e, "DistCalc", "setEnds"); exit(1); } } /***********************************************************************/ //nb1 and nb2 have size 1, unless amino acid = B or Z void DistCalc::predict(vector nb1, vector nb2, double& p, double& dp, double& d2p, double& tt, double eigs[20], double probs[20][20]){ try { double q; for (int i = 0; i < nb1.size(); i++) { for (int l = 0; l < 20; l++) { double elambdat = exp(tt * eigs[l]); // printf("l = %ld, nb1 = %ld, nb2 = %ld\n", l, nb1[i], nb2[i]); // printf("l = %ld, eig[m] = %f, prob[m][nb1 - 1] = %f, prob[m][nb2 - 1] = %f\n", l, eigs[l], probs[l][nb1[i] - 1], probs[l][nb2[i] - 1]); q = probs[l][nb1[i]-1] * probs[l][nb2[i]-1] * elambdat; p += q; dp += eigs[l] * q; double TEMP = eigs[l]; d2p += TEMP * TEMP * q; } } //printf("p = %f, q = %f, tt = %f\n", p, q, tt); // printf("dp = %f, d2p = %f\n", dp, d2p); } catch(exception& e) { m->errorOut(e, "DistCalc", "predict"); exit(1); } } /***********************************************************************/ //nb1 and nb2 have size 1, unless amino acid = B or Z void DistCalc::fillNums(vector& numAs, vector& numBs, int numA, int numB){ try { if (numA == asx) { //B asn or asp (3 or 4) if (numB == asx) { //B asn or asp numAs.push_back(3); numBs.push_back(3); //asn, asn numAs.push_back(3); numBs.push_back(4); //asn, asp numAs.push_back(4); numBs.push_back(3); //asp, asn numAs.push_back(4); numBs.push_back(4); //asp, asp }else { if (numB == glx) { //Z gln or glu (6 or 7) numAs.push_back(3); numBs.push_back(6); //asn, gln numAs.push_back(3); numBs.push_back(7); //asn, glu numAs.push_back(4); numBs.push_back(6); //asp, gln numAs.push_back(4); numBs.push_back(7); //asp, glu }else { if (numB < ser2) { numB++; } numAs.push_back(3); numBs.push_back(numB); //asn, numB numAs.push_back(4); numBs.push_back(numB); //asp, numB } } }else { if (numA == glx) { //Z gln or glu (6 or 7) if (numB == asx) { //B asn or asp numAs.push_back(6); numBs.push_back(3); //gln, asn numAs.push_back(6); numBs.push_back(4); //gln, asp numAs.push_back(7); numBs.push_back(3); //glu, asn numAs.push_back(7); numBs.push_back(4); //glu, asp }else { if (numB == glx) { //Z gln or glu (6 or 7) numAs.push_back(6); numBs.push_back(6); //gln, gln numAs.push_back(6); numBs.push_back(7); //gln, glu numAs.push_back(7); numBs.push_back(6); //glu, gln numAs.push_back(7); numBs.push_back(7); //glu, glu }else { if (numB < ser2) { numB++; } numAs.push_back(6); numBs.push_back(numB); //gln, numB numAs.push_back(7); numBs.push_back(numB); //glu, numB } } }else { if (numA < ser2) { numA++; } if (numB == asx) { //B asn or asp numAs.push_back(numA); numBs.push_back(3); //numA, asn numAs.push_back(numA); numBs.push_back(4); //numA, asp numAs.push_back(numA); numBs.push_back(3); //numA, asn numAs.push_back(numA); numBs.push_back(4); //numA, asp }else if (numB == glx) { //Z gln or glu (6 or 7) numAs.push_back(numA); numBs.push_back(6); //numA, gln numAs.push_back(numA); numBs.push_back(7); //numA, glu numAs.push_back(numA); numBs.push_back(6); //numA, gln numAs.push_back(numA); numBs.push_back(7); //numA, glu } } } } catch(exception& e) { m->errorOut(e, "DistCalc", "fillNums"); exit(1); } } /***********************************************************************/ double DistCalc::makeDists(Protein A, Protein B, 
double eigs[20], double probs[20][20]){ try { int numBases = A.getAlignLength(); vector seqA = A.getAligned(); vector seqB = B.getAligned(); bool inf = false; bool neginfinity = false; bool overlap = false; double delta, lnlike, slope, curv, tt; tt = 0.1; delta = tt / 2.0; for (int l = 0; l < 20; l++) { //reset for this attempt lnlike = 0.0; slope = 0.0; curv = 0.0; neginfinity = false; overlap = false; double oldweight = 1.0; if (m->getControl_pressed()) { break; } for (int i = 0; i < numBases; i++) { int numA = seqA[i].getNum(); int numB = seqB[i].getNum(); if (numA != stop && numA != del && numA != quest && numA != unk && numB != stop && numB != del && numB != quest && numB != unk) { double p = 0.0; double dp = 0.0; double d2p = 0.0; overlap = true; vector numAs; vector numBs; if (numA != asx && numA != glx && numB != asx && numB != glx) { if (numA < ser2) { numA++; } if (numB < ser2) { numB++; } numAs.push_back(numA); numBs.push_back(numB); //+1 avoid 0 }else { fillNums(numAs, numBs, numA, numB); } predict(numAs, numBs, p, dp, d2p, tt, eigs, probs); if (p <= 0.0) { neginfinity = true; }else { slope += oldweight*dp / p; curv += oldweight*(d2p / p - dp * dp / (p * p)); //printf("%ld:%ld, dp = %f, p = %f, d2p = %f\n", l, i, dp, p, d2p); //printf("%ld:%ld, slope = %f, curv = %f, oldweight[i] = %ld\n", l, i, slope, curv, oldweight); } }//endif stop }//endif bases if (!overlap) { tt = -1.0; l += 20; inf = true; }else if (!neginfinity) { if (curv < 0.0) { tt -= slope / curv; if (tt > 10000.0) { tt = -1.0; l += 20; inf = true; } }else { if ((slope > 0.0 && delta < 0.0) || (slope < 0.0 && delta > 0.0)) { delta /= -2; } tt += delta; } }else { delta /= -2; tt += delta; } if (tt < 0.00001 && !inf) { tt = 0.00001; } }//endif attempts dist = tt; //exit(1); return dist; } catch(exception& e) { m->errorOut(e, "DistCalc", "makeDists"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/calculator.h000066400000000000000000000107201424121717000212370ustar00rootroot00000000000000#ifndef CALCULATOR_H #define CALCULATOR_H #include "sabundvector.hpp" #include "sharedrabundvector.hpp" #include "sequence.hpp" #include "protein.hpp" #include "mothurout.h" #include "utils.hpp" /* The calculator class is the parent class for all the different estimators implemented in mothur except the tree calculators. It has 2 pure functions EstOutput getValues(SAbundVector*), which works on a single group, and EstOutput getValues(SharedRAbundVector* shared1, SharedRAbundVector* shared2), which compares 2 groups. 
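
 As an illustration of this interface, a minimal single-sample estimator would
 look roughly like the hypothetical class below (shown here only as a sketch;
 the real implementations live in source/calculators/, and the class name and
 citation URL are placeholders):

     class MyRichness : public Calculator {
     public:
         MyRichness() : Calculator("myrichness", 1, false) {};   // name, number of columns, multiple
         EstOutput getValues(SAbundVector* rank) {
             data.assign(1, 0);
             data[0] = (double)rank->getNumBins();                // observed richness
             return data;
         }
         EstOutput getValues(vector<SharedRAbundVector*>) { return data; }
         string getCitation() { return "http://www.mothur.org/wiki"; }
     };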
*/ typedef vector EstOutput; /***********************************************************************/ class Calculator { public: Calculator(){ m = MothurOut::getInstance(); needsAll = false; } Calculator(string n, int c, bool f) : name(n), cols(c), multiple(f) { m = MothurOut::getInstance(); needsAll = false; }; Calculator(string n, int c, bool f, bool a) : name(n), cols(c), multiple(f), needsAll(a) { m = MothurOut::getInstance(); }; virtual ~Calculator()=default; virtual EstOutput getValues(SAbundVector*) = 0; virtual EstOutput getValues(vector) = 0; //optional calc that returns the otus labels of shared otus virtual EstOutput getValues(vector sv, vector&, vector) { data = getValues(sv); return data; } virtual void print(ostream& f) { f.setf(ios::fixed, ios::floatfield); f.setf(ios::showpoint); f << data[0]; for(int i=1;imothurOut(getCitation()+"\n");} protected: Utils util; MothurOut* m; EstOutput data; string name; int cols; bool multiple; bool needsAll; }; /**************************************************************************************************/ //True Negative - far, cluster apart //True Positive - close, cluster together //False Negative - close, cluster apart //False Positve - far, cluster together class ClusterMetric { public: ClusterMetric(){ m = MothurOut::getInstance(); } ClusterMetric(string n){ m = MothurOut::getInstance(); name = n; } virtual ~ClusterMetric(){}; virtual double getValue(double, double, double, double) = 0; //tp, tn, fp, fn virtual string getName() { return name; } virtual string getCitation() = 0; void citation() { m->mothurOut(getCitation()+"\n"); } protected: MothurOut* m; Utils util; string name; }; /**************************************************************************************************/ class DistCalc { public: DistCalc(double c){ name = "unknown"; dist = 0; cutoff = c; m = MothurOut::getInstance(); } virtual ~DistCalc() = default; virtual double calcDist(Sequence, Sequence) { return -1.0; } virtual double calcDist(Protein, Protein) { return -1.0; } virtual string getCitation() = 0; void citation() { m->mothurOut(getCitation()+"\n");} virtual string getName() { return name; } //currently not used virtual vector calcDist(Sequence A, classifierOTU otu, vector cols) { vector dists; dists.resize(otu.numSeqs, 1.0); return dists; } protected: double dist; MothurOut* m; Utils util; double cutoff; string name; vector setStartsIgnoreTermGap(classifierOTU seqA, classifierOTU otu, vector cols); vector setEndsIgnoreTermGap(classifierOTU seqA, classifierOTU otu, vector cols); vector setStarts(classifierOTU seqA, classifierOTU otu, vector cols); vector setEnds(classifierOTU seqA, classifierOTU otu, vector cols); int setStart(string, string); int setEnd(string, string); int setStartIgnoreTermGap(string, string, bool&); int setEndIgnoreTermGap(string, string, bool&); //used by protein calcs double makeDists(Protein, Protein, double eigs[20], double probs[20][20]); void predict(vector nb1, vector nb2, double& p, double& dp, double& d2p, double& tt, double eigs[20], double probs[20][20]); void fillNums(vector& nb1, vector& nb2, int, int); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/calculators/canberra.cpp000077500000000000000000000017201424121717000212210ustar00rootroot00000000000000/* * canberra.cpp * Mothur * * Created by westcott on 12/14/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "canberra.h" /***********************************************************************/ EstOutput Canberra::getValues(vector shared) { try { data.resize(1,0); int numSharedOTUS = 0; double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); //is this otu shared if ((Aij != 0) && (Bij != 0)) { numSharedOTUS++; } if ((Aij + Bij) != 0) { sum += ((abs(Aij - Bij)) / (float) (Aij + Bij)); } } data[0] = (1 / (float) shared[0]->getNumBins()) * sum; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Canberra", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/canberra.h000077500000000000000000000012041424121717000206630ustar00rootroot00000000000000#ifndef CANBERRA_H #define CANBERRA_H /* * canberra.h * Mothur * * Created by westcott on 12/14/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Canberra : public Calculator { public: Canberra() : Calculator("canberra", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Canberra"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/chao1.cpp000077500000000000000000000041431424121717000204410ustar00rootroot00000000000000/* * chao1.cpp * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "chao1.h" /***********************************************************************/ EstOutput Chao1::getValues(SAbundVector* rank){ try { data.resize(3,0); double sobs = (double)rank->getNumBins(); //this is a modification do to a vector fill error that occurs when an empty sharedRabund creates a sabund //in that case there is no 1 0r 2. 
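 *
 * For reference, the estimate computed below is the bias-corrected form of the
 * Chao1 richness estimator,
 *
 *    S_chao1 = S_obs + F_1 * (F_1 - 1) / (2 * (F_2 + 1))
 *
 * where F_1 and F_2 are the numbers of singleton and doubleton OTUs. The
 * variance and the asymmetric 95% confidence limits are handled separately for
 * the F_2 > 0, F_2 = 0, and F_1 = 0 cases, as in the code below.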
double singles; if (rank->size() > 1) { singles = (double)rank->get(1); }else{ singles = 0.0; } double doubles; if (rank->size() > 2) { doubles = (double)rank->get(2); }else{ doubles = 0.0; } double chaovar = 0.0000; double chao = sobs + singles*(singles-1)/(2*(doubles+1)); if(singles > 0 && doubles > 0){ chaovar = singles*(singles-1)/(2*(doubles+1)) + singles*pow(2*singles-1, 2)/(4*pow(doubles+1,2)) + pow(singles, 2)*doubles*pow(singles-1, 2)/(4*pow(doubles+1,4)); } else if(singles > 0 && util.isEqual(doubles,0)){ chaovar = singles*(singles-1)/2 + singles*pow(2*singles-1, 2)/4 - pow(singles, 4)/(4*chao); } else if(singles == 0){ chaovar = sobs*exp(-1*rank->getNumSeqs()/sobs)*(1-exp(-1*rank->getNumSeqs()/sobs)); } double chaohci, chaolci; if(singles>0){ double denom = pow(chao-sobs,2); double c = exp(1.96*pow((log(1+chaovar/denom)),0.5)); chaolci = sobs+(chao-sobs)/c;//chao lci chaohci = sobs+(chao-sobs)*c;//chao hci } else{ double p = exp(-1*rank->getNumSeqs()/sobs); chaolci = sobs/(1-p)-1.96*pow(sobs*p/(1-p), 0.5); chaohci = sobs/(1-p)+1.96*pow(sobs*p/(1-p), 0.5); if(chaolci < sobs){ chaolci = sobs; } } data[0] = chao; data[1] = chaolci; data[2] = chaohci; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Chao1", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/chao1.h000077500000000000000000000013311424121717000201020ustar00rootroot00000000000000#ifndef CHAO1_H #define CHAO1_H /* * chao1.h * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /* This class implements the Ace estimator on single group. It is a child of the calculator class. */ /***********************************************************************/ class Chao1 : public Calculator { public: Chao1() : Calculator("chao", 3, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Chao"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/coverage.cpp000077500000000000000000000010551424121717000212400ustar00rootroot00000000000000/* * coverage.cpp * Mothur * * Created by Pat Schloss on 4/22/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "coverage.h" /***********************************************************************/ EstOutput Coverage::getValues(SAbundVector* rank){ try { data.resize(1,0); data[0] = 1. - rank->get(1) / (double)rank->getNumSeqs(); return data; } catch(exception& e) { m->errorOut(e, "Coverage", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/coverage.h000077500000000000000000000013571424121717000207120ustar00rootroot00000000000000#ifndef COVERAGE_H #define COVERAGE_H /* * coverage.h * Mothur * * Created by Pat Schloss on 4/22/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "calculator.h" /* This class implements the coverage estimator on single group. It is a child of the calculator class. 
*/ /***********************************************************************/ class Coverage : public Calculator { public: Coverage() : Calculator("coverage", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Coverage"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/dayhoff.h000077500000000000000000000101701424121717000205300ustar00rootroot00000000000000/* * dayhoff.h * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #ifndef _INC_NJ_DAYHOFF_H_ #define _INC_NJ_DAYHOFF_H_ 1 /* * As sequence divergence increases, we need to correct for multiple hits * when using Kimura distance correction method for amino acid sequences. * * This matrix of values represents the estimated "Accepted Point Mutations" * or PAMs for a range of amino acid sequence divergence, starting at 75% * up through 93% (in 0.1% increments). * * This model is derived from Dayhoff (1978). 
* * This Dayhoff matrix and the shortcut methods for dealing with Kimura * correction at high sequence divergence (> 75%) are derived from similar * work in Clustal W: * * Thompson, J.D., Higgins, D.G., Gibson, T.J., "CLUSTAL W: * improving the sensitivity of progressive multiple sequence * alignment through sequence weighting, position-specific gap * penalties and weight matrix choice.", * Nucleic Acids Research, 22:4673-4680, 1994 * */ int NJ_dayhoff[]={ 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 226, 227, 228, 229, 230, 231, 232, 233, 234, 236, 237, 238, 239, 240, 241, 243, 244, 245, 246, 248, 249, 250, 252, 253, 254, 255, 257, 258, 260, 261, 262, 264, 265, 267, 268, 270, 271, 273, 274, 276, 277, 279, 281, 282, 284, 285, 287, 289, 291, 292, 294, 296, 298, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323, 325, 328, 330, 332, 335, 337, 339, 342, 344, 347, 349, 352, 354, 357, 360, 362, 365, 368, 371, 374, 377, 380, 383, 386, 389, 393, 396, 399, 403, 407, 410, 414, 418, 422, 426, 430, 434, 438, 442, 447, 451, 456, 461, 466, 471, 476, 482, 487, 493, 498, 504, 511, 517, 524, 531, 538, 545, 553, 560, 569, 577, 586, 595, 605, 615, 626, 637, 649, 661, 675, 688, 703, 719, 736, 754, 775, 796, 819, 845, 874, 907, 945, 988 }; #endif /* _INC_NJ_DAYHOFF_H_ */ mothur-1.48.0/source/calculators/diversitycalc.h000066400000000000000000000037511424121717000217610ustar00rootroot00000000000000// // diversitycalc.h // Mothur // // Created by Sarah Westcott on 5/23/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef diversitycalc_h #define diversitycalc_h #include "mothurout.h" #include "sabundvector.hpp" #include "utils.hpp" /***********************************************************************/ struct acceptRatioPos { double acceptRatio; bool high; int pos; acceptRatioPos() { pos = 0; acceptRatio = 1.0; high = false; } acceptRatioPos(double ac, int po, bool h) : acceptRatio(ac), pos(po), high(h) {} ~acceptRatioPos() = default; }; /***********************************************************************/ inline bool operator< (const acceptRatioPos& lhs, const acceptRatioPos& rhs){ return rhs.acceptRatio > lhs.acceptRatio; } inline bool operator> (const acceptRatioPos& lhs, const acceptRatioPos& rhs){ return rhs.acceptRatio < lhs.acceptRatio; } inline bool operator<=(const acceptRatioPos& lhs, const acceptRatioPos& rhs){ return !(lhs.acceptRatio > rhs.acceptRatio); } inline bool operator>=(const acceptRatioPos& lhs, const acceptRatioPos& rhs){ return !(lhs.acceptRatio < rhs.acceptRatio); } /***********************************************************************/ class DiversityCalculator { public: DiversityCalculator(bool rs){ m = MothurOut::getInstance(); requiresSamples = rs; } virtual ~DiversityCalculator(){}; virtual string getTag() = 0; virtual bool requiresSample() { return requiresSamples; } virtual vector getValues(int, vector&) { return results; } virtual vector getValues(SAbundVector* rank) { return outputs; } virtual void getValues(SAbundVector* rank, vector& ) { } protected: Utils util; MothurOut* m; bool requiresSamples; vector results; vector outputs; }; /***********************************************************************/ #endif /* diversitycalc_h */ mothur-1.48.0/source/calculators/diversityutils.cpp000066400000000000000000001330461424121717000225530ustar00rootroot00000000000000// // diversityutils.cpp // Mothur // // Created by Sarah Westcott 
on 4/11/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "diversityutils.hpp" /***********************************************************************/ double f1_2(double x, void *pvParams) { t_LSParams *ptLSParams = (t_LSParams *) pvParams; double dMDash = ptLSParams->dMDash, dV = ptLSParams->dV, dNu = ptLSParams->dNu; int n = ptLSParams->n; double t = ((x - dMDash)*(x - dMDash))/dV; double dExp = x*((double) n) - exp(x); double dF = pow(1.0 + t/dNu, -0.5*(dNu + 1.0)); return exp(dExp)*dF; } /***********************************************************************/ double f1Log(double x, void *pvParams) { t_LNParams *ptLNParams = (t_LNParams *) pvParams; double dMDash = ptLNParams->dMDash, dV = ptLNParams->dV; int n = ptLNParams->n; double dTemp = (x - dMDash); double dExp = x*((double) n) - exp(x) - 0.5*((dTemp*dTemp)/dV); double dRet = exp(dExp); return dRet; } /***********************************************************************/ double f1(double x, void *pvParams) { t_LNParams *ptLNParams = (t_LNParams *) pvParams; double dMDash = ptLNParams->dMDash, dV = ptLNParams->dV, n = ptLNParams->n; double dTemp = (x - dMDash); double dExp = x*((double) n) - exp(x) - 0.5*((dTemp*dTemp)/dV); double dRet = exp(dExp); return dRet; } /***********************************************************************/ double derivExponent(double x, void *pvParams) { t_LNParams *ptLNParams = (t_LNParams *) pvParams; double dMDash = ptLNParams->dMDash, dV = ptLNParams->dV, n = ptLNParams->n; double dTemp = (x - dMDash)/dV, dRet = 0.0; dRet = ((double) n) - exp(x) - dTemp; return dRet; } /***********************************************************************/ void DiversityUtils::loadAbundance(t_Data *ptData, SAbundVector* rank) { try { int nNA = 0; int nL = 0, nJ = 0; int maxRank = rank->getMaxRank(); for(int i = 1; i <= maxRank; i++){ if (rank->get(i) != 0) { nNA++; } } int **aanAbund = nullptr; aanAbund = (int **) malloc(nNA*sizeof(int*)); int count = 0; for(int i = 1; i <= maxRank; i++){ if (m->getControl_pressed()) { break; } if (rank->get(i) != 0) { aanAbund[count] = (int *) malloc(sizeof(int)*2); int nA = i; int nC = rank->get(i); nL += nC; nJ += nC*nA; aanAbund[count][0] = nA; aanAbund[count][1] = nC; count++; } } ptData->nJ = nJ; ptData->nL = nL; ptData->aanAbund = aanAbund; ptData->nNA = nNA; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "loadAbundance"); exit(1); } } /***********************************************************************/ void DiversityUtils::freeAbundance(t_Data *ptData) { try { for(int i = 0; i < ptData->nNA; i++){ free(ptData->aanAbund[i]); } free(ptData->aanAbund); } catch(exception& e) { m->errorOut(e, "DiversityUtils", "freeAbundance"); exit(1); } } /***********************************************************************/ double DiversityUtils::chao(t_Data *ptData) { try { double n1 = 0.0, n2 = 0.0; int **aanAbund = ptData->aanAbund; if(aanAbund[0][0] == 1 && aanAbund[1][0] == 2){ n1 = (double) aanAbund[0][1]; n2 = (double) aanAbund[1][1]; return ((double) ptData->nL) + 0.5*((n1*n1)/n2); } else{ return -1.0; } } catch(exception& e) { m->errorOut(e, "DiversityUtils", "chao"); exit(1); } } /***********************************************************************/ double DiversityUtils::fX(double x, double dA, double dB, double dNDash){ try { double dTemp1 = (dA*(x - dB)*(x - dB))/x; return log(x) - (1.0/dNDash)*(x + dTemp1); } catch(exception& e) { m->errorOut(e, "DiversityUtils", "fX"); exit(1); } } 
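// A minimal usage sketch (hypothetical helper, not part of mothur): it only combines
// loadAbundance(), chao() and freeAbundance() defined above in this file; the function name,
// the rank pointer and the method tag passed to the constructor are illustrative, and the
// tag is not used by chao(). chao() returns Sobs + n1*n1/(2*n2) when singletons (n1) and
// doubletons (n2) are present, e.g. Sobs = 50, n1 = 10, n2 = 5 gives 50 + 100/10 = 60,
// and -1.0 otherwise.
//
// static double exampleChao1(SAbundVector* rank) {   //hypothetical helper name
//     DiversityUtils dutils("chao");                 //tag ignored by chao()
//     t_Data tData;
//     dutils.loadAbundance(&tData, rank);            //fills aanAbund, nL (#OTUs), nJ (#reads)
//     double dChao = dutils.chao(&tData);            //-1.0 if no singletons or doubletons
//     dutils.freeAbundance(&tData);
//     return dChao;
// }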
/***********************************************************************/ double DiversityUtils::f2X(double x, double dA, double dB, double dNDash) { try { double dRet = 0.0, dTemp = 2.0*dA*dB*dB; dRet = (1.0/(x*x))*(1.0 + (1.0/dNDash)*(dTemp/x)); return -dRet; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "f2X"); exit(1); } } #ifdef USE_GSL /***********************************************************************/ double fMu_sirarefaction(double x, void* pvParams) { DiversityUtils dutils("sirarefaction"); t_LSParams* ptSIParams = (t_LSParams *) pvParams; double dAlphaDD = ptSIParams->dMDash*sqrt(x); double dBetaDD = ptSIParams->dV*x; double dLogP0 = dutils.logLikelihood(0, dAlphaDD, dBetaDD, ptSIParams->dNu); return (1.0 - exp(dLogP0)) - ptSIParams->dC; } /***********************************************************************/ double fMu_igrarefaction(double x, void* pvParams) { t_IGParams* ptIGParams = (t_IGParams *) pvParams; // double tx = x / 1667.0; DiversityUtils dutils("igrarefaction"); double dAlphaDD = ptIGParams->dAlpha*sqrt(x); double dBetaDD = ptIGParams->dBeta*x; double dLogP0 = dutils.logLikelihood(0, dAlphaDD, dBetaDD); // printf("dAlpha %f dBeta %f x %f dAlphaDD %f dBetaDD %f dLofP0 %f", ptIGParams->dAlpha, ptIGParams->dBeta, x, dAlphaDD, dBetaDD, dLogP0); return (1.0 - exp(dLogP0)) - ptIGParams->dC; } /***********************************************************************/ double fMu_lsrarefaction(double x, void* pvParams) { DiversityUtils dutils("lsrarefaction"); t_LSParams* ptLSParams = (t_LSParams *) pvParams; double dMDD = ptLSParams->dMDash + x; double dLogP0 = dutils.logLikelihoodQuad(0, dMDD, ptLSParams->dV, ptLSParams->dNu); return (1.0 - exp(dLogP0)) - ptLSParams->dC; } /***********************************************************************/ double fMu_lnrarefaction(double x, void* pvParams) { t_IGParams* ptIGParams = (t_IGParams *) pvParams; DiversityUtils dutils("lnrarefaction"); double dMDD = ptIGParams->dAlpha + x; double dLogP0 = dutils.logLikelihoodQuad(0, dMDD, ptIGParams->dBeta); return (1.0 - exp(dLogP0)) - ptIGParams->dC; } /***********************************************************************/ double DiversityUtils::calcMu(void *pvParams){ try { double dLogMu = 0.0; if (method == "lnrarefaction") { t_IGParams* ptIGParams = (t_IGParams *) pvParams; solveF(0, 1.0e7, ptIGParams, 1.0e-7, &dLogMu); return exp(dLogMu); }else if ((method == "igrarefaction") || (method == "sirarefaction")) { t_IGParams* ptIGParams = (t_IGParams *) pvParams; solveF(1.0, 1.0e10, ptIGParams, 1.0e-7, &dLogMu); return dLogMu; }else if (method == "lsrarefaction") { t_LSParams *ptLSParams = (t_LSParams *) pvParams; solveF(0, 1.0e7, ptLSParams, 1.0e-7, &dLogMu); return exp(dLogMu); } return dLogMu; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "calcMu"); exit(1); } } //***********************************************************************/ double DiversityUtils::logStirlingsGamma(double dZ) { return 0.5*log(2.0*M_PI) + (dZ - 0.5)*log(dZ) - dZ; } /***********************************************************************/ double DiversityUtils::logLikelihoodQuad(int n, double dMDash, double dV){ try { gsl_integration_workspace *ptGSLWS = gsl_integration_workspace_alloc(1000); double dLogFac1 = 0.0, dLogFacN = 0.0; double dResult = 0.0, dError = 0.0, dPrecision = 0.0; gsl_function tGSLF; double dEst = dMDash + ((double)n)*dV, dA = 0.0, dB = 0.0; t_LNParams tLNParams; tLNParams.n = n; tLNParams.dMDash = dMDash; tLNParams.dV = dV; tGSLF.function = &f1; if (method 
== "metrols") { tGSLF.function = &f1Log; } tGSLF.params = (void *) &tLNParams; dLogFac1 = log(2.0*M_PI*dV); if(n < 50){ dLogFacN = gsl_sf_fact(n); dLogFacN = log(dLogFacN); } else{ dLogFacN = gsl_sf_lngamma(((double) n) + 1.0); } if(dEst > dV){ double dMax = 0.0; double dUpper = (((double) n) + (dMDash/dV) - 1.0)/(1.0 + 1.0/dV); double dVar = 0.0; if ((method == "metroln") || (method == "metrols")) { if(fabs(dUpper) > 1.0e-7){ solveF(0.0, dUpper, (void *) &tLNParams, 1.0e-5, &dMax); } } else { solveF(0.0, dUpper, derivExponent, (void *) &tLNParams, 1.0e-5, &dMax); } //lnabund, lnshift, lnrarefact dVar = sqrt(1.0/((1.0/dV) + exp(dMax))); dA = dMax - V_MULT*dVar; dB = dMax + V_MULT*dVar; } else{ double dMax = 0.0; double dLower = dEst - dV; double dUpper = (((double) n) + (dMDash/dV) - 1.0)/(1.0 + 1.0/dV); double dVar = 0.0; if ((method == "metroln") || (method == "metrols")) { if(fabs(dUpper - dLower) > 1.0e-7){ solveF(dLower, dUpper, (void *) &tLNParams, 1.0e-5, &dMax); } else{ dMax = 0.5*(dLower + dUpper); } }else { solveF(dLower, dUpper, derivExponent, (void *) &tLNParams, 1.0e-5, &dMax); } //lnabund, lnshift, lnrarefact dVar = sqrt(1.0/((1.0/dV) + exp(dMax))); dA = dMax - V_MULT*dVar; dB = dMax + V_MULT*dVar; } if(n < 10) { dPrecision = HI_PRECISION; } else { dPrecision = LO_PRECISION; } gsl_integration_qag(&tGSLF, dA, dB, dPrecision, 0.0, 1000, GSL_INTEG_GAUSS61, ptGSLWS, &dResult, &dError); gsl_integration_workspace_free(ptGSLWS); return log(dResult) - dLogFacN -0.5*dLogFac1; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "logLikelihoodQuad"); exit(1); } } //***********************************************************************/ double DiversityUtils::logLikelihoodQuad(int n, double dMDash, double dV, double dNu){ try { gsl_integration_workspace *ptGSLWS = gsl_integration_workspace_alloc(1000); double dLogFac1 = 0.0, dLogFacN = 0.0; double dN = (double) n, dResult = 0.0, dError = 0.0, dPrecision = 0.0; gsl_function tGSLF; t_LSParams tLSParams; double dA = 0.0, dB = 0.0; tLSParams.n = n; tLSParams.dMDash = dMDash; tLSParams.dV = dV; tLSParams.dNu = dNu; tGSLF.function = &f1_2; tGSLF.params = (void *) &tLSParams; if(dNu < 100){ //MAX_MU_GAMMA dLogFac1 = gsl_sf_lngamma(0.5*(dNu + 1.0)) - gsl_sf_lngamma(0.5*dNu) - 0.5*log(M_PI*dNu); } else{ dLogFac1 = 0.5*dNu*(log(0.5*(dNu + 1.0)) - log(0.5*dNu)) -0.5*log(2.0*M_PI) - 0.5; } if(n < 50){ dLogFacN = gsl_sf_fact(n); dLogFacN = log(dLogFacN); } else if(n < 100){ dLogFacN = gsl_sf_lngamma(dN + 1.0); } else{ dLogFacN = logStirlingsGamma(dN + 1.0); } dA = -100.0; dB = 100.0; if(n < 10) { dPrecision = HI_PRECISION; } else { dPrecision = LO_PRECISION; } gsl_integration_qag(&tGSLF, dA, dB, dPrecision, 0.0, 1000, GSL_INTEG_GAUSS61, ptGSLWS, &dResult, &dError); //printf("%f %f\n", dResult, dError); gsl_integration_workspace_free(ptGSLWS); return log(dResult) - dLogFacN + dLogFac1 - 0.5*log(dV); } catch(exception& e) { m->errorOut(e, "DiversityUtils", "logLikelihoodQuad"); exit(1); } } /***********************************************************************/ double DiversityUtils::logLikelihoodRampal(int n, double dMDash, double dV){ try { double dN = (double) n; double dLogLik = 0.0, dTemp = gsl_pow_int(log(dN) - dMDash,2), dTemp3 = gsl_pow_int(log(dN) - dMDash,3); dLogLik = -0.5*log(2.0*M_PI*dV) - log(dN) - (dTemp/(2.0*dV)); dLogLik += log(1.0 + 1.0/(2.0*dN*dV)*(dTemp/dV + log(dN) - dMDash - 1.0) + 1.0/(6.0*dN*dN*dV*dV*dV)*(3.0*dV*dV - (3.0*dV - 2.0*dV*dV)*(dMDash - log(dN)) - 3.0*dV*dTemp + dTemp3)); return dLogLik; } catch(exception& e) 
{ m->errorOut(e, "DiversityUtils", "logLikelihoodRampal"); exit(1); } } //***********************************************************************/ double DiversityUtils::logLikelihoodRampal(int n, double dMDash, double dV, double dNu){ try { double dGamma = 0.5*(dNu + 1.0), dN = (double) n, dRN = 1.0/dN, dRSV = 1.0/(sqrt(dV)*sqrt(dNu)); double dZ = (log(dN) - dMDash)*dRSV; double dDZDX = dRN*dRSV, dDZDX2 = -dRN*dRN*dRSV; double dF = (1.0 + dZ*dZ); double dA = 0.0, dB = 0.0, dTemp = 0.0; double dLogFac1 = 0.0; if(dNu < 100){ //MAX_MU_GAMMA dLogFac1 = gsl_sf_lngamma(0.5*(dNu + 1.0)) - gsl_sf_lngamma(0.5*dNu) - 0.5*log(M_PI*dNu); } else{ dLogFac1 = 0.5*dNu*(log(0.5*(dNu + 1.0)) - log(0.5*dNu)) -0.5*log(2.0*M_PI) - 0.5; } dA = 4.0*dZ*dZ*dDZDX*dDZDX*dGamma*(dGamma + 1.0); dA /= dF*dF; dB = -2.0*dGamma*(dDZDX*dDZDX + dZ*dDZDX2); dB /= dF; dTemp = dRN + dA + dB; return -dGamma*log(dF) + log(dTemp) + dLogFac1 - 0.5*log(dV); } catch(exception& e) { m->errorOut(e, "DiversityUtils", "logLikelihoodRampal"); exit(1); } } //***********************************************************************/ int DiversityUtils::solveF(double x_lo, double x_hi, double (*f)(double, void*), void* params, double tol, double *xsolve){ try { int status, iter = 0, max_iter = 100; const gsl_root_fsolver_type *T; gsl_root_fsolver *s; double r = 0; gsl_function F; F.function = f; F.params = params; T = gsl_root_fsolver_brent; s = gsl_root_fsolver_alloc (T); gsl_root_fsolver_set (s, &F, x_lo, x_hi); do{ iter++; if (m->getControl_pressed()) { break; } status = gsl_root_fsolver_iterate (s); r = gsl_root_fsolver_root (s); x_lo = gsl_root_fsolver_x_lower (s); x_hi = gsl_root_fsolver_x_upper (s); status = gsl_root_test_interval (x_lo, x_hi, 0, tol); } while (status == GSL_CONTINUE && iter < max_iter); (*xsolve) = gsl_root_fsolver_root (s); gsl_root_fsolver_free (s); return status; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "solveF"); exit(1); } } //***********************************************************************/ int DiversityUtils::solveF(double x_lo, double x_hi, void* params, double tol, double *xsolve){ try { int status, iter = 0, max_iter = 100; const gsl_root_fsolver_type *T; gsl_root_fsolver *s; double r = 0; gsl_function F; F.function = &derivExponent; if (method == "igrarefaction") { F.function = &fMu_igrarefaction; } else if (method == "lnrarefaction") { F.function = &fMu_lnrarefaction; } else if (method == "lsrarefaction") { F.function = &fMu_lsrarefaction; } else if (method == "sirarefaction") { F.function = &fMu_sirarefaction; } F.params = params; T = gsl_root_fsolver_brent; s = gsl_root_fsolver_alloc (T); gsl_root_fsolver_set (s, &F, x_lo, x_hi); do{ iter++; if (m->getControl_pressed()) { break; } status = gsl_root_fsolver_iterate (s); r = gsl_root_fsolver_root (s); x_lo = gsl_root_fsolver_x_lower (s); x_hi = gsl_root_fsolver_x_upper (s); status = gsl_root_test_interval (x_lo, x_hi, 0, tol); } while (status == GSL_CONTINUE && iter < max_iter); (*xsolve) = gsl_root_fsolver_root (s); gsl_root_fsolver_free (s); return status; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "solveF"); exit(1); } } /***********************************************************************/ double DiversityUtils::logLikelihood(int n, double dAlpha, double dBeta){ try { double dLogFacN = 0.0; bool status = false; double dRet = 0.0; if(n < 50){ dLogFacN = gsl_sf_fact(n); dLogFacN = log(dLogFacN); } else{ dLogFacN = gsl_sf_lngamma(((double) n) + 1.0); } status = bessel(&dRet,n, dAlpha,dBeta); if(!status){ dRet = 
sd(n, dAlpha,dBeta); } return dRet - dLogFacN; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "logLikelihood"); exit(1); } } /***********************************************************************/ double DiversityUtils::logLikelihood(int n, double dAlpha, double dBeta, double dGamma){ try { double dLogFacN = 0.0; bool status = false; double dRet = 0.0; if(n < 50){ dLogFacN = gsl_sf_fact(n); dLogFacN = log(dLogFacN); } else{ dLogFacN = gsl_sf_lngamma(((double) n) + 1.0); } status = bessel(&dRet,n, dAlpha,dBeta,dGamma); if(!status){ dRet = sd(n, dAlpha,dBeta,dGamma); } return dRet - dLogFacN; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "logLikelihood"); exit(1); } } /***********************************************************************/ double DiversityUtils::sd(int n, double dAlpha, double dBeta){ try { double dGamma = -0.5; double dA = 0.5*(-1.0 + sqrt(1.0 + (dAlpha*dAlpha)/(dBeta*dBeta))); double dN = (double) n, dNDash = dN + dGamma - 1.0, dRN = 1.0/dN; double dTemp1 = (0.5*dN)/(1.0 + dA), dTemp2 = 4.0*dRN*dRN*(1.0 + dA)*dA*dBeta*dBeta; double dXStar = dTemp1*(1.0 + sqrt(1.0 + dTemp2)); double dFX = fX(dXStar, dA, dBeta, dNDash); double d2FX = -dNDash*f2X(dXStar, dA, dBeta, dNDash); double dLogK = 0.0, dGamma1 = dGamma; if(dGamma1 < 0.0){ dGamma1 *= -1.0; } //invert sign dLogK = gsl_sf_bessel_lnKnu(dGamma1,2.0*dA*dBeta); return -2.0*dA*dBeta -log(2.0) -dLogK -dGamma*log(dBeta) + dNDash*dFX + 0.5*log(2.0*M_PI) - 0.5*log(d2FX); } catch(exception& e) { m->errorOut(e, "DiversityUtils", "sd"); exit(1); } } /***********************************************************************/ double DiversityUtils::sd(int n, double dAlpha, double dBeta, double dGamma){ try { double dA = 0.5*(-1.0 + sqrt(1.0 + (dAlpha*dAlpha)/(dBeta*dBeta))); double dN = (double) n, dNDash = dN + dGamma - 1.0, dRN = 1.0/dN; double dTemp1 = (0.5*dN)/(1.0 + dA), dTemp2 = 4.0*dRN*dRN*(1.0 + dA)*dA*dBeta*dBeta; double dXStar = dTemp1*(1.0 + sqrt(1.0 + dTemp2)); double dFX = fX(dXStar, dA, dBeta, dNDash); double d2FX = -dNDash*f2X(dXStar, dA, dBeta, dNDash); double dLogK = 0.0, dGamma1 = dGamma; if(dGamma1 < 0.0){ dGamma1 *= -1.0; } //invert sign dLogK = gsl_sf_bessel_lnKnu(dGamma1,2.0*dA*dBeta); return -2.0*dA*dBeta -log(2.0) -dLogK -dGamma*log(dBeta) + dNDash*dFX + 0.5*log(2.0*M_PI) - 0.5*log(d2FX); } catch(exception& e) { m->errorOut(e, "DiversityUtils", "sd"); exit(1); } } /***********************************************************************/ bool DiversityUtils::bessel(double* pdResult, int n, double dAlpha, double dBeta){ try { double dGamma = -0.5; double dResult = 0.0; double dOmega = 0.0, dGamma2 = 0.0; double dLogK1 = 0.0, dLogK2 = 0.0; double dN = (double) n, dNu = dGamma + dN; double dTemp1 = 0.0; if(dNu < 0.0){ dNu = -dNu; } if(dGamma < 0.0) { dGamma2 = -dGamma; } else { dGamma2 = dGamma; } dOmega = sqrt(dBeta*dBeta + dAlpha*dAlpha) - dBeta; dLogK2 = gsl_sf_bessel_lnKnu(dNu, dAlpha); if(!gsl_finite(dLogK2)){ if(dAlpha < 0.1*sqrt(dNu + 1.0)){ dLogK2 = gsl_sf_lngamma(dNu) + (dNu - 1.0)*log(2.0) - dNu*log(dAlpha); }else{ (*pdResult) = dResult; return false; } } dLogK1 = dGamma*log(dOmega/dAlpha) -gsl_sf_bessel_lnKnu(dGamma2,dOmega); dTemp1 = log((dBeta*dOmega)/dAlpha); dResult = dN*dTemp1 + dLogK2 + dLogK1; (*pdResult) = dResult; return true; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "bessel"); exit(1); } } /***********************************************************************/ bool DiversityUtils::bessel(double* pdResult, int n, double dAlpha, double dBeta, double 
dGamma){ try { double dResult = 0.0; double dOmega = 0.0, dGamma2 = 0.0; double dLogK1 = 0.0, dLogK2 = 0.0; double dN = (double) n, dNu = dGamma + dN; double dTemp1 = 0.0; if(dNu < 0.0){ dNu = -dNu; } if(dGamma < 0.0){ dGamma2 = -dGamma; } else { dGamma2 = dGamma; } dOmega = sqrt(dBeta*dBeta + dAlpha*dAlpha) - dBeta; dLogK2 = gsl_sf_bessel_lnKnu(dNu, dAlpha); if(!gsl_finite(dLogK2)){ if(dAlpha < 0.1*sqrt(dNu + 1.0)){ dLogK2 = gsl_sf_lngamma(dNu) + (dNu - 1.0)*log(2.0) - dNu*log(dAlpha); }else{ (*pdResult) = dResult; return false; } } dLogK1 = dGamma*log(dOmega/dAlpha) -gsl_sf_bessel_lnKnu(dGamma2,dOmega); dTemp1 = log((dBeta*dOmega)/dAlpha); dResult = dN*dTemp1 + dLogK2 + dLogK1; (*pdResult) = dResult; return true; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "bessel"); exit(1); } } /***********************************************************************/ vector DiversityUtils::outputResults(gsl_vector *ptX, t_Data *ptData, double (*f)(const gsl_vector*, void* params)){ try { vector results; double dAlpha = 0.0, dBeta = 0.0, dS = 0.0, dL = 0.0, dGamma = 0.0; dAlpha = gsl_vector_get(ptX, 0); dBeta = gsl_vector_get(ptX, 1); if ((method == "metrols") || (method == "metrosichel")) { dGamma = gsl_vector_get(ptX, 2); dS = gsl_vector_get(ptX, 3); }else { dS = gsl_vector_get(ptX, 2); } dL = f(ptX, ptData); results.push_back(dAlpha); results.push_back(dBeta); if (method == "metroig") { m->mothurOut("\nMetroIG - ML simplex: a = " + toString(dAlpha) + " b = " + toString(dBeta) + " S = " + toString(dS) + " NLL = " + toString(dL) + "\n"); results.push_back(dS); results.push_back(dL); } else if (method == "metroln") { m->mothurOut("\nMetroLogNormal - ML simplex: M = " + toString(dAlpha) + " V = " + toString(dBeta) + " S = " + toString(dS) + " NLL = " + toString(dL) + "\n"); results.push_back(dS); results.push_back(dL); } else if (method == "metrols") { m->mothurOut("\nMetroLogStudent - ML simplex: M = " + toString(dAlpha) + " V = " + toString(dBeta) + " Nu = " + toString(dGamma) + " S = " + toString(dS) + " NLL = " + toString(dL) + "\n"); results.push_back(dGamma); results.push_back(dS); results.push_back(dL); } else if (method == "metrosichel") { m->mothurOut("\nMetroSichel - ML simplex: a = " + toString(dAlpha) + " b = " + toString(dBeta) + " g = " + toString(dGamma) + " S = " + toString(dS) + " NLL = " + toString(dL) + "\n"); results.push_back(dGamma); results.push_back(dS); results.push_back(dL); } return results; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "outputResults"); exit(1); } } /***********************************************************************/ int DiversityUtils::minimiseSimplex(gsl_vector* ptX, size_t nP, void* pvData, double (*f)(const gsl_vector*, void* params), double initSimplexSize, double minSimplexSize, double maxSimplexSize){ try { const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex; gsl_multimin_fminimizer *s = nullptr; gsl_vector *ss; gsl_multimin_function minex_func; size_t iter = 0; int status; double size; /* Initial vertex size vector */ ss = gsl_vector_alloc (nP); if (method == "metroig") { gsl_vector_set_all(ss, initSimplexSize); gsl_vector_set(ss,nP - 1,0.1*gsl_vector_get(ptX,0)); } else if (method == "metroln") { gsl_vector_set_all(ss, initSimplexSize); gsl_vector_set(ss,2,0.1*gsl_vector_get(ptX,2)); } else if (method == "metrols" ) { for(int i = 0; i < nP; i++){ gsl_vector_set(ss, i,initSimplexSize*fabs(gsl_vector_get(ptX,i))); } }else if (method == "metrosichel" ) { for(int i = 0; i < nP; i++){ gsl_vector_set(ss, 
i,initSimplexSize*gsl_vector_get(ptX,i)); } } /* Initialize method and iterate */ minex_func.f = f; minex_func.n = nP; minex_func.params = pvData; s = gsl_multimin_fminimizer_alloc (T, nP); gsl_multimin_fminimizer_set(s, &minex_func, ptX, ss); do{ iter++; if (m->getControl_pressed()) { break; } status = gsl_multimin_fminimizer_iterate(s); if(status) { break; } size = gsl_multimin_fminimizer_size(s); status = gsl_multimin_test_size(size, minSimplexSize); if(status == GSL_SUCCESS){ for(int i = 0; i < nP; i++){ gsl_vector_set(ptX, i, gsl_vector_get(s->x, i)); } } } while(status == GSL_CONTINUE && iter < maxSimplexSize); if(status == GSL_CONTINUE){ for(int i = 0; i < nP; i++){ gsl_vector_set(ptX, i, gsl_vector_get(s->x, i)); } } gsl_vector_free(ss); gsl_multimin_fminimizer_free (s); return status; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "minimiseSimplex"); exit(1); } } /***********************************************************************/ void DiversityUtils::getProposal(gsl_rng *ptGSLRNG, gsl_vector *ptXDash, gsl_vector *ptX, int* pnSDash, int nS, t_Params *ptParams){ try { double dDeltaS = gsl_ran_gaussian(ptGSLRNG, ptParams->dSigmaS); double dDeltaA = gsl_ran_gaussian(ptGSLRNG, ptParams->dSigmaX); double dDeltaB = gsl_ran_gaussian(ptGSLRNG, ptParams->dSigmaY); double dDeltaN = 0; int nSDash = 0; if ((method == "metrols") || (method == "metrosichel")) { dDeltaN = gsl_ran_gaussian(ptGSLRNG, ptParams->dSigmaN); } gsl_vector_set(ptXDash, 0, gsl_vector_get(ptX,0) + dDeltaA); gsl_vector_set(ptXDash, 1, gsl_vector_get(ptX,1) + dDeltaB); if ((method == "metrols") || (method == "metrosichel")) { gsl_vector_set(ptXDash, 2, gsl_vector_get(ptX,2) + dDeltaN); } nSDash = nS + (int) floor(dDeltaS); if(nSDash < 1){ nSDash = 1; } (*pnSDash) = nSDash; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "getProposal"); exit(1); } } /***********************************************************************/ int DiversityUtils::fitSigma(vector acceptanceRates, vector parameterResults, int fitIters, t_Params *ptParams, t_Data *ptData, gsl_vector* ptX, void* f (void * pvInitMetro)){ try { double sigmaA = 0.1; vector defaults; defaults.push_back(ptParams->dSigmaX); defaults.push_back(ptParams->dSigmaY); defaults.push_back(ptParams->dSigmaN); acceptRatioPos defaultRatio = findBest(acceptanceRates); if (defaultRatio.acceptRatio <= 0.05) { return defaultRatio.pos; } for (int i = 0; i < parameterResults.size(); i++) { parameterResults[i] /= 10.0; } int numTries = 1; map, acceptRatioPos> sigmaToAccept; //sigma value -> acceptance ratio map> acceptToSigma; //acceptance ratio -> sigma value acceptRatioPos temp; //1.0 and pos 0 be default sigmaToAccept[parameterResults] = temp; //0.01 vector testTries = parameterResults; for (int i = 0; i < testTries.size(); i++) { testTries[i] /= 10.0; } sigmaToAccept[testTries] = temp; //0.01 testTries = parameterResults; for (int i = 0; i < testTries.size(); i++) { testTries[i] /= 100.0; } sigmaToAccept[testTries] = temp; //0.001 testTries = parameterResults; for (int i = 0; i < testTries.size(); i++) { testTries[i] /= 1000.0; } sigmaToAccept[testTries] = temp; //0.001 testTries = parameterResults; for (int i = 0; i < testTries.size(); i++) { testTries[i] /= 10000.0; } sigmaToAccept[testTries] = temp; //0.001 double newSigmaA = sigmaA + (sigmaA/2.0); //0.15 testTries = parameterResults; for (int i = 0; i < testTries.size(); i++) { testTries[i] += newSigmaA; } sigmaToAccept[testTries] = temp; testTries = parameterResults; for (int i = 0; i < testTries.size(); i++) 
{ testTries[i] += sigmaA+sigmaA; } sigmaToAccept[testTries] = temp; //adjust around closest "high" and closest "low" values acceptRatioPos thisBestHigh, thisBestLow; map> acceptToSigmaHigh; //acceptance ratio -> sigma value map> acceptToSigmaLow; //acceptance ratio -> sigma value //set iters to 1000, get close to value then run with nIters int savedIters = ptParams->nIter; ptParams->nIter = 1000; for (map, acceptRatioPos>::iterator it = sigmaToAccept.begin(); it != sigmaToAccept.end(); it++) { if (m->getControl_pressed()) { break; } ptParams->dSigmaX = it->first[0]; ptParams->dSigmaY = it->first[1]; ptParams->dSigmaN = it->first[2]; acceptanceRates = mcmc(ptParams, ptData, ptX, f); it->second = findBest(acceptanceRates); if (it->second.high) { //high if (it->second.acceptRatio < thisBestHigh.acceptRatio) { thisBestHigh = it->second; } acceptToSigmaHigh[it->second] = it->first; }else { //low if (it->second.acceptRatio < thisBestLow.acceptRatio) { thisBestLow = it->second; } acceptToSigmaLow[it->second] = it->first; } acceptToSigma[it->second] = it->first; if (it->second.acceptRatio <= 0.05) { //try with nIters to confirm ptParams->nIter = savedIters; acceptanceRates = mcmc(ptParams, ptData, ptX, f); it->second = findBest(acceptanceRates); if (it->second.high) { //high if (it->second.acceptRatio < thisBestHigh.acceptRatio) { thisBestHigh = it->second; } acceptToSigmaHigh[it->second] = it->first; }else { //low if (it->second.acceptRatio < thisBestLow.acceptRatio) { thisBestLow = it->second; } acceptToSigmaLow[it->second] = it->first; } acceptToSigma[it->second] = it->first; //if good value if (it->second.acceptRatio <= 0.05) { return it->second.pos; } else { ptParams->nIter = 1000; } } } sigmaToAccept[defaults] = defaultRatio; acceptToSigma[defaultRatio] = defaults; vector factors; factors.resize(3, 0); bool badHigh = false; bool badLow = false; double badFactor = 0.0; vector badFactors; badFactors.resize(3, 0); //find best high and check map>::iterator itFind = acceptToSigma.find(thisBestHigh); if (itFind != acceptToSigma.end()) { if (thisBestHigh.acceptRatio > 0.25) { badHigh = true; for (int i = 0; i < badFactors.size(); i++) { badFactors[i] += itFind->second[i]; } }else { for (int i = 0; i < factors.size(); i++) { factors[i] += itFind->second[i]; } sigmaA = itFind->second[0]; } }//else no high values //find best low and check itFind = acceptToSigma.find(thisBestLow); if (itFind != acceptToSigma.end()) { if (thisBestLow.acceptRatio > 0.25) { //below 25% acceptance, lets disregard badLow = true; badHigh = true; for (int i = 0; i < badFactors.size(); i++) { badFactors[i] += itFind->second[i]; } }else { for (int i = 0; i < factors.size(); i++) { factors[i] += itFind->second[i]; } if (badHigh) { sigmaA = itFind->second[0]; } else { if (sigmaA > itFind->second[0]) { sigmaA = itFind->second[0]; } } } }//no low values if (badHigh && badLow) { double increment = badFactor / (double)(fitIters); sigmaA = acceptToSigma.begin()->second[0]; //sigma for best try sigmaA -= (increment*(fitIters/(double)2.0)); for (int i = 0; i < factors.size(); i++) { factors[i] = badFactors[i] / (double)(fitIters); } }else if (badHigh || badLow) { for (int i = 0; i < factors.size(); i++) { double increment = factors[i] / (double)(fitIters); sigmaA -= (increment*(fitIters/(double)2.0)); } }else { //good high and low for (int i = 0; i < factors.size(); i++) { factors[i] /= (double)(fitIters); } } for (int i = 0; i < factors.size(); i++) { if (util.isEqual(factors[i], 0)) { factors[i] = 0.1; } } ptParams->dSigmaX = 
acceptToSigma.begin()->second[0]; ptParams->dSigmaY = acceptToSigma.begin()->second[1]; ptParams->dSigmaN = acceptToSigma.begin()->second[2]; ptParams->nIter = savedIters; while ((thisBestLow.acceptRatio > 0.05) && (numTries < fitIters)) { if (m->getControl_pressed()) { break; } m->mothurOut("\nFit try: " + toString(numTries) + "\n"); ptParams->dSigmaX += factors[0]; ptParams->dSigmaY += factors[1]; ptParams->dSigmaN += factors[2]; vector theseSettings; theseSettings.push_back(ptParams->dSigmaX); theseSettings.push_back(ptParams->dSigmaY); theseSettings.push_back(ptParams->dSigmaN); map, acceptRatioPos>::iterator it = sigmaToAccept.find(theseSettings); if (it == sigmaToAccept.end()) { acceptanceRates = mcmc(ptParams, ptData, ptX, f); thisBestLow = findBest(acceptanceRates); acceptToSigma[thisBestLow] = theseSettings; sigmaToAccept[theseSettings] = thisBestLow; numTries++; } } if (numTries == fitIters) { vector theBestSettings = acceptToSigma.begin()->second; ptParams->dSigmaX = theBestSettings[0]; ptParams->dSigmaY = theBestSettings[1]; ptParams->dSigmaN = theBestSettings[2]; acceptanceRates = mcmc(ptParams, ptData, ptX, f); thisBestLow = findBest(acceptanceRates); } if ((thisBestLow.acceptRatio > 0.05)) { m->mothurOut("\n[ERROR]: Unable to reach acceptable ratio, please review and set sigma parameters manually.\n"); m->setControl_pressed(true); } return thisBestLow.pos; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "fitSigma"); exit(1); } } /***********************************************************************/ vector DiversityUtils::mcmc(t_Params *ptParams, t_Data *ptData, gsl_vector* ptX, void* f (void * pvInitMetro)){ try { int ptXSize = 3; if ((method == "metrols") || (method == "metrosichel")) { ptXSize = 4; } pthread_t thread1, thread2, thread3; int iret1 , iret2 , iret3; gsl_vector *ptX1 = gsl_vector_alloc(ptXSize), *ptX2 = gsl_vector_alloc(ptXSize), *ptX3 = gsl_vector_alloc(ptXSize); t_MetroInit atMetroInit[3]; if (method == "metrols") { m->mothurOut("\nMCMC iter = " + toString(ptParams->nIter) + " sigmaM = " + toString(ptParams->dSigmaX) + " sigmaV = " + toString(ptParams->dSigmaY) + " sigmaN = " + toString(ptParams->dSigmaN) + " sigmaS = " + toString(ptParams->dSigmaS) + "\n"); } else if (method == "metrosichel") { m->mothurOut("\nMCMC iter = " + toString(ptParams->nIter) + " sigmaA = " + toString(ptParams->dSigmaX) + " sigmaB = " + toString(ptParams->dSigmaY) + " sigmaG = " + toString(ptParams->dSigmaN) + " sigmaS = " + toString(ptParams->dSigmaS) + "\n"); } else { m->mothurOut("\nMCMC iter = " + toString(ptParams->nIter) + " sigmaX = " + toString(ptParams->dSigmaX) + " sigmaY = " + toString(ptParams->dSigmaY) + " sigmaS = " + toString(ptParams->dSigmaS) + "\n"); } gsl_vector_memcpy(ptX1, ptX); gsl_vector_set(ptX2, 0, gsl_vector_get(ptX,0) + 2.0*ptParams->dSigmaX); gsl_vector_set(ptX2, 1, gsl_vector_get(ptX,1) + 2.0*ptParams->dSigmaY); if ((method == "metrols") || (method == "metrosichel")) { gsl_vector_set(ptX2, 2, gsl_vector_get(ptX,2) + 2.0*ptParams->dSigmaN); gsl_vector_set(ptX2, 3, gsl_vector_get(ptX,3) + 2.0*ptParams->dSigmaS); } else { gsl_vector_set(ptX2, 2, gsl_vector_get(ptX,2) + 2.0*ptParams->dSigmaS); } gsl_vector_set(ptX3, 0, gsl_vector_get(ptX,0) - 2.0*ptParams->dSigmaX); gsl_vector_set(ptX3, 1, gsl_vector_get(ptX,1) - 2.0*ptParams->dSigmaY); if ((method == "metrols") || (method == "metrosichel")) { gsl_vector_set(ptX3, 2, gsl_vector_get(ptX,2) - 2.0*ptParams->dSigmaN); if(gsl_vector_get(ptX,3) - 2.0*ptParams->dSigmaS > (double) ptData->nL){ 
gsl_vector_set(ptX3, 3, gsl_vector_get(ptX,3) - 2.0*ptParams->dSigmaS); } else{ gsl_vector_set(ptX3, 3, (double) ptData->nL); } }else { if(gsl_vector_get(ptX,2) - 2.0*ptParams->dSigmaS > (double) ptData->nL){ gsl_vector_set(ptX3, 2, gsl_vector_get(ptX,2) - 2.0*ptParams->dSigmaS); } else{ gsl_vector_set(ptX3, 2, (double) ptData->nL); } } atMetroInit[0].ptParams = ptParams; atMetroInit[0].ptData = ptData; atMetroInit[0].ptX = ptX1; atMetroInit[0].nThread = 0; atMetroInit[0].lSeed = ptParams->lSeed; atMetroInit[0].nAccepted = 0; //write thread 0 if ((method == "metrols") || (method == "metrosichel")) { m->mothurOut(toString(atMetroInit[0].nThread) + ": a = " + toString(gsl_vector_get(ptX1, 0)) + " b = " + toString(gsl_vector_get(ptX1, 1)) + " g = " + toString(gsl_vector_get(ptX1, 2)) + " S = " + toString(gsl_vector_get(ptX1, 3)) + "\n"); } else { m->mothurOut(toString(atMetroInit[0].nThread) + ": a = " + toString(gsl_vector_get(ptX1, 0)) + " b = " + toString(gsl_vector_get(ptX1, 1)) + " S = " + toString(gsl_vector_get(ptX1, 2)) + "\n"); } atMetroInit[1].ptParams = ptParams; atMetroInit[1].ptData = ptData; atMetroInit[1].ptX = ptX2; atMetroInit[1].nThread = 1; atMetroInit[1].lSeed = ptParams->lSeed + 1; atMetroInit[1].nAccepted = 0; //write thread 1 if ((method == "metrols") || (method == "metrosichel")) { m->mothurOut(toString(atMetroInit[1].nThread) + ": a = " + toString(gsl_vector_get(ptX2, 0)) + " b = " + toString(gsl_vector_get(ptX2, 1)) + " g = " + toString(gsl_vector_get(ptX2, 2)) + " S = " + toString(gsl_vector_get(ptX2, 3)) + "\n"); } else { m->mothurOut(toString(atMetroInit[1].nThread) + ": a = " + toString(gsl_vector_get(ptX2, 0)) + " b = " + toString(gsl_vector_get(ptX2, 1)) + " S = " + toString(gsl_vector_get(ptX2, 2)) + "\n"); } atMetroInit[2].ptParams = ptParams; atMetroInit[2].ptData = ptData; atMetroInit[2].ptX = ptX3; atMetroInit[2].nThread = 2; atMetroInit[2].lSeed = ptParams->lSeed + 2; atMetroInit[2].nAccepted = 0; //write thread 2 if ((method == "metrols") || (method == "metrosichel")) { m->mothurOut(toString(atMetroInit[2].nThread) + ": a = " + toString(gsl_vector_get(ptX3, 0)) + " b = " + toString(gsl_vector_get(ptX3, 1)) + " g = " + toString(gsl_vector_get(ptX3, 2)) + " S = " + toString(gsl_vector_get(ptX3, 3)) + "\n"); } else { m->mothurOut(toString(atMetroInit[2].nThread) + ": a = " + toString(gsl_vector_get(ptX3, 0)) + " b = " + toString(gsl_vector_get(ptX3, 1)) + " S = " + toString(gsl_vector_get(ptX3, 2)) + "\n"); } iret1 = pthread_create(&thread1, nullptr, f, (void*) &atMetroInit[0]); iret2 = pthread_create(&thread2, nullptr, f, (void*) &atMetroInit[1]); iret3 = pthread_create(&thread3, nullptr, f, (void*) &atMetroInit[2]); pthread_join(thread1, nullptr); pthread_join(thread2, nullptr); pthread_join(thread3, nullptr); m->mothurOut(toString(atMetroInit[0].nThread) +": accept. ratio " + toString(atMetroInit[0].nAccepted) + "/" + toString(ptParams->nIter) + " = " + toString(((double) atMetroInit[0].nAccepted)/((double) ptParams->nIter)) + "\n"); m->mothurOut(toString(atMetroInit[1].nThread) +": accept. ratio " + toString(atMetroInit[1].nAccepted) + "/" + toString(ptParams->nIter) + " = " + toString(((double) atMetroInit[1].nAccepted)/((double) ptParams->nIter)) + "\n"); m->mothurOut(toString(atMetroInit[2].nThread) +": accept. 
ratio " + toString(atMetroInit[2].nAccepted) + "/" + toString(ptParams->nIter) + " = " + toString(((double) atMetroInit[2].nAccepted)/((double) ptParams->nIter)) + "\n"); vector results; results.push_back(atMetroInit[0].nAccepted/((double) ptParams->nIter)); results.push_back(atMetroInit[1].nAccepted/((double) ptParams->nIter)); results.push_back(atMetroInit[2].nAccepted/((double) ptParams->nIter)); gsl_vector_free(ptX1); gsl_vector_free(ptX2); gsl_vector_free(ptX3); return results; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "mcmc"); exit(1); } } #endif /***********************************************************************/ acceptRatioPos DiversityUtils::findBest(vector acceptanceRates){ try { double defaultSigmaAcc = fabs(0.5 - acceptanceRates[0]); //"0" version bool high = true; if ((0.5 - acceptanceRates[0]) > 0.0) { high = false; } acceptRatioPos defaultRatio(defaultSigmaAcc, 0, high); if (defaultRatio.acceptRatio > fabs(0.5 - acceptanceRates[1])) { //is the "1" version better? defaultRatio.acceptRatio = fabs(0.5 - acceptanceRates[1]); defaultRatio.pos = 1; defaultRatio.high = true; if ((0.5 - acceptanceRates[1]) > 0.0) { defaultRatio.high = false; } } if (defaultRatio.acceptRatio > fabs(0.5 - acceptanceRates[2])) { //is the "2" version better? defaultRatio.acceptRatio = fabs(0.5 - acceptanceRates[2]); defaultRatio.pos = 2; defaultRatio.high = true; if ((0.5 - acceptanceRates[2]) > 0.0) { defaultRatio.high = false; } } return defaultRatio; } catch(exception& e) { m->errorOut(e, "DiversityUtils", "findBest"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/diversityutils.hpp000066400000000000000000000052421424121717000225540ustar00rootroot00000000000000// // diversityutils.hpp // Mothur // // Created by Sarah Westcott on 4/11/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
// #ifndef diversityutils_hpp #define diversityutils_hpp #define HI_PRECISION 1.0e-12 #define LO_PRECISION 1.0e-7 #define V_MULT 25.0 #define PENALTY 1.0e20 #define SLICE 10 #include "diversitycalc.h" /***********************************************************************/ class DiversityUtils { public: DiversityUtils(string met){ m = MothurOut::getInstance(); method = met; } #ifdef USE_GSL double logLikelihood(int n, double dAlpha, double dBeta); double logLikelihood(int n, double dAlpha, double dBeta, double); bool bessel(double* pdResult, int n, double dAlpha, double dBeta); double sd(int n, double dAlpha, double dBeta); bool bessel(double* pdResult, int n, double dAlpha, double dBeta, double); double sd(int n, double dAlpha, double dBeta, double); int minimiseSimplex(gsl_vector* ptX, size_t nP, void* pvData, double (*f)(const gsl_vector*, void* params), double, double, double); vector mcmc(t_Params *ptParams, t_Data *ptData, gsl_vector* ptX, void* f (void * pvInitMetro)); vector outputResults(gsl_vector *ptX, t_Data *ptData, double (*f)(const gsl_vector*, void* params)); void getProposal(gsl_rng *ptGSLRNG, gsl_vector *ptXDash, gsl_vector *ptX, int* pnSDash, int nS, t_Params *ptParams); int solveF(double x_lo, double x_hi, void* params, double tol, double *xsolve); int solveF(double x_lo, double x_hi, double (*f)(double, void*), void* params, double tol, double *xsolve); double logLikelihoodRampal(int n, double dMDash, double dV); double logLikelihoodQuad(int n, double dMDash, double dV); double logLikelihoodRampal(int n, double dMDash, double dV, double dNu); double logLikelihoodQuad(int n, double dMDash, double dV, double dNu); double calcMu(void *ptLNParams); int fitSigma(vector, vector, int fi, t_Params *ptParams, t_Data *ptData, gsl_vector* ptX, void* f (void * pvInitMetro)); #endif double f2X(double x, double dA, double dB, double dNDash); double fX(double x, double dA, double dB, double dNDash); double chao(t_Data *ptData); double logStirlingsGamma(double dZ); void loadAbundance(t_Data *ptData, SAbundVector* rank); void freeAbundance(t_Data *ptData); MothurOut* m; private: Utils util; string method; acceptRatioPos findBest(vector accept); }; /***********************************************************************/ #endif /* diversityutils_hpp */ mothur-1.48.0/source/calculators/eachgapdist.cpp000066400000000000000000000073731424121717000217270ustar00rootroot00000000000000// // eachgapdist.cpp // Mothur // // Created by Sarah Westcott on 4/21/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
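// Overview of calcDist() below: the comparison runs from setStart() to setEnd(); a gap
// aligned against a base counts as a mismatch, a gap aligned against another gap or a
// terminal '.' only shortens the comparable length, and the distance is mismatches divided
// by that length, forced to 1.0 once it exceeds the cutoff or when no comparable columns
// remain. For example, AC-GT versus ACTGT scores one difference over five columns, a
// distance of 0.2.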
// #include "eachgapdist.h" /***********************************************************************/ double eachGapDist::calcDist(Sequence A, Sequence B){ try { string seqA = A.getAligned(); string seqB = B.getAligned(); int alignLength = (int)seqA.length(); int start = setStart(seqA, seqB); int end = setEnd(seqA, seqB); int maxMinLength = end - start + 1; int diff = 0; for(int i=start;i cutoff) { return 1.0000; } } if(maxMinLength == 0) { dist = 1.0000; } else { dist = ((double)diff / (double)maxMinLength); } return dist; } catch(exception& e) { m->errorOut(e, "eachGapDist", "calcDist"); exit(1); } } /***********************************************************************/ vector eachGapDist::calcDist(Sequence A, classifierOTU otu, vector cols){ //this function calcs the distance using only the columns provided try { vector dists; dists.resize(otu.numSeqs, 0.0); //if you didn't select columns, use all columns if (cols.size() == 0) { for (int i = 0; i < otu.otuData.size(); i++) { cols.push_back(i); } } classifierOTU seq(A.getAligned()); vector starts = setStarts(seq, otu, cols); vector ends = setEnds(seq, otu, cols); int alignLength = cols.size(); for (int h = 0; h < otu.numSeqs; h++) { if (m->getControl_pressed()) { break; } int maxMinLength = ends[h] - starts[h] + 1; int difference = 0; for(int i=starts[h];i otuChars = otu.otuData[cols[i]]; char seqB = otuChars[0]; //assume column if identical if (otuChars.size() == otu.numSeqs) { seqB = otuChars[h]; } if(seqA == '.' && seqB == '.'){ i+=alignLength; } //reached terminal gaps, so quit else if((seqA == '-' && seqB == '-') || (seqA == '-' && seqB == '.') || (seqA == '.' && seqB == '-')){ maxMinLength--; } //comparing gaps, ignore else{ if(seqA != seqB){ difference++; } } double distance = 1.0; distance = (double)difference / maxMinLength; if (distance > cutoff) { dists[h] = 1.0000; i+=alignLength; } //break; } if(maxMinLength == 0) { dists[h] = 1.0000; } else if (dists[h] == 0.0) { dists[h] = (double)difference / maxMinLength; } //not set } return dists; } catch(exception& e) { m->errorOut(e, "oneGapDist", "calcDist"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/eachgapdist.h000077500000000000000000000013241424121717000213650ustar00rootroot00000000000000#ifndef EACHGAPDIST_H #define EACHGAPDIST_H /* * eachgapdist.h * Mothur * * Created by Sarah Westcott on 5/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /**************************************************************************************************/ class eachGapDist : public DistCalc { public: eachGapDist(double c) : DistCalc(c) {} double calcDist(Sequence A, Sequence B); vector calcDist(Sequence A, classifierOTU otu, vector cols); string getCitation() { return "http://mothur.org"; } }; /**************************************************************************************************/ #endif mothur-1.48.0/source/calculators/eachgapignore.cpp000066400000000000000000000105521424121717000222400ustar00rootroot00000000000000// // eachgapignore.cpp // Mothur // // Created by Sarah Westcott on 4/21/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #include "eachgapignore.h" /***********************************************************************/ double eachGapIgnoreTermGapDist::calcDist(Sequence A, Sequence B){ try { string seqA = A.getAligned(); string seqB = B.getAligned(); int alignLength = seqA.length(); bool overlap = false; int start = setStartIgnoreTermGap(seqA, seqB, overlap); int end = setEndIgnoreTermGap(seqA, seqB, overlap); //non-overlapping sequences if (!overlap) { return 1.0000; } int maxMinLength = end - start + 1; int diff = 0; for(int i=start;i cutoff) { return 1.0000; } } if(maxMinLength == 0) { dist = 1.0000; } else { dist = ((double)diff / (double)maxMinLength); } return dist; } catch(exception& e) { m->errorOut(e, "eachGapIgnoreTermGapDist", "calcDist"); exit(1); } } /***********************************************************************/ vector eachGapIgnoreTermGapDist::calcDist(Sequence A, classifierOTU otu, vector cols){ //this function calcs the distance using only the columns provided try { vector dists; dists.resize(otu.numSeqs, 0.0); //if you didn't select columns, use all columns if (cols.size() == 0) { for (int i = 0; i < otu.otuData.size(); i++) { cols.push_back(i); } } classifierOTU seq(A.getAligned()); vector starts = setStartsIgnoreTermGap(seq, otu, cols); vector ends = setEndsIgnoreTermGap(seq, otu, cols); int alignLength = cols.size(); for (int h = 0; h < otu.numSeqs; h++) { if (m->getControl_pressed()) { break; } if ((starts[h] == -1) && (ends[h] == -1)) { dists[h] = 1.0000; } //no overlap else { if (starts[h] == -1) { starts[h] = 0; } if (ends[h] == -1) { ends[h] = 0; } int maxMinLength = ends[h] - starts[h] + 1; int difference = 0; for(int i=starts[h];i otuChars = otu.otuData[cols[i]]; char seqB = otuChars[0]; //assume column if identical if (otuChars.size() == otu.numSeqs) { seqB = otuChars[h]; } if(seqA == '.' || seqB == '.'){ i+=alignLength; } //terminal gaps, break; else if((seqA == '-' && seqB == '-') || (seqA == '-' && seqB == '.') || (seqA == '.' && seqB == '-')){ maxMinLength--; } //comparing gaps, ignore else{ if(seqA != seqB){ difference++; } } double distance = 1.0; distance = (double)difference / maxMinLength; if (distance > cutoff) { dists[h] = 1.0000; i+=alignLength; } //break; } if(maxMinLength == 0) { dists[h] = 1.0000; } else if (dists[h] == 0.0) { dists[h] = (double)difference / maxMinLength; } //not set } } return dists; } catch(exception& e) { m->errorOut(e, "eachGapIgnoreTermGapDist", "calcDist"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/eachgapignore.h000077500000000000000000000013611424121717000217060ustar00rootroot00000000000000#ifndef EACHGAPIGNORE_H #define EACHGAPIGNORE_H /* * eachgapignore.h * Mothur * * Created by Sarah Westcott on 5/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "calculator.h" /**************************************************************************************************/ class eachGapIgnoreTermGapDist : public DistCalc { public: eachGapIgnoreTermGapDist(double c) : DistCalc(c) {} double calcDist(Sequence A, Sequence B); vector calcDist(Sequence A, classifierOTU otu, vector cols); string getCitation() { return "http://mothur.org"; } }; /**************************************************************************************************/ #endif mothur-1.48.0/source/calculators/efron.cpp000077500000000000000000000013301424121717000205520ustar00rootroot00000000000000/* * efron.cpp * Mothur * * Created by Thomas Ryabin on 5/13/09. * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ #include "efron.h" /***********************************************************************/ EstOutput Efron::getValues(SAbundVector* rank){ try { data.resize(1,0); double n = (double)rank->getNumSeqs(); if(f > n || f == 0) { f = n; } double sum = 0; for(int i = 1; i < rank->size(); i++){ sum += pow(-1., i+1) * pow(((double)f / n), i) * (double)(rank->get(i)); } data[0] = sum; return data; } catch(exception& e) { m->errorOut(e, "Efron", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/efron.h000077500000000000000000000014021424121717000202170ustar00rootroot00000000000000#ifndef EFRON_H #define EFRON_H /* * efron.h * Mothur * * Created by Thomas Ryabin on 5/13/09. * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /* This class implements the efron calculator on single group. It is a child of the calculator class. */ /***********************************************************************/ class Efron : public Calculator { public: Efron(int size) : f(size), Calculator("efron", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Efron"; } private: int f; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/erarefaction.cpp000066400000000000000000000036261424121717000221120ustar00rootroot00000000000000// // erarefaction.cpp // Mothur // // Created by Sarah Westcott on 4/3/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
// #include "erarefaction.hpp" /***********************************************************************/ ERarefaction::ERarefaction(int inc) : increment(inc), DiversityCalculator(true) {} /***********************************************************************/ void ERarefaction::getValues(SAbundVector* rank, vector& values){ try { int maxRank = rank->getMaxRank(); int sampled = rank->getNumSeqs(); //nl int numOTUs = rank->getNumBins(); //ns for (int n = 1; n <= sampled; n++) { if((n % increment) == 0){ double dSum = 0.0; #ifdef USE_GSL double dDenom = gsl_sf_lnchoose(sampled, n); for(int i = 1; i <= maxRank; i++){ if (m->getControl_pressed()) { break; } int abund = rank->get(i); if (abund != 0) { int thisRank = i; //nA if(sampled - thisRank >= n){ double dNumer = gsl_sf_lnchoose(sampled - thisRank, n); dSum += ((double) abund)*exp(dNumer - dDenom); } } } #endif double result = ((double) numOTUs) - dSum; if (isnan(result) || isinf(result)) { result = 0; } values.push_back(result); } } } catch(exception& e) { m->errorOut(e, "ERarefaction", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/erarefaction.hpp000066400000000000000000000011771424121717000221160ustar00rootroot00000000000000// // erarefaction.hpp // Mothur // // Created by Sarah Westcott on 4/3/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef erarefaction_hpp #define erarefaction_hpp #include "diversitycalc.h" /***********************************************************************/ class ERarefaction : public DiversityCalculator { public: ERarefaction(int inc); void getValues(SAbundVector* rank, vector&); string getTag() { return "e"; } private: int increment; }; /***********************************************************************/ #endif /* erarefaction_hpp */ mothur-1.48.0/source/calculators/f1score.cpp000077500000000000000000000014171424121717000210110ustar00rootroot00000000000000// // f1score.cpp // Mothur // // Created by Sarah Westcott on 4/11/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "f1score.hpp" /***********************************************************************/ double F1Score::getValue(double tp, double tn, double fp, double fn) { try { long long p = 2.0 * tp; long long pPrime = fn + fp; double f1Score = 2.0 * tp / (double) (p + pPrime); if(p + pPrime == 0) { f1Score = 0; } if (isnan(f1Score) || isinf(f1Score)) { f1Score = 0; } return f1Score; } catch(exception& e) { m->errorOut(e, "F1Score", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/f1score.hpp000077500000000000000000000012021424121717000210060ustar00rootroot00000000000000// // f1score.hpp // Mothur // // Created by Sarah Westcott on 4/11/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef f1score_hpp #define f1score_hpp #include "calculator.h" /***********************************************************************/ class F1Score : public ClusterMetric { public: F1Score() : ClusterMetric("f1score") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/F1Score"; } private: }; /***********************************************************************/ #endif /* f1score_hpp */ mothur-1.48.0/source/calculators/fdr.cpp000077500000000000000000000014321424121717000202170ustar00rootroot00000000000000// // fdr.cpp // Mothur // // Created by Sarah Westcott on 4/11/17. 
// Copyright © 2017 Schloss Lab. All rights reserved. // #include "fdr.hpp" /***********************************************************************/ double FDR::getValue(double tp, double tn, double fp, double fn) { try { long long pPrime = tp + fp; double falseDiscoveryRate = fp / (double) pPrime; if(pPrime == 0) { falseDiscoveryRate = 0; } if (isnan(falseDiscoveryRate) || isinf(falseDiscoveryRate)) { falseDiscoveryRate = 0; } return (1.0-falseDiscoveryRate); } catch(exception& e) { m->errorOut(e, "FDR", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/fdr.hpp000077500000000000000000000011411424121717000202210ustar00rootroot00000000000000// // fdr.hpp // Mothur // // Created by Sarah Westcott on 4/11/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef fdr_hpp #define fdr_hpp #include "calculator.h" /***********************************************************************/ class FDR : public ClusterMetric { public: FDR() : ClusterMetric("fdr") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/FDR"; } private: }; /***********************************************************************/ #endif /* fdr_hpp */ mothur-1.48.0/source/calculators/filters.h000077500000000000000000000062071424121717000205660ustar00rootroot00000000000000#ifndef FILTERS_H #define FILTERS_H /* * filters.h * Mothur * * Created by Sarah Westcott on 6/29/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothur.h" #include "sequence.hpp" #include "utils.hpp" /***********************************************************************/ class Filters { public: Filters() { m = MothurOut::getInstance(); numSeqs = 0; }; ~Filters(){}; string getFilter() { return filter; } void setFilter(string s) { filter = s; } void setLength(int l) { alignmentLength = l; } void setSoft(float s) { soft = s; } void setTrump(float t) { trump = t; } void setNumSeqs(int num) { numSeqs = num; } vector a, t, g, c, gap; void initialize() { a.assign(alignmentLength, 0); t.assign(alignmentLength, 0); g.assign(alignmentLength, 0); c.assign(alignmentLength, 0); gap.assign(alignmentLength, 0); } void doSoft() { int threshold = int (soft * numSeqs); for(int i=0;i> filter; fileHandle.close(); if (filter.length() != alignmentLength) { m->mothurOut("[ERROR]: Sequences are not all the same length as the filter, please correct.\n"); m->setControl_pressed(true); } } void getFreqs(Sequence seq) { string curAligned = seq.getAligned(); getFreqs(curAligned); } void getFreqs(string seq) { string curAligned = seq; numSeqs++; for(int j=0;jerrorOut(e, "FN", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/fn.hpp000077500000000000000000000011311424121717000200500ustar00rootroot00000000000000// // fn.hpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. 
// #ifndef fn_hpp #define fn_hpp #include "calculator.h" /***********************************************************************/ class FN : public ClusterMetric { public: FN() : ClusterMetric("fn") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/FN"; } private: }; /***********************************************************************/ #endif /* fn_hpp */ mothur-1.48.0/source/calculators/fp.cpp000077500000000000000000000011771424121717000200570ustar00rootroot00000000000000// // fp.cpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "fp.hpp" /***********************************************************************/ double FP::getValue(double tp, double tn, double fp, double fn) { try { double fpmin = fp / (double)(tp + tn + fp + fn); if (isnan(fpmin) || isinf(fpmin)) { fpmin = 0; } return (1.0 - fpmin); } catch(exception& e) { m->errorOut(e, "FP", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/fp.hpp000077500000000000000000000011311424121717000200520ustar00rootroot00000000000000// // fp.hpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef fp_hpp #define fp_hpp #include "calculator.h" /***********************************************************************/ class FP : public ClusterMetric { public: FP() : ClusterMetric("fp") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/FP"; } private: }; /***********************************************************************/ #endif /* fp_hpp */ mothur-1.48.0/source/calculators/fpfn.cpp000077500000000000000000000012631424121717000203770ustar00rootroot00000000000000// // fpfn.cpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "fpfn.hpp" /***********************************************************************/ double FPFN::getValue(double tp, double tn, double fp, double fn) { try { long long p = fp + fn; double fpfn = 1.0 - (p / (double)(tp + tn + fp + fn)); //minimize if (isnan(fpfn) || isinf(fpfn)) { fpfn = 0; } return fpfn; } catch(exception& e) { m->errorOut(e, "FPFN", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/fpfn.hpp000077500000000000000000000011501424121717000203770ustar00rootroot00000000000000// // fpfn.hpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef fpfn_hpp #define fpfn_hpp #include "calculator.h" /***********************************************************************/ class FPFN : public ClusterMetric { public: FPFN() : ClusterMetric("fpfn") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/FPFN"; } private: }; /***********************************************************************/ #endif /* fpfn_hpp */ mothur-1.48.0/source/calculators/geom.cpp000077500000000000000000000041771424121717000204040ustar00rootroot00000000000000/* * geom.cpp * Mothur * * Created by Thomas Ryabin on 2/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
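// Overview of FPFN::getValue() below: it returns 1 - (FP + FN) / (TP + TN + FP + FN),
// rewarding clusterings that minimise the total number of misassigned pairs; for example
// FP = 5 and FN = 5 out of 100 scored pairs gives a score of 0.9.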
* */ #include "geom.h" /***********************************************************************/ double Geom::kEq(double k, double spec){ return k/(1-k)*pow(1-k, spec)/(1-pow(1-k, spec)); } RAbundVector Geom::getRAbundVector(SAbundVector* rank){ vector rData; int mr = 1; int nb = 0; int ns = 0; for(int i = rank->size()-1; i > 0; i--) { int cur = rank->get(i); if(mr == 1 && cur > 0) mr = i; nb += cur; ns += i*cur; for(int j = 0; j < cur; j++) rData.push_back(i); } RAbundVector rav = RAbundVector(rData, mr, nb, ns); rav.setLabel(rank->getLabel()); return rav; } /***********************************************************************************/ /***********************************************************************************/ EstOutput Geom::getValues(SAbundVector* rank){ try { data.resize(3,0); rdata = getRAbundVector(rank); double numInd = rdata.getNumSeqs(); double numSpec = rdata.getNumBins(); double min = rdata.get(rdata.size()-1); double k = .5; double step = .49999; while(fabs(min - numInd*kEq(k, (double)numSpec)) > .0001) { //This uses a binary search to find the value of k. if(numInd*kEq(k, numSpec) > min) k += step; else k -= step; step /= 2; } double cK = 1/(1-pow(1-k, numSpec)); double sumExp = 0; double sumObs = 0; double maxDiff = 0; for(int i = 0; i < numSpec; i++) { sumObs += rdata.get(i); sumExp += numInd*cK*k*pow(1-k, i); double diff = fabs(sumObs-sumExp); if(diff > maxDiff) { maxDiff = diff; } } data[0] = maxDiff/numInd; data[1] = 0.886/sqrt(numSpec); data[2] = 1.031/sqrt(numSpec); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Geom", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/geom.h000077500000000000000000000015221424121717000200400ustar00rootroot00000000000000#ifndef GEOM_H #define GEOM_H /* * geom.h * Mothur * * Created by Thomas Ryabin on 2/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /* This class implements the geometric estimator on single group. It is a child of the calculator class. */ /***********************************************************************/ class Geom : public Calculator { public: Geom() : Calculator("geometric", 3, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Geometric"; } private: double kEq(double, double); RAbundVector getRAbundVector(SAbundVector*); RAbundVector rdata; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/goodscoverage.cpp000077500000000000000000000013361424121717000222760ustar00rootroot00000000000000/* * goodscoverage.cpp * Mothur * * Created by Thomas Ryabin on 4/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
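 * Good's coverage, computed below as 1 - (number of singletons / total number of sequences); for example,
 * 10 singletons among 1000 reads give a coverage of 1 - 10/1000 = 0.99.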
* */ #include "goodscoverage.h" #include "calculator.h" /**********************************************************/ EstOutput GoodsCoverage::getValues(SAbundVector* rank){ try { data.resize(1,0); double numSingletons = rank->get(1); double totalIndividuals = rank->getNumSeqs(); data[0] = 1 - numSingletons/totalIndividuals; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "GoodsCoverage", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/goodscoverage.h000077500000000000000000000014341424121717000217420ustar00rootroot00000000000000#ifndef GOODSCOVERAGE_H #define GOODSCOVERAGE_H /* * goodscoverage.h * Mothur * * Created by Thomas Ryabin on 4/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /* This class implements the LogSD estimator on single group. It is a child of the calculator class. */ /***********************************************************************/ class GoodsCoverage : public Calculator { public: GoodsCoverage() : Calculator("goodscoverage", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/GoodsCoverage"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/gower.cpp000077500000000000000000000025501424121717000205710ustar00rootroot00000000000000/* * gower.cpp * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "gower.h" /***********************************************************************/ EstOutput Gower::getValues(vector shared) { try { data.resize(1,0); vector maxOtus; maxOtus.resize(shared[0]->getNumBins()); vector minOtus; minOtus.resize(shared[0]->getNumBins()); //for each otu for (int i = 0; i < shared[0]->getNumBins(); i++) { //set otus min and max to first one minOtus[i] = shared[0]->get(i); maxOtus[i] = shared[0]->get(i); //for each group for (int j = 1; j < shared.size(); j++) { maxOtus[i] = max(shared[j]->get(i), maxOtus[i]); minOtus[i] = min(int(shared[j]->get(i)), minOtus[i]); } } double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int A = shared[0]->get(i); int B = shared[1]->get(i); double numerator = abs(A - B); double denominator = maxOtus[i] - minOtus[i]; if (!util.isEqual(denominator, 0)) { sum += (numerator / denominator); } } data[0] = sum; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Gower", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/gower.h000077500000000000000000000013031424121717000202310ustar00rootroot00000000000000#ifndef GOWER_H #define GOWER_H /* * gower.h * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. 
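 * Gower dissimilarity between two groups (implemented in gower.cpp above): each OTU contributes |A_i - B_i|
 * divided by that OTU's range (max - min across all groups), the normalized differences are summed, and OTUs
 * with a zero range are skipped.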
* */ #include "calculator.h" /***********************************************************************/ class Gower : public Calculator { public: Gower() : Calculator("gower", 1, false, true) {}; //the true means this calculator needs all groups to calculate the pair value EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Gower"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/hamming.cpp000077500000000000000000000016221424121717000210650ustar00rootroot00000000000000/* * hamming.cpp * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "hamming.h" /***********************************************************************/ EstOutput Hamming::getValues(vector shared) { try { data.resize(1,0); int numA = 0; int numB = 0; int numShared = 0; //calc the 2 denominators for (int i = 0; i < shared[0]->getNumBins(); i++) { int A = shared[0]->get(i); int B = shared[1]->get(i); if (A != 0) { numA++; } if (B != 0) { numB++; } if ((A != 0) && (B != 0)) { numShared++; } } data[0] = numA + numB - (2 * numShared); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Hamming", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/hamming.h000077500000000000000000000011751424121717000205350ustar00rootroot00000000000000#ifndef HAMMING_H #define HAMMING_H /* * hamming.h * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Hamming : public Calculator { public: Hamming() : Calculator("hamming", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Hamming"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/heip.cpp000077500000000000000000000013711424121717000203730ustar00rootroot00000000000000/* * heip.cpp * Mothur * * Created by Pat Schloss on 8/21/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "heip.h" #include "shannon.h" /***********************************************************************/ EstOutput Heip::getValues(SAbundVector* rank){ try { data.resize(1,0.0000); vector shanData(3,0); Shannon* shannon = new Shannon(); shanData = shannon->getValues(rank); long int sobs = rank->getNumBins(); if(sobs > 1){ data[0] = (exp(shanData[0])-1) / (sobs - 1);; } else{ data[0] = 1; } delete shannon; return data; } catch(exception& e) { m->errorOut(e, "Heip", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/heip.h000077500000000000000000000011271424121717000200370ustar00rootroot00000000000000#ifndef HEIP #define HEIP /* * heip.h * Mothur * * Created by Pat Schloss on 8/21/10. * Copyright 2010 Schloss Lab. All rights reserved. 
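 * Heip's evenness, computed in heip.cpp above as (e^H - 1)/(Sobs - 1), where H is the Shannon index and Sobs
 * the observed richness; it is defined as 1 when Sobs <= 1.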
* */ #include "calculator.h" /***********************************************************************/ class Heip : public Calculator { public: Heip() : Calculator("heip", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Heip"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/hellinger.cpp000077500000000000000000000020751424121717000214210ustar00rootroot00000000000000/* * hellinger.cpp * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "hellinger.h" /***********************************************************************/ EstOutput Hellinger::getValues(vector shared) { try { data.resize(1,0); double sumA = 0.0; double sumB = 0.0; //calc the 2 denominators for (int i = 0; i < shared[0]->getNumBins(); i++) { sumA += shared[0]->get(i); sumB += shared[1]->get(i); } //calc sum double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); double term1 = sqrt((Aij / sumA)); double term2 = sqrt((Bij / sumB)); sum += ((term1 - term2) * (term1 - term2)); } data[0] = sqrt(sum); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Hellinger", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/hellinger.h000077500000000000000000000012131424121717000210570ustar00rootroot00000000000000#ifndef HELLINGER_H #define HELLINGER_H /* * hellinger.h * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Hellinger : public Calculator { public: Hellinger() : Calculator("hellinger", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Hellinger"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/igabundance.cpp000066400000000000000000000033111424121717000216770ustar00rootroot00000000000000// // igabundance.cpp // Mothur // // Created by Sarah Westcott on 4/3/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
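// Expected species-abundance curve under the IG (inverse-Gaussian) model: for each abundance class j up to the
// padded maximum rank, the fitted probability exp(logLikelihood(j, alpha, beta)) is weighted by the sampled
// richness ns and averaged over the MCMC samples passed in through 'sampling'.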
// #include "igabundance.hpp" /***********************************************************************/ IGAbundance::IGAbundance() : DiversityCalculator(true) {} /***********************************************************************/ vector IGAbundance::getValues(int maxRank, vector& sampling) { try { maxRank = floor(pow(2.0,ceil(log((double) maxRank)/log(2.0)) + 2.0) + 1.0e-7); //nMax results.resize(maxRank, 0.0); int nSamples = sampling.size(); if (nSamples == 0) { return results; } #ifdef USE_GSL DiversityUtils dutils("igabund"); for(int i = 0; i < sampling.size(); i++) { if (m->getControl_pressed()) { break; } for (int j = 1; j <= maxRank; j++) { int nA = j; double dLog = 0.0, dP = 0.0; dLog = dutils.logLikelihood(nA, sampling[i].alpha, sampling[i].beta); dP = exp(dLog); results[j - 1] += dP*sampling[i].ns; } } for (int i = 1; i<=maxRank; i++) { results[i-1] /= (double)nSamples; if (isnan(results[i-1]) || isinf(results[i-1])) { results[i-1] = 0.0; } } #endif return results; } catch(exception& e) { m->errorOut(e, "IGAbundance", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/igabundance.hpp000066400000000000000000000012321424121717000217040ustar00rootroot00000000000000// // igabundance.hpp // Mothur // // Created by Sarah Westcott on 4/3/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef igabundance_hpp #define igabundance_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class IGAbundance : public DiversityCalculator { public: IGAbundance(); vector getValues(int mr, vector& sampling); string getTag() { return "ig"; } private: Utils util; }; /***********************************************************************/ #endif /* igabundance_hpp */ mothur-1.48.0/source/calculators/ignoregaps.cpp000066400000000000000000000202061424121717000215770ustar00rootroot00000000000000// // ignoregaps.cpp // Mothur // // Created by Sarah Westcott on 4/21/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "ignoregaps.h" /***********************************************************************/ double ignoreGaps::calcDist(Sequence A, Sequence B){ try { int diff = 0; int start = 0; int end = 0; bool overlap = false; string seqA = A.getAligned(); string seqB = B.getAligned(); int alignLength = (int)seqA.length(); for(int i=0;i=0;i--){ if(seqA[i] != '.' 
&& seqB[i] != '.'){ end = i; overlap = true; break; } } //non-overlapping sequences if (!overlap) { return 1.0000; } int maxMinLength = end - start + 1; for(int i=start; i cutoff) { return 1.0000; } } if(maxMinLength == 0) { dist = 1.0000; } else { dist = ((double)diff / (double)maxMinLength); } return dist; } catch(exception& e) { m->errorOut(e, "ignoreGaps", "calcDist"); exit(1); } } /***********************************************************************/ vector ignoreGaps::calcDist(Sequence A, classifierOTU otu, vector cols){ //this function calcs the distance using only the columns provided try { vector dists; dists.resize(otu.numSeqs, 0.0); //if you didn't select columns, use all columns if (cols.size() == 0) { for (int i = 0; i < otu.otuData.size(); i++) { cols.push_back(i); } } classifierOTU seq(A.getAligned()); vector starts = setStarts(seq, otu, cols); vector ends = setEnds(seq, otu, cols); int alignLength = cols.size(); for (int h = 0; h < otu.numSeqs; h++) { if (m->getControl_pressed()) { break; } if ((starts[h] == -1) && (ends[h] == -1)) { dists[h] = 1.0000; } //no overlap else { if (starts[h] == -1) { starts[h] = 0; } if (ends[h] == -1) { ends[h] = 0; } int maxMinLength = ends[h] - starts[h] + 1; int difference = 0; for(int i=starts[h];i otuChars = otu.otuData[cols[i]]; char seqB = otuChars[0]; //assume column if identical if (otuChars.size() == otu.numSeqs) { seqB = otuChars[h]; } if(seqA == '.' || seqB == '.'){ i+=alignLength; } else if((seqA != '-' && seqB != '-')){ if(seqA != seqB){ difference++; } }else { maxMinLength--; } double distance = 1.0; distance = (double)difference / maxMinLength; if (distance > cutoff) { dists[h] = 1.0000; i+=alignLength; } //break; } if(maxMinLength == 0) { dists[h] = 1.0000; } else if (dists[h] == 0.0) { dists[h] = (double)difference / maxMinLength; } //not set } } return dists; } catch(exception& e) { m->errorOut(e, "ignoreGaps", "calcDist"); exit(1); } } /***********************************************************************/ vector ignoreGaps::setStarts(classifierOTU seqA, classifierOTU otu, vector cols){ try { vector starts; starts.resize(otu.numSeqs, -1); int alignLength = cols.size(); int seqAStart = 0; for(int i=0;i thisColumn = otu.otuData[cols[i]]; if (thisColumn.size() != otu.numSeqs) { //all seqs at this spot are identical char thisChar = thisColumn[0]; if (thisChar == '.') { } //every seq in otu is a '.' or '-' at this location, move to next column else { //this is a base in all locations, you are done for (int k = 0; k < starts.size(); k++) { if ((starts[k] == -1) && (seqA.otuData[cols[i]][0] != '.')) { starts[k] = i; numset++; } //any unset starts are set to this location } break; } }else{ for(int j=0;jerrorOut(e, "ignoreGaps", "setStarts"); exit(1); } } /***********************************************************************/ vector ignoreGaps::setEnds(classifierOTU seqA, classifierOTU otu, vector cols){ try { vector ends; ends.resize(otu.numSeqs, -1); int alignLength = cols.size(); int seqAEnd = 0; for(int i=alignLength-1;i>=0;i--){//for each column we want to include if (seqA.otuData[cols[i]][0] != '.') { seqAEnd = i; break; } } //set start positions int numset = 0; for(int i=seqAEnd;i>=0;i--){ //for each column we want to include if(numset == otu.numSeqs) { break; } vector thisColumn = otu.otuData[cols[i]]; if (thisColumn.size() != otu.numSeqs) { //all seqs at this spot are identical char thisChar = thisColumn[0]; if (thisChar == '.'){ } //every seq in otu is a '.' 
at this location, move to next column else { //this is a base in all locations, you are done for (int k = 0; k < ends.size(); k++) { if ((ends[k] == -1) && (seqA.otuData[cols[i]][0] != '.')){ ends[k] = i; numset++; } //any unset starts are set to this location } break; } }else{ for(int j=0;jerrorOut(e, "ignoreGaps", "setEnds"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/ignoregaps.h000077500000000000000000000020631424121717000212500ustar00rootroot00000000000000#ifndef IGNOREGAPS_H #define IGNOREGAPS_H /* * ignoregaps.h * Mothur * * Created by Sarah Westcott on 5/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /**************************************************************************************************/ // this class calculates distances by ignoring all gap characters. so if seq a has an "A" and seq // b has a '-', there is no penalty class ignoreGaps : public DistCalc { public: ignoreGaps(double c) : DistCalc(c) {} vector calcDist(Sequence A, classifierOTU otu, vector cols); double calcDist(Sequence A, Sequence B); //calc distance between 2 seqeunces string getCitation() { return "http://mothur.org"; } private: vector setStarts(classifierOTU seqA, classifierOTU otu, vector cols); vector setEnds(classifierOTU seqA, classifierOTU otu, vector cols); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/calculators/igrarefaction.cpp000066400000000000000000000054551424121717000222670ustar00rootroot00000000000000// // igrarefaction.cpp // Mothur // // Created by Sarah Westcott on 5/6/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "igrarefaction.hpp" /***********************************************************************/ IGRarefaction::IGRarefaction(double c) : coverage(c), DiversityCalculator(true) {} /***********************************************************************/ inline int compare_doubles1(const void* a, const void* b) { double* arg1 = (double *) a; double* arg2 = (double *) b; if( *arg1 < *arg2 ) return -1; else if( *arg1 == *arg2 ) return 0; else return 1; } /***********************************************************************/ vector IGRarefaction::getValues(int numSeqs, vector& sampling){ //numSeqs = rank->getNumSeqs(); //nj try { #ifdef USE_GSL DiversityUtils dutils("igrarefaction"); int nSamples = sampling.size(); double* adMu = nullptr; double dLower = 0.0, dMedian = 0.0, dUpper = 0.0; gsl_set_error_handler_off(); t_IGParams* atIGParams; atIGParams = (t_IGParams *) malloc(nSamples*sizeof(t_IGParams)); //MAX_SAMPLES //load sampling data for (int i = 0; i < nSamples; i++) { if (m->getControl_pressed()) { free(atIGParams); return results; } atIGParams[i].dAlpha = sampling[i].alpha; atIGParams[i].dBeta = sampling[i].beta; atIGParams[i].nS = sampling[i].ns; atIGParams[i].dC = coverage; } adMu = (double *) malloc(sizeof(double)*nSamples); for(int i = 0; i < nSamples; i++){ adMu[i] = ((double) numSeqs)*dutils.calcMu(&atIGParams[i]); } qsort(adMu, nSamples, sizeof(double), compare_doubles1); dLower = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.025); dMedian = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.5); dUpper = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.975); m->mothurOut("\nIGRarefaction - d_Lower = " + toString(dLower) + " d_Median = " + toString(dMedian) + " d_Upper = " + toString(dUpper) + "\n\n"); if 
(isnan(dLower) || isinf(dLower)) { dLower = 0; } if (isnan(dMedian) || isinf(dMedian)) { dMedian = 0; } if (isnan(dUpper) || isinf(dUpper)) { dUpper = 0; } results.push_back(dLower); results.push_back(dMedian); results.push_back(dUpper); free(adMu); free(atIGParams); #endif return results; } catch(exception& e) { m->errorOut(e, "IGRarefaction", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/igrarefaction.hpp000066400000000000000000000013561424121717000222700ustar00rootroot00000000000000// // igrarefaction.hpp // Mothur // // Created by Sarah Westcott on 5/6/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef igrarefaction_hpp #define igrarefaction_hpp #include "diversityutils.hpp" #include "diversitycalc.h" //IGRarefaction /***********************************************************************/ class IGRarefaction : public DiversityCalculator { public: IGRarefaction(double c); ~IGRarefaction() = default; vector getValues(int ns, vector& sampling); string getTag() { return "ig"; } private: double coverage; }; /***********************************************************************/ #endif /* igrarefaction_hpp */ mothur-1.48.0/source/calculators/invsimpson.cpp000077500000000000000000000015021424121717000216470ustar00rootroot00000000000000/* * invsimpson.cpp * Mothur * * Created by Pat Schloss on 8/20/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "invsimpson.h" #include "simpson.h" /***********************************************************************/ EstOutput InvSimpson::getValues(SAbundVector* rank){ try { //vector simpsonData(3,0); data.resize(3,0); vector simpData(3,0); Simpson* simp = new Simpson(); simpData = simp->getValues(rank); if(!util.isEqual(simpData[0], 0)){ data[0] = 1/simpData[0]; data[1] = 1/simpData[2]; data[2] = 1/simpData[1]; } else{ data.assign(3,1); } delete simp; return data; } catch(exception& e) { m->errorOut(e, "InvSimpson", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/invsimpson.h000077500000000000000000000012031424121717000213120ustar00rootroot00000000000000#ifndef INVSIMPSON #define INVSIMPSON /* * invsimpson.h * Mothur * * Created by Pat Schloss on 8/20/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class InvSimpson : public Calculator { public: InvSimpson() : Calculator("invsimpson", 3, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/InvSimpson"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/jackknife.cpp000077500000000000000000000064521424121717000214000ustar00rootroot00000000000000/* * jacknife.cpp * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
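 * Higher-order jackknife richness estimator: getAMatrix() precomputes the jackknife coefficients from a binomial
 * table, getValues() forms the order-i estimates N[i] and their variances, chooses the order by testing successive
 * differences with the normal tail probability CN() (stopping at the first p >= 0.05), interpolates between the
 * two candidate orders, and reports the estimate with a 1.96*sqrt(variance) confidence interval.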
* */ #include "jackknife.h" #include "utils.hpp" /***********************************************************************/ void Jackknife::getAMatrix(void){ try { Utils util; vector > B = util.binomial(maxOrder); aMat.resize(maxOrder+1); for(int i=0;i<=maxOrder;i++){ aMat[i].resize(maxOrder+1); for(int j=1;j<=maxOrder;j++){ aMat[i][j] = 1 + B[i][j] * (int)(pow(-1.0,j+1)); } } } catch(exception& e) { m->errorOut(e, "Jackknife", "getAMatrix"); exit(1); } } /**************************************************************************************************/ double Jackknife::CN(double z){ try { if(z>6.0) { return 0.0; } if(z<-6.0) { return 0.0; } const double b1= 0.31938153; const double b2= -0.356563782; const double b3= 1.781477937; const double b4= -1.821255978; const double b5= 1.330274429; const double p= 0.2316419; const double c2= 0.3989423; double a=abs(z); double t=1.0/(1.0+a*p); double b=c2*exp((-z)*(z/2.0)); double n=((((b5*t+b4)*t+b3)*t+b2)*t+b1)*t; n = 2*b*n; return n; } catch(exception& e) { m->errorOut(e, "Jackknife", "CN"); exit(1); } } /***********************************************************************/ EstOutput Jackknife::getValues(SAbundVector* rank){ try { //EstOutput jackData(3,0); data.resize(3,0); double jack, jacklci, jackhci; int maxRank = (double)rank->getMaxRank(); int S = rank->getNumBins(); double N[maxOrder+1]; double variance[maxOrder+1]; double p[maxOrder+1]; int k = 0; for(int i=0;i<=maxOrder;i++){ N[i]=0.0000; variance[i]=0.0000; for(int j=1;j<=maxRank;j++){ if(j<=i){ N[i] += aMat[i][j]*rank->get(j); variance[i] += aMat[i][j]*aMat[i][j]*rank->get(j); } else{ N[i] += rank->get(j); variance[i] += rank->get(j); } } variance[i] = variance[i]-N[i]; double var = 0.0000; if(i>0){ for(int j=1;j<=maxRank;j++){ if(j<=i){ var += rank->get(j)*pow((aMat[i][j]-aMat[i-1][j]),2.0); } else { var += 0.0000; } } var -= ((N[i]-N[i-1])*(N[i]-N[i-1]))/S; var = var * S / (S-1); double T = (N[i]-N[i-1])/sqrt(var); if(T<=0.00){ p[i-1] = 1.00000; } else{ p[i-1] = CN(T); } if(p[i-1]>=0.05){ k = i-1; break; } } if(i == maxOrder){ k=1; } } double ci = 0; if(k>1){ double c = (0.05-p[k-1])/(p[k]-p[k-1]); ci = 0.0000; jack = c*N[k]+(1-c)*N[k-1]; for(int j=1;j<=maxRank;j++){ if(j<=k){ ci += rank->get(j)*pow((c*aMat[k][j]+(1-c)*aMat[k-1][j]),2.0); } else { ci += rank->get(j); } } ci = 1.96 * sqrt(ci - jack); } else if(k==1){ jack = N[1]; ci = 1.96*sqrt(variance[1]); }else{ jack = 0.0; ci = 0.0; } jacklci = jack-ci; jackhci = jack+ci; data[0] = jack; data[1] = jacklci; data[2] = jackhci; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Jackknife", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/jackknife.h000077500000000000000000000015741424121717000210450ustar00rootroot00000000000000#ifndef JACKKNIFE_H #define JACKKNIFE_H /* * jacknife.h * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /* This class implements the JackKnife estimator on single group. It is a child of the calculator class. 
*/ /***********************************************************************/ class Jackknife : public Calculator { public: Jackknife() : Calculator("jackknife", 3, false) { getAMatrix(); }; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Jackknife"; } private: static const int maxOrder = 30; vector > aMat; void getAMatrix(); double CN(double); }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/jtt.hpp000066400000000000000000000211451424121717000202520ustar00rootroot00000000000000// // jtt.hpp // Mothur // // Created by Sarah Westcott on 5/26/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #ifndef jtt_hpp #define jtt_hpp #include "calculator.h" /* this jtt matrix decomposition due to Elisabeth Tillier */ static double jtteigs[] = {+0.00000000000000,-1.81721720738768,-1.87965834528616,-1.61403121885431, -1.53896608443751,-1.40486966367848,-1.30995061286931,-1.24668414819041, -1.17179756521289,-0.31033320987464,-0.34602837857034,-1.06031718484613, -0.99900602987105,-0.45576774888948,-0.86014403434677,-0.54569432735296, -0.76866956571861,-0.60593589295327,-0.65119724379348,-0.70249806480753}; static double jttprobs[20][20] = {{+0.07686196156903,+0.05105697447152,+0.04254597872702,+0.05126897436552, +0.02027898986051,+0.04106097946952,+0.06181996909002,+0.07471396264303, +0.02298298850851,+0.05256897371552,+0.09111095444453,+0.05949797025102, +0.02341398829301,+0.04052997973502,+0.05053197473402,+0.06822496588753, +0.05851797074102,+0.01433599283201,+0.03230298384851,+0.06637396681302}, {-0.04445795120462,-0.01557336502860,-0.09314817363516,+0.04411372100382, -0.00511178725134,+0.00188472427522,-0.02176250428454,-0.01330231089224, +0.01004072641973,+0.02707838224285,-0.00785039050721,+0.02238829876349, +0.00257470703483,-0.00510311699563,-0.01727154263346,+0.20074235330882, -0.07236268502973,-0.00012690116016,-0.00215974664431,-0.01059243778174}, {+0.09480046389131,+0.00082658405814,+0.01530023104155,-0.00639909042723, +0.00160605602061,+0.00035896642912,+0.00199161318384,-0.00220482855717, -0.00112601328033,+0.14840201765438,-0.00344295714983,-0.00123976286718, -0.00439399942758,+0.00032478785709,-0.00104270266394,-0.02596605592109, -0.05645800566901,+0.00022319903170,-0.00022792271829,-0.16133258048606}, {-0.06924141195400,-0.01816245289173,-0.08104005811201,+0.08985697111009, +0.00279659017898,+0.01083740322821,-0.06449599336038,+0.01794514261221, +0.01036809141699,+0.04283504450449,+0.00634472273784,+0.02339134834111, -0.01748667848380,+0.00161859106290,+0.00622486432503,-0.05854130195643, +0.15083728660504,+0.00030733757661,-0.00143739522173,-0.05295810171941}, {-0.14637948915627,+0.02029296323583,+0.02615316895036,-0.10311538564943, -0.00183412744544,-0.02589124656591,+0.11073673851935,+0.00848581728407, +0.00106057791901,+0.05530240732939,-0.00031533506946,-0.03124002869407, -0.01533984125301,-0.00288717337278,+0.00272787410643,+0.06300929916280, +0.07920438311152,-0.00041335282410,-0.00011648873397,-0.03944076085434}, {-0.05558229086909,+0.08935293782491,+0.04869509588770,+0.04856877988810, -0.00253836047720,+0.07651693957635,-0.06342453535092,-0.00777376246014, -0.08570270266807,+0.01943016473512,-0.00599516526932,-0.09157595008575, -0.00397735155663,-0.00440093863690,-0.00232998056918,+0.02979967701162, -0.00477299485901,-0.00144011795333,+0.01795114942404,-0.00080059359232}, 
{+0.05807741644682,+0.14654292420341,-0.06724975334073,+0.02159062346633, -0.00339085518294,-0.06829036785575,+0.03520631903157,-0.02766062718318, +0.03485632707432,-0.02436836692465,-0.00397566003573,-0.10095488644404, +0.02456887654357,+0.00381764117077,-0.00906261340247,-0.01043058066362, +0.01651199513994,-0.00210417220821,-0.00872508520963,-0.01495915462580}, {+0.02564617106907,+0.02960554611436,-0.00052356748770,+0.00989267817318, -0.00044034172141,-0.02279910634723,-0.00363768356471,-0.01086345665971, +0.01229721799572,+0.02633650142592,+0.06282966783922,-0.00734486499924, -0.13863936313277,-0.00993891943390,-0.00655309682350,-0.00245191788287, -0.02431633805559,-0.00068554031525,-0.00121383858869,+0.06280025239509}, {+0.11362428251792,-0.02080375718488,-0.08802750967213,-0.06531316372189, -0.00166626058292,+0.06846081717224,+0.07007301248407,-0.01713112936632, -0.05900588794853,-0.04497159138485,+0.04222484636983,+0.00129043178508, -0.01550337251561,-0.01553102163852,-0.04363429852047,+0.01600063777880, +0.05787328925647,-0.00008265841118,+0.02870014572813,-0.02657681214523}, {+0.01840541226842,+0.00610159018805,+0.01368080422265,+0.02383751807012, -0.00923516894192,+0.01209943150832,+0.02906782189141,+0.01992384905334, +0.00197323568330,+0.00017531415423,-0.01796698381949,+0.01887083962858, -0.00063335886734,-0.02365277334702,+0.01209445088200,+0.01308086447947, +0.01286727242301,-0.11420358975688,-0.01886991700613,+0.00238338728588}, {-0.01100105031759,-0.04250695864938,-0.02554356700969,-0.05473632078607, +0.00725906469946,-0.03003724918191,-0.07051526125013,-0.06939439879112, -0.00285883056088,+0.05334304124753,+0.12839241846919,-0.05883473754222, +0.02424304967487,+0.09134510778469,-0.00226003347193,-0.01280041778462, -0.00207988305627,-0.02957493909199,+0.05290385686789,+0.05465710875015}, {-0.01421274522011,+0.02074863337778,-0.01006411985628,+0.03319995456446, -0.00005371699269,-0.12266046460835,+0.02419847062899,-0.00441168706583, -0.08299118738167,-0.00323230913482,+0.02954035119881,+0.09212856795583, +0.00718635627257,-0.02706936115539,+0.04473173279913,-0.01274357634785, -0.01395862740618,-0.00071538848681,+0.04767640012830,-0.00729728326990}, {-0.03797680968123,+0.01280286509478,-0.08614616553187,-0.01781049963160, +0.00674319990083,+0.04208667754694,+0.05991325707583,+0.03581015660092, -0.01529816709967,+0.06885987924922,-0.11719120476535,-0.00014333663810, +0.00074336784254,+0.02893416406249,+0.07466151360134,-0.08182016471377, -0.06581536577662,-0.00018195976501,+0.00167443595008,+0.09015415667825}, {+0.03577726799591,-0.02139253448219,-0.01137813538175,-0.01954939202830, -0.04028242801611,-0.01777500032351,-0.02106862264440,+0.00465199658293, -0.02824805812709,+0.06618860061778,+0.08437791757537,-0.02533125946051, +0.02806344654855,-0.06970805797879,+0.02328376968627,+0.00692992333282, +0.02751392122018,+0.01148722812804,-0.11130404325078,+0.07776346000559}, {-0.06014297925310,-0.00711674355952,-0.02424493472566,+0.00032464353156, +0.00321221847573,+0.03257969053884,+0.01072805771161,+0.06892027923996, +0.03326534127710,-0.01558838623875,+0.13794237677194,-0.04292623056646, +0.01375763233229,-0.11125153774789,+0.03510076081639,-0.04531670712549, -0.06170413486351,-0.00182023682123,+0.05979891871679,-0.02551802851059}, {-0.03515069991501,+0.02310847227710,+0.00474493548551,+0.02787717003457, -0.12038329679812,+0.03178473522077,+0.04445111601130,-0.05334957493090, +0.01290386678474,-0.00376064171612,+0.03996642737967,+0.04777677295520, 
+0.00233689200639,+0.03917715404594,-0.01755598277531,-0.03389088626433, -0.02180780263389,+0.00473402043911,+0.01964539477020,-0.01260807237680}, {-0.04120428254254,+0.00062717164978,-0.01688703578637,+0.01685776910152, +0.02102702093943,+0.01295781834163,+0.03541815979495,+0.03968150445315, -0.02073122710938,-0.06932247350110,+0.11696314241296,-0.00322523765776, -0.01280515661402,+0.08717664266126,+0.06297225078802,-0.01290501780488, -0.04693925076877,-0.00177653675449,-0.08407812137852,-0.08380714022487}, {+0.03138655228534,-0.09052573757196,+0.00874202219428,+0.06060593729292, -0.03426076652151,-0.04832468257386,+0.04735628794421,+0.14504653737383, -0.01709111334001,-0.00278794215381,-0.03513813820550,-0.11690294831883, -0.00836264902624,+0.03270980973180,-0.02587764129811,+0.01638786059073, +0.00485499822497,+0.00305477087025,+0.02295754527195,+0.00616929722958}, {-0.04898722042023,-0.01460879656586,+0.00508708857036,+0.07730497806331, +0.04252420017435,+0.00484232580349,+0.09861807969412,-0.05169447907187, -0.00917820907880,+0.03679081047330,+0.04998537112655,+0.00769330211980, +0.01805447683564,-0.00498723245027,-0.14148416183376,-0.05170281760262, -0.03230723310784,-0.00032890672639,-0.02363523071957,+0.03801365471627}, {-0.02047562162108,+0.06933781779590,-0.02101117884731,-0.06841945874842, -0.00860967572716,-0.00886650271590,-0.07185241332269,+0.16703684361030, -0.00635847581692,+0.00811478913823,+0.01847205842216,+0.06700967948643, +0.00596607376199,+0.02318239240593,-0.10552958537847,-0.01980199747773, -0.02003785382406,-0.00593392430159,-0.00965391033612,+0.00743094349652}}; /**************************************************************************************************/ //Jones-Taylor-Thornton matrix class JTT : public DistCalc { public: JTT(double c) : DistCalc(c) { name = "JTT (Jones-Taylor-Thornton matrix)"; } double calcDist(Protein A, Protein B) { return (makeDists(A, B, jtteigs, jttprobs));} //calc distance between 2 seqeunces string getCitation() { return "https://evolution.gs.washington.edu/phylip/doc/protdist.html, https://evolution.genetics.washington.edu/phylip/credits.html"; } private: }; /**************************************************************************************************/ #endif /* jtt_hpp */ mothur-1.48.0/source/calculators/kimura.cpp000066400000000000000000000026241424121717000207350ustar00rootroot00000000000000// // kimura.cpp // Mothur // // Created by Sarah Westcott on 7/2/21. // Copyright © 2021 Schloss Lab. All rights reserved. 
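// Kimura's protein distance: with p the fraction of differing residues over the shared (non-gap) positions, the
// distance is -ln(1 - p - 0.2*p^2); pairs too divergent for the formula (1 - p - 0.2*p^2 < 0) trigger a warning
// and are reported as -1.0.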
// #include "kimura.hpp" /***********************************************************************/ double Kimura::calcDist(Protein A, Protein B) { try { int numBases = A.getAlignLength(); vector seqA = A.getAligned(); vector seqB = B.getAligned(); int lm = 0; int n = 0; for (int i = 0; i < numBases; i++) { int numA = seqA[i].getNum(); int numB = seqB[i].getNum(); if ((((long)numA <= (long)val) || ((long)numA == (long)ser)) && (((long)numB <= (long)val) || ((long)numB == (long)ser))) { if (numA == numB) { lm++; } n++; } } double p = 1 - (double)lm / n; double dp = 1.0 - p - 0.2 * p * p; if (dp < 0.0) { m->mothurOut("[WARNING]: DISTANCE BETWEEN SEQUENCES " + A.getName() + " AND " + B.getName() + " IS TOO LARGE FOR KIMURA FORMULA, setting distance to -1.0.\n"); dist = -1.0; } else { dist = -log(dp); } return dist; } catch(exception& e) { m->errorOut(e, "Kimura", "calcDist"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/kimura.hpp000066400000000000000000000015161424121717000207410ustar00rootroot00000000000000// // kimura.hpp // Mothur // // Created by Sarah Westcott on 7/2/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #ifndef kimura_hpp #define kimura_hpp #include "calculator.h" /**************************************************************************************************/ //Kimura(Kimura formula) class Kimura : public DistCalc { public: Kimura(double c) : DistCalc(c) { name = "Kimura (Kimura formula)"; } double calcDist(Protein A, Protein B); //calc distance between 2 seqeunces string getCitation() { return "https://evolution.gs.washington.edu/phylip/doc/protdist.html, https://evolution.genetics.washington.edu/phylip/credits.html"; } private: }; /**************************************************************************************************/ #endif /* kimura_hpp */ mothur-1.48.0/source/calculators/kmerdist.cpp000066400000000000000000000062471424121717000212740ustar00rootroot00000000000000// // kmerdist.cpp // Mothur // // Created by Sarah Westcott on 3/29/21. // Copyright © 2021 Schloss Lab. All rights reserved. 
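// k-mer distance following Edgar (2004), as cited in kmerdist.hpp: the k-mers of each unaligned sequence are
// tallied and the calculator returns log(1 - sharedKmers/possibleKmers), so very similar pairs give strongly
// negative values while pairs sharing no k-mers score 0.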
// #include "kmerdist.hpp" #include "kmer.hpp" /***********************************************************************/ KmerDist::KmerDist(int k) { try { m = MothurOut::getInstance(); int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; kmerSize = k; maxKmer = power4s[kmerSize]; } catch(exception& e) { m->errorOut(e, "KmerDist", "KmerDist"); exit(1); } } /***********************************************************************/ double KmerDist::calcDist(Sequence A, Sequence B){ try { string seqA = A.getUnaligned(); string seqB = B.getUnaligned(); int numKmers = min(seqA.length(), seqB.length()) - kmerSize + 1; Kmer kmer(kmerSize); int numSeqAKmers = seqA.length() - kmerSize + 1; vector seqAKmers(maxKmer+1,0); for(int j=0;j seqBKmers(maxKmer+1,0); for(int j=0;jerrorOut(e, "KmerDist", "calcDist"); exit(1); } } /***********************************************************************/ vector KmerDist::calcDist(vector A, vector B, int length){ try { int numAKmers = 0; int numMatchingKmers = 0; int numUniqueMatchingKmers = 0; int numKmers = length - kmerSize + 1; for (int i = 0; i < A.size(); i++) { numAKmers += A[i].kCount; if (B[A[i].kmerNumber] != 0) { //does sequence B contain this kmer numMatchingKmers += min(A[i].kCount, B[A[i].kmerNumber]); numUniqueMatchingKmers++; } } double dist1 = log(1.0 - (numMatchingKmers / (float) numKmers)); double dist2 = log(1.0 - (numUniqueMatchingKmers / (float) numKmers)); vector dists; dists.push_back(dist1); dists.push_back(dist2); return dists; } catch(exception& e) { m->errorOut(e, "KmerDist", "calcDist"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/kmerdist.hpp000066400000000000000000000020421424121717000212660ustar00rootroot00000000000000// // kmerdist.hpp // Mothur // // Created by Sarah Westcott on 3/29/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #ifndef kmerdist_hpp #define kmerdist_hpp #include "calculator.h" /* This calculator is based on Edgar's method described here, Edgar, 2004 Edgar, R. C. (2004). Muscle: a multiple sequence alignment method with reduced time and space complexity. BMC Bioinformatics, 5:113. */ /**************************************************************************************************/ class KmerDist { public: KmerDist(int k); double calcDist(Sequence A, Sequence B); string getCitation() { return "http://mothur.org"; } vector calcDist(vector A, vector B, int); //A contains indexes to kmers it contains, B is size maxKmer intialized to false with kmers it contains set to true private: int kmerSize, maxKmer; MothurOut* m; }; /**************************************************************************************************/ #endif /* kmerdist_hpp */ mothur-1.48.0/source/calculators/lnabundance.cpp000066400000000000000000000040271424121717000217160ustar00rootroot00000000000000// // lnabundace.cpp // Mothur // // Created by Sarah Westcott on 5/8/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
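// Expected species-abundance curve under the log-normal model: each abundance class's fitted probability is
// evaluated with logLikelihoodQuad() for counts below 100 and logLikelihoodRampal() otherwise, weighted by the
// sampled richness ns, and averaged over the MCMC samples.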
// #include "lnabundance.hpp" /***********************************************************************/ LNAbundance::LNAbundance() : DiversityCalculator(true) {} /***********************************************************************/ vector LNAbundance::getValues(int maxRank, vector& sampling) { //rank->getMaxRank(); //nMax try { maxRank = floor(pow(2.0,ceil(log((double) maxRank)/log(2.0)) + 2.0) + 1.0e-7); results.resize(maxRank, 0.0); int nSamples = sampling.size(); if (nSamples == 0) { return results; } #ifdef USE_GSL gsl_set_error_handler_off(); DiversityUtils dutils("lnabund"); for(int i = 0; i < sampling.size(); i++) { if (m->getControl_pressed()) { break; } for (int j = 1; j <= maxRank; j++) { int nA = j; double dLog = 0.0, dP = 0.0; if(nA < 100){ //MAX_QUAD dLog = dutils.logLikelihoodQuad(nA, sampling[i].alpha, sampling[i].beta); //nA, dMDash, dV } else{ dLog = dutils.logLikelihoodRampal(nA, sampling[i].alpha, sampling[i].beta); //nA, dMDash, dV } dP = exp(dLog); results[j - 1] += dP*sampling[i].ns; } } for (int i = 1; i<=maxRank; i++) { results[i-1] /= (double)nSamples; if (isnan(results[i-1]) || isinf(results[i-1])) { results[i-1] = 0.0; } } #endif return results; } catch(exception& e) { m->errorOut(e, "LNAbundance", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/lnabundance.hpp000066400000000000000000000012211424121717000217140ustar00rootroot00000000000000// // lnabundace.hpp // Mothur // // Created by Sarah Westcott on 5/8/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef lnabundace_hpp #define lnabundace_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class LNAbundance : public DiversityCalculator { public: LNAbundance(); vector getValues(int mr, vector& sampling); string getTag() { return "ln"; } private: }; /***********************************************************************/ #endif /* lnabundace_hpp */ mothur-1.48.0/source/calculators/lnrarefaction.cpp000066400000000000000000000054411424121717000222740ustar00rootroot00000000000000// // lnrarefaction.cpp // Mothur // // Created by Sarah Westcott on 5/13/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
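// Log-normal rarefaction: for every MCMC sample the expected OTU count at the requested coverage is
// numSeqs*calcMu(); the 2.5%, 50% and 97.5% quantiles of those values are returned as the lower, median and
// upper estimates.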
// #include "lnrarefaction.hpp" /***********************************************************************/ LNRarefaction::LNRarefaction(double c) : coverage(c), DiversityCalculator(true) {} /***********************************************************************/ inline int compare_doubles(const void* a, const void* b) { double* arg1 = (double *) a; double* arg2 = (double *) b; if( *arg1 < *arg2 ) return -1; else if( *arg1 == *arg2 ) return 0; else return 1; } /***********************************************************************/ vector LNRarefaction::getValues(int numSeqs, vector& sampling){ //rank->getNumSeqs(); //nj try { #ifdef USE_GSL DiversityUtils dutils("lnrarefaction"); int nSamples = sampling.size(); double* adMu = nullptr; double dLower = 0.0, dMedian = 0.0, dUpper = 0.0; gsl_set_error_handler_off(); t_IGParams* atIGParams; atIGParams = (t_IGParams *) malloc(nSamples*sizeof(t_IGParams)); //MAX_SAMPLES //load sampling data for (int i = 0; i < nSamples; i++) { if (m->getControl_pressed()) { free(atIGParams); return results; } atIGParams[i].dAlpha = sampling[i].alpha; atIGParams[i].dBeta = sampling[i].beta; atIGParams[i].nS = sampling[i].ns; atIGParams[i].dC = coverage; } adMu = (double *) malloc(sizeof(double)*nSamples); for(int i = 0; i < nSamples; i++){ adMu[i] = ((double) numSeqs)*dutils.calcMu(&atIGParams[i]); } qsort(adMu, nSamples, sizeof(double), compare_doubles); dLower = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.025); dMedian = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.5); dUpper = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.975); m->mothurOut("\nLNRarefaction - d_Lower = " + toString(dLower) + " d_Median = " + toString(dMedian) + " d_Upper = " + toString(dUpper) + "\n\n"); if (isnan(dLower) || isinf(dLower)) { dLower = 0; } if (isnan(dMedian) || isinf(dMedian)) { dMedian = 0; } if (isnan(dUpper) || isinf(dUpper)) { dUpper = 0; } results.push_back(dLower); results.push_back(dMedian); results.push_back(dUpper); free(adMu); free(atIGParams); #endif return results; } catch(exception& e) { m->errorOut(e, "LNRarefaction", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/lnrarefaction.hpp000066400000000000000000000013551424121717000223010ustar00rootroot00000000000000// // lnrarefaction.hpp // Mothur // // Created by Sarah Westcott on 5/13/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef lnrarefaction_hpp #define lnrarefaction_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class LNRarefaction : public DiversityCalculator { public: LNRarefaction(double c);// : coverage(c) { m = MothurOut::getInstance(); } vector getValues(int ns, vector& sampling); string getTag() { return "ln"; } private: double coverage; }; /***********************************************************************/ #endif /* lnrarefaction_hpp */ mothur-1.48.0/source/calculators/lnshift.cpp000066400000000000000000000036751424121717000211230ustar00rootroot00000000000000// // lnshift.cpp // Mothur // // Created by Sarah Westcott on 5/14/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
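// Log-normal abundance curve rescaled to a common sequencing depth: each sample's alpha is shifted by
// log(5.0e5/numSeqs) before the likelihoods are evaluated, so the predicted curve corresponds to a sample of
// 500,000 reads.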
// #include "lnshift.hpp" /***********************************************************************/ LNShift::LNShift() : DiversityCalculator(true) {} /***********************************************************************/ vector LNShift::getValues(int numSeqs, vector& sampling) { try { int nMax = 100000; //nMax results.resize(nMax, 0.0); int nSamples = sampling.size(); if (nSamples == 0) { return results; } #ifdef USE_GSL DiversityUtils dutils("lnshift"); gsl_set_error_handler_off(); double dShift = log(5.0e5/(double)numSeqs); for(int i = 0; i < sampling.size(); i++) { if (m->getControl_pressed()) { break; } for (int j = 1; j <= nMax; j++) { int nA = j; double dLog = 0.0, dP = 0.0; if(nA < 100){ //MAX_QUAD dLog = dutils.logLikelihoodQuad(nA, sampling[i].alpha + dShift, sampling[i].beta); } else{ dLog = dutils.logLikelihoodRampal(nA, sampling[i].alpha + dShift, sampling[i].beta); } dP = exp(dLog); results[j - 1] += dP*sampling[i].ns; } } for (int i = 1; i<=nMax; i++) { results[i-1] /= (double)nSamples; if (isnan(results[i-1]) || isinf(results[i-1])) { results[i-1] = 0.0; } } #endif return results; } catch(exception& e) { m->errorOut(e, "LNShift", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/lnshift.hpp000066400000000000000000000011701424121717000211140ustar00rootroot00000000000000// // lnshift.hpp // Mothur // // Created by Sarah Westcott on 5/14/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef lnshift_hpp #define lnshift_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class LNShift : public DiversityCalculator { public: LNShift(); vector getValues(int ns, vector& sampling); string getTag() { return "ln"; } private: }; /***********************************************************************/ #endif /* lnshift_hpp */ mothur-1.48.0/source/calculators/logsd.cpp000077500000000000000000000131731424121717000205610ustar00rootroot00000000000000/* * logsd.cpp * Mothur * * Created by Thomas Ryabin on 2/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
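 * Log-series fit: a binary search solves S/N = -(1-x)*ln(1-x)/x for x, alpha is then N*(1-x)/x, and cumulative
 * observed versus expected octave totals are compared; the Kolmogorov-Smirnov style statistic (maxDiff + 0.5)/S
 * is reported with the 0.886/sqrt(S) and 1.031/sqrt(S) critical values.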
* */ #include "logsd.h" /***********************************************************************/ double LogSD::logS(double x){ return -(1-x)*log(1-x)/x; } /***********************************************************************/ EstOutput LogSD::getValues(SAbundVector* rank){ try { /*test data VVV int dstring[] = {0,37,22,12,12,11,11,6,4,3,5,2,4,2,3,2,2,4,2,0,4,4,1,1,0,1,0,0,2,2,0,0,0,2,2,0,0,0,1,1,3,0,2,0,0,0,0,0,2,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,2,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; vector dvec; for(int i = 0; i < 1800; i++) dvec.push_back(dstring[i]); int mr = 1799; int nb = 197; int ns = 6815; SAbundVector rankw = SAbundVector(dvec, mr,nb,ns); SAbundVector *rank = &rankw;*/ data.resize(3,0); double numInd = rank->getNumSeqs(); double numSpec = rank->getNumBins(); double snRatio = (double)numSpec/numInd; double x = .5; double step = .4999999999; while(fabs(snRatio - logS(x)) > .00001) { //This uses a binary search to find the value of x. if(logS(x) > snRatio) x += step; else x -= step; step /= 2; } double alpha = numInd*(1-x)/x; double oct = 1; double octSumObs = 0; double sumObs = 0; double octSumExp = 0; double sumExp = 0; double maxDiff = 0; for(int y = 1; y < rank->size(); y++) { if(y - .5 < pow(2.0, oct)) { octSumObs += rank->get(y); octSumExp += alpha*pow(x,y)/(y); } else { sumObs += octSumObs; octSumObs = rank->get(y); sumExp += octSumExp; octSumExp = alpha*pow(x,y)/(y); oct++; } if(y == rank->size()-1) { sumObs += octSumObs; sumExp += octSumExp; } double diff = fabs(sumObs - .5 - sumExp); if(diff > maxDiff) maxDiff = diff; } data[0] = (maxDiff + .5)/numSpec; data[1] = 0.886/sqrt(numSpec); data[2] = 1.031/sqrt(numSpec); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } catch(exception& e) { m->errorOut(e, "LogSD", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/logsd.h000077500000000000000000000014351424121717000202240ustar00rootroot00000000000000#ifndef LOGSD_H #define LOGSD_H /* * logsd.h * Mothur * * Created by Thomas Ryabin on 2/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /*This class implements the LogSD estimator on single group. It is a child of the calculator class.*/ /***********************************************************************/ class LogSD : public Calculator { public: LogSD() : Calculator("logseries", 3, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/LogSeries"; } private: double logS(double); RAbundVector rdata; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/lsabundance.cpp000066400000000000000000000037651424121717000217330ustar00rootroot00000000000000// // lsabundance.cpp // Mothur // // Created by Sarah Westcott on 5/16/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
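// Same structure as LNAbundance above, but each sample supplies an extra shape parameter dNu to the likelihood
// (the LS model, presumably a log-Student's t variant); fitted probabilities are again weighted by the sampled
// richness and averaged over the MCMC samples.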
// #include "lsabundance.hpp" /***********************************************************************/ LSAbundance::LSAbundance() : DiversityCalculator(true) {} /***********************************************************************/ vector LSAbundance::getValues(int nMax, vector& sampling) { //int nMax = rank->getMaxRank(); try { nMax = floor(pow(2.0,ceil(log((double) nMax)/log(2.0)) + 2.0) + 1.0e-7); results.resize(nMax, 0.0); int nSamples = sampling.size(); if (nSamples == 0) { return results; } #ifdef USE_GSL DiversityUtils dutils("lsabund"); gsl_set_error_handler_off(); for(int i = 0; i < sampling.size(); i++) { if (m->getControl_pressed()) { break; } for (int j = 1; j <= nMax; j++) { int nA = j; double dLog = 0.0, dP = 0.0; if(nA < 100){ //MAX_QUAD dLog = dutils.logLikelihoodQuad(nA, sampling[i].alpha, sampling[i].beta, sampling[i].dNu); } else{ dLog = dutils.logLikelihoodRampal(nA, sampling[i].alpha, sampling[i].beta, sampling[i].dNu); } dP = exp(dLog); results[j - 1] += dP*sampling[i].ns; } } for (int i = 1; i<=nMax; i++) { results[i-1] /= (double)nSamples; if (isnan(results[i-1]) || isinf(results[i-1])) { results[i-1] = 0.0; } } #endif return results; } catch(exception& e) { m->errorOut(e, "LSAbundance", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/lsabundance.hpp000066400000000000000000000012201424121717000217200ustar00rootroot00000000000000// // lsabundance.hpp // Mothur // // Created by Sarah Westcott on 5/16/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef lsabundance_hpp #define lsabundance_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class LSAbundance : public DiversityCalculator { public: LSAbundance(); vector getValues(int mr, vector& sampling); string getTag() { return "ls"; } private: }; /***********************************************************************/ #endif /* lsabundance_hpp */ mothur-1.48.0/source/calculators/lsrarefaction.cpp000066400000000000000000000056171424121717000223060ustar00rootroot00000000000000// // lsrarefaction.cpp // Mothur // // Created by Sarah Westcott on 5/20/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
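// LS rarefaction, the dNu-parameterized counterpart of LNRarefaction: the 2.5%, 50% and 97.5% quantiles of
// numSeqs*calcMu() over the MCMC samples are returned as the lower, median and upper OTU estimates at the
// requested coverage.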
// #include "lsrarefaction.hpp" /***********************************************************************/ LSRarefaction::LSRarefaction(double c) : coverage(c), DiversityCalculator(true) {} /***********************************************************************/ int compare_doubles2(const void* a, const void* b) { double* arg1 = (double *) a; double* arg2 = (double *) b; if( *arg1 < *arg2 ) return -1; else if( *arg1 == *arg2 ) return 0; else return 1; } /***********************************************************************/ vector LSRarefaction::getValues(int numSeqs, vector& sampling){ //rank->getNumSeqs(); //nj try { #ifdef USE_GSL DiversityUtils dutils("lsrarefaction"); int nSamples = sampling.size(); double* adMu = nullptr; double dLower = 0.0, dMedian = 0.0, dUpper = 0.0; gsl_set_error_handler_off(); t_LSParams* atLSParams; atLSParams = (t_LSParams *) malloc(nSamples*sizeof(t_LSParams)); //MAX_SAMPLES //load sampling data for (int i = 0; i < nSamples; i++) { if (m->getControl_pressed()) { free(atLSParams); return results; } atLSParams[i].dMDash = sampling[i].alpha; atLSParams[i].dV = sampling[i].beta; atLSParams[i].dNu = sampling[i].dNu; atLSParams[i].dC = coverage; atLSParams[i].n = 0; } adMu = (double *) malloc(sizeof(double)*nSamples); for(int i = 0; i < nSamples; i++){ adMu[i] = ((double) numSeqs)*dutils.calcMu(&atLSParams[i]); } qsort(adMu, nSamples, sizeof(double), compare_doubles2); dLower = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.025); dMedian = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.5); dUpper = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.975); if (isnan(dLower) || isinf(dLower)) { dLower = 0; } if (isnan(dMedian) || isinf(dMedian)) { dMedian = 0; } if (isnan(dUpper) || isinf(dUpper)) { dUpper = 0; } m->mothurOut("\nLSRarefaction - d_Lower = " + toString(dLower) + " d_Median = " + toString(dMedian) + " d_Upper = " + toString(dUpper) + "\n\n"); results.push_back(dLower); results.push_back(dMedian); results.push_back(dUpper); //printf("%.2e:%.2e:%.2e ", dLower, dMedian, dUpper); free(adMu); free(atLSParams); #endif return results; } catch(exception& e) { m->errorOut(e, "LSRarefaction", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/lsrarefaction.hpp000066400000000000000000000012741424121717000223060ustar00rootroot00000000000000// // lsrarefaction.hpp // Mothur // // Created by Sarah Westcott on 5/20/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef lsrarefaction_hpp #define lsrarefaction_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class LSRarefaction : public DiversityCalculator { public: LSRarefaction(double c); vector getValues(int ns, vector& sampling); string getTag() { return "ls"; } private: double coverage; }; /***********************************************************************/ #endif /* lsrarefaction_hpp */ mothur-1.48.0/source/calculators/manhattan.cpp000077500000000000000000000014021424121717000214140ustar00rootroot00000000000000/* * manhattan.cpp * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "manhattan.h" /***********************************************************************/ EstOutput Manhattan::getValues(vector shared) { try { data.resize(1,0); double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); sum += abs((Aij - Bij)); } data[0] = sum; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Manhattan", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/manhattan.h000077500000000000000000000012111424121717000210570ustar00rootroot00000000000000#ifndef MANHATTAN_H #define MANHATTAN_H /* * manhattan.h * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Manhattan : public Calculator { public: Manhattan() : Calculator("manhattan", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Manhattan"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/mcc.cpp000077500000000000000000000016561424121717000202160ustar00rootroot00000000000000// // mcc.cpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "mcc.hpp" /***********************************************************************/ double MCC::getValue(double tp, double tn, double fp, double fn) { try { double p = tp + fn; double n = fp + tn; double pPrime = tp + fp; double nPrime = tn + fn; double matthewsCorrCoef = ((tp * tn) - (fp * fn)) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0 || pPrime == 0 || nPrime == 0){ matthewsCorrCoef = 0; } if (isnan(matthewsCorrCoef) || isinf(matthewsCorrCoef)) { matthewsCorrCoef = 0; } return matthewsCorrCoef; } catch(exception& e) { m->errorOut(e, "MCC", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/mcc.hpp000077500000000000000000000011411424121717000202100ustar00rootroot00000000000000// // mcc.hpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef mcc_hpp #define mcc_hpp #include "calculator.h" /***********************************************************************/ class MCC : public ClusterMetric { public: MCC() : ClusterMetric("mcc") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/MCC"; } private: }; /***********************************************************************/ #endif /* mcc_hpp */ mothur-1.48.0/source/calculators/memchi2.cpp000077500000000000000000000025301424121717000207700ustar00rootroot00000000000000/* * memchi2.cpp * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "memchi2.h" /***********************************************************************/ EstOutput MemChi2::getValues(vector shared) { try { data.resize(1,0); int nonZeroA = 0; int nonZeroB = 0; int totalOtus = shared[0]->getNumBins(); //int totalGroups = shared.size(); //for each otu for (int i = 0; i < shared[0]->getNumBins(); i++) { if (shared[0]->get(i) != 0) { nonZeroA++; } if (shared[1]->get(i) != 0) { nonZeroB++; } } double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int A = shared[0]->get(i); int B = shared[1]->get(i); if (A > 0) { A = 1; } if (B > 0) { B = 1; } double Aterm = A / (float) nonZeroA; double Bterm = B / (float) nonZeroB; int incidence = 0; for(int j=0;jget(i) != 0){ incidence++; } } if(incidence != 0){ sum += (((Aterm-Bterm)*(Aterm-Bterm))/incidence); } } data[0] = sqrt(totalOtus * sum); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "MemChi2", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/memchi2.h000077500000000000000000000013211424121717000204320ustar00rootroot00000000000000#ifndef MEMCHI2_H #define MEMCHI2_H /* * memchi2.h * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class MemChi2 : public Calculator { public: MemChi2() : Calculator("memchi2", 1, false, true) {}; //the true means this calculator needs all groups to calculate the pair value EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Memchi2"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/memchord.cpp000077500000000000000000000022321424121717000212410ustar00rootroot00000000000000/* * memchord.cpp * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "memchord.h" /***********************************************************************/ EstOutput MemChord::getValues(vector shared) { try { data.resize(1,0); double nonZeroA = 0; double nonZeroB = 0; //for each otu for (int i = 0; i < shared[0]->getNumBins(); i++) { if (shared[0]->get(i) != 0) { nonZeroA++; } if (shared[1]->get(i) != 0) { nonZeroB++; } } nonZeroA = sqrt(nonZeroA); nonZeroB = sqrt(nonZeroB); double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int A = shared[0]->get(i); int B = shared[1]->get(i); if (A > 0) { A = 1; } if (B > 0) { B = 1; } double Aterm = A / nonZeroA; double Bterm = B / nonZeroB; sum += ((Aterm-Bterm)*(Aterm-Bterm)); } data[0] = sqrt(sum); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "MemChord", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/memchord.h000077500000000000000000000012051424121717000207050ustar00rootroot00000000000000#ifndef MEMCHORD_H #define MEMCHORD_H /* * memchord.h * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "calculator.h" /***********************************************************************/ class MemChord : public Calculator { public: MemChord() : Calculator("memchord", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Memchord"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/memeuclidean.cpp000077500000000000000000000014741424121717000221020ustar00rootroot00000000000000/* * memeuclidean.cpp * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "memeuclidean.h" /***********************************************************************/ EstOutput MemEuclidean::getValues(vector shared) { try { data.resize(1,0); double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int A = shared[0]->get(i); int B = shared[1]->get(i); if (A > 0) { A = 1; } if (B > 0) { B = 1; } sum += ((A-B)*(A-B)); } data[0] = sqrt(sum); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "MemEuclidean", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/memeuclidean.h000077500000000000000000000012401424121717000215360ustar00rootroot00000000000000#ifndef MEMEUCLIDEAN_H #define MEMEUCLIDEAN_H /* * memeuclidean.h * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class MemEuclidean : public Calculator { public: MemEuclidean() : Calculator("memeuclidean", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Memeuclidean"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/mempearson.cpp000077500000000000000000000030461424121717000216150ustar00rootroot00000000000000/* * mempearson.cpp * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "mempearson.h" /***********************************************************************/ EstOutput MemPearson::getValues(vector shared) { try { data.resize(1,0); int nonZeroA = 0; int nonZeroB = 0; int numOTUS = shared[0]->getNumBins(); //for each otu for (int i = 0; i < shared[0]->getNumBins(); i++) { if (shared[0]->get(i) != 0) { nonZeroA++; } if (shared[1]->get(i) != 0) { nonZeroB++; } } double numTerm = 0.0; double denomTerm1 = 0.0; double denomTerm2 = 0.0; double averageA = nonZeroA / (float) numOTUS; double averageB = nonZeroB / (float) numOTUS; for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); if (Aij > 0) { Aij = 1; } if (Bij > 0) { Bij = 1; } numTerm += ((Aij - averageA) * (Bij - averageB)); denomTerm1 += ((Aij - averageA) * (Aij - averageA)); denomTerm2 += ((Bij - averageB) * (Bij - averageB)); } denomTerm1 = sqrt(denomTerm1); denomTerm2 = sqrt(denomTerm2); double denom = denomTerm1 * denomTerm2; if (!util.isEqual(denom, 0)) { data[0] = (numTerm / denom); }else { data[0] = 1.0; } if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "MemPearson", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/mempearson.h000077500000000000000000000012231424121717000212550ustar00rootroot00000000000000#ifndef MEMPEARSON_H #define MEMPEARSON_H /* * mempearson.h * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class MemPearson : public Calculator { public: MemPearson() : Calculator("mempearson", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Mempearson"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/metroig.cpp000066400000000000000000000170521424121717000211140ustar00rootroot00000000000000// // metroig.cpp // Mothur // // Created by Sarah Westcott on 4/8/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
// #include "metroig.hpp" /*constants for simplex minimisation*/ /***********************************************************************/ MetroIG::MetroIG(int fi, double sigA, double sigB, double sigS, int n, string stub) : sigmaA(sigA), sigmaB(sigB), sigmaS(sigS), nIters(n), fitIters(fi), outFileStub(stub), DiversityCalculator(false) {} /***********************************************************************/ #ifdef USE_GSL double nLogLikelihood0(const gsl_vector * x, void * params) { double dAlpha = gsl_vector_get(x,0), dBeta = gsl_vector_get(x,1); int nS = (int) floor(gsl_vector_get(x, 2)); t_Data *ptData = (t_Data *) params; int i = 0; double dLogL = 0.0; double dLog0 = 0.0, dLog1 = 0.0, dLog2 = 0.0, dLog3 = 0.0; if(dAlpha <= 0.0 || dBeta <= 0.0){ return PENALTY; } DiversityUtils dutils("metroig"); for(i = 0; i < ptData->nNA; i++){ if (dutils.m->getControl_pressed()) { break; } double dLogP = 0.0; int nA = ptData->aanAbund[i][0]; dLogP = dutils.logLikelihood(nA, dAlpha, dBeta); dLogL += ((double) ptData->aanAbund[i][1])*dLogP; dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]); } dLog0 = dutils.logLikelihood(0, dAlpha, dBeta); dLog1 = (nS - ptData->nL)*dLog0; dLog2 = - gsl_sf_lnfact(nS - ptData->nL); dLog3 = gsl_sf_lnfact(nS); dLogL += dLog1 + dLog2 + dLog3; /*return*/ return -dLogL; } /***********************************************************************/ double negLogLikelihood0(double dAlpha, double dBeta, int nS, void * params) { t_Data *ptData = (t_Data *) params; int i = 0; double dLogL = 0.0; double dLog0 = 0.0, dLog1 = 0.0, dLog2 = 0.0, dLog3 = 0.0; if(dAlpha <= 0.0 || dBeta <= 0.0){ return PENALTY; } DiversityUtils dutils("metroig"); for(i = 0; i < ptData->nNA; i++){ if (dutils.m->getControl_pressed()) { break; } double dLogP = 0.0; int nA = ptData->aanAbund[i][0]; dLogP = dutils.logLikelihood(nA, dAlpha, dBeta); dLogL += ((double) ptData->aanAbund[i][1])*dLogP; dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]); } dLog0 = dutils.logLikelihood(0, dAlpha, dBeta); dLog1 = (nS - ptData->nL)*dLog0; dLog2 = - gsl_sf_lnfact(nS - ptData->nL); dLog3 = gsl_sf_lnfact(nS); dLogL += dLog1 + dLog2 + dLog3; /*return*/ return -dLogL; } /***********************************************************************/ void* metropolis0 (void * pvInitMetro) { t_MetroInit *ptMetroInit = (t_MetroInit *) pvInitMetro; gsl_vector *ptX = ptMetroInit->ptX; t_Data *ptData = ptMetroInit->ptData; t_Params *ptParams = ptMetroInit->ptParams; gsl_vector *ptXDash = gsl_vector_alloc(3); /*proposal*/ char *szSampleFile = (char *) malloc(1024*sizeof(char)); const gsl_rng_type *T; gsl_rng *ptGSLRNG; //FILE *sfp = nullptr; int nS = 0, nSDash = 0, nIter = 0; double dRand = 0.0, dNLL = 0.0; void *pvRet = nullptr; /*set up random number generator*/ T = gsl_rng_default; ptGSLRNG = gsl_rng_alloc (T); nS = (int) floor(gsl_vector_get(ptX,2)); dNLL = negLogLikelihood0(gsl_vector_get(ptX,0), gsl_vector_get(ptX,1), nS,(void*) ptData); string filename = ptParams->szOutFileStub + "_" + toString(ptMetroInit->nThread) + ".sample"; ofstream out; Utils util; util.openOutputFile(filename, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); /*seed random number generator*/ gsl_rng_set(ptGSLRNG, ptMetroInit->lSeed); DiversityUtils dutils("metroig"); /*now perform simple Metropolis algorithm*/ while(nIter < ptParams->nIter){ double dA = 0.0, dNLLDash = 0.0; if (dutils.m->getControl_pressed()) { break; } dutils.getProposal(ptGSLRNG, ptXDash, ptX, &nSDash, nS, ptParams); dNLLDash = negLogLikelihood0(gsl_vector_get(ptXDash,0), 
gsl_vector_get(ptXDash,1), nSDash, (void*) ptData); dA = exp(dNLL - dNLLDash); if(dA > 1.0){ dA = 1.0; } dRand = gsl_rng_uniform(ptGSLRNG); if(dRand < dA){ gsl_vector_memcpy(ptX, ptXDash); nS = nSDash; dNLL = dNLLDash; ptMetroInit->nAccepted++; } if(nIter % SLICE == 0){ out << nIter << "," << gsl_vector_get(ptX, 0) << "," << gsl_vector_get(ptX, 1) << "," << nS << "," << dNLL << endl; } nIter++; } out.close(); /*free up allocated memory*/ gsl_vector_free(ptXDash); free(szSampleFile); gsl_rng_free(ptGSLRNG); return pvRet; } #endif /***********************************************************************/ vector MetroIG::getValues(SAbundVector* rank){ try { t_Params tParams; tParams.nIter = nIters; tParams.dSigmaX = sigmaA; tParams.dSigmaY = sigmaB; tParams.dSigmaS = sigmaS; tParams.szOutFileStub = outFileStub; tParams.lSeed = m->getRandomSeed(); t_Data tData; int bestSample = 0; #ifdef USE_GSL DiversityUtils dutils("metroig"); dutils.loadAbundance(&tData, rank); gsl_vector* ptX = gsl_vector_alloc(3); /*parameter estimates*/ int sampled = rank->getNumSeqs(); //nj int numOTUs = rank->getNumBins(); //nl gsl_rng_env_setup(); gsl_set_error_handler_off(); /*set initial estimates for parameters*/ gsl_vector_set(ptX, 0, 1.0); gsl_vector_set(ptX, 1, 5.0); gsl_vector_set(ptX, 2, numOTUs*2); double chaoResult = dutils.chao(&tData); m->mothurOut("\nMetroIG - D = " + toString(numOTUs) + " L = " + toString(sampled) + " Chao = " + toString(chaoResult) + "\n"); dutils.minimiseSimplex(ptX, 3, (void*) &tData, &nLogLikelihood0, 0.1, 1.0e-2, 100000); vector parameterResults = dutils.outputResults(ptX, &tData, &nLogLikelihood0); if(tParams.nIter > 0){ vector acceptanceRates = dutils.mcmc(&tParams, &tData, ptX, &metropolis0); if (fitIters != 0) { bestSample = dutils.fitSigma(acceptanceRates, parameterResults, fitIters, &tParams, &tData, ptX, &metropolis0); } } /*free up allocated memory*/ gsl_vector_free(ptX); dutils.freeAbundance(&tData); #endif outputs.push_back(outFileStub + "_" + toString(bestSample) + ".sample"); if (bestSample == 0) { outputs.push_back(outFileStub + "_1.sample"); outputs.push_back(outFileStub + "_2.sample"); } else if (bestSample == 1) { outputs.push_back(outFileStub + "_0.sample"); outputs.push_back(outFileStub + "_2.sample"); } else if (bestSample == 2) { outputs.push_back(outFileStub + "_0.sample"); outputs.push_back(outFileStub + "_1.sample"); } return outputs; } catch(exception& e) { m->errorOut(e, "MetroIG", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/metroig.hpp000066400000000000000000000014051424121717000211140ustar00rootroot00000000000000// // metroig.hpp // Mothur // // Created by Sarah Westcott on 4/8/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
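//MetroIG - fits the compound Poisson Inverse Gaussian distn to a sample (cf. MetroLogNormal, MetroLogStudent, MetroSichel)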
// #ifndef metroig_hpp #define metroig_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class MetroIG : public DiversityCalculator { public: MetroIG(int fi, double sigA, double sigB, double sigS, int n, string stub); vector getValues(SAbundVector* rank); string getTag() { return "ig"; } private: double sigmaA, sigmaB, sigmaS; int nIters, fitIters; string outFileStub; }; /***********************************************************************/ #endif /* metroig_hpp */ mothur-1.48.0/source/calculators/metrolognormal.cpp000066400000000000000000000215711424121717000225100ustar00rootroot00000000000000// // metrolognormal.c // Mothur // // Created by Sarah Westcott on 4/25/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "metrolognormal.hpp" /*constants for calculated compound Poisson lognormal*/ /***********************************************************************/ MetroLogNormal::MetroLogNormal(int fi, double sigx, double sigy, double sigS, int n, string st) : sigmaX(sigx), sigmaY(sigy), sigmaS(sigS), nIters(n), outFileStub(st), fitIters(fi), DiversityCalculator(false) {} /***********************************************************************/ #ifdef USE_GSL /***********************************************************************/ double nLogLikelihood1(const gsl_vector * x, void * params) { MothurOut* m = MothurOut::getInstance(); try { double dMDash = gsl_vector_get(x,0), dV = gsl_vector_get(x,1); int nS = (int) floor(gsl_vector_get(x, 2)); t_Data *ptData = (t_Data *) params; int i = 0; double dLogL = 0.0; DiversityUtils dutils("metroln"); for(i = 0; i < ptData->nNA; i++){ if (m->getControl_pressed()) { break; } double dLogP = 0.0; int nA = ptData->aanAbund[i][0]; if(nA < 100){ dLogP = dutils.logLikelihoodQuad(nA, dMDash, dV); } else{ dLogP = dutils.logLikelihoodRampal(nA, dMDash, dV); } dLogL += ((double) ptData->aanAbund[i][1])*dLogP; dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]); } dLogL += (nS - ptData->nL)*dutils.logLikelihoodQuad(0, dMDash, dV); dLogL -= gsl_sf_lnfact(nS - ptData->nL); dLogL += gsl_sf_lnfact(nS); /*return*/ return -dLogL; }catch(exception& e) { m->errorOut(e, "MetroLogNormal", "nLogLikelihood1"); exit(1); } } /***********************************************************************/ double negLogLikelihood1(double dMDash, double dV, int nS, void * params) { MothurOut* m = MothurOut::getInstance(); try { t_Data *ptData = (t_Data *) params; int i = 0; double dLog0 = 0.0, dLogL = 0.0; DiversityUtils dutils("metroln"); for(i = 0; i < ptData->nNA; i++){ if (m->getControl_pressed()) { break; } double dLogP = 0.0; int nA = ptData->aanAbund[i][0]; if(nA < 100){ dLogP = dutils.logLikelihoodQuad(nA, dMDash, dV); } else{ dLogP = dutils.logLikelihoodRampal(nA, dMDash, dV); } dLogL += ((double) ptData->aanAbund[i][1])*dLogP; dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]); } dLog0 = dutils.logLikelihoodQuad(0, dMDash, dV); if(nS > ptData->nL){ dLogL += (nS - ptData->nL)*dLog0; } dLogL -= gsl_sf_lnfact(nS - ptData->nL); dLogL += gsl_sf_lnfact(nS); return -dLogL; }catch(exception& e) { m->errorOut(e, "MetroLogNormal", "negLogLikelihood1"); exit(1); } } /***********************************************************************/ void* metropolis1 (void * pvInitMetro) { MothurOut* m = MothurOut::getInstance(); try { t_MetroInit *ptMetroInit = (t_MetroInit *) pvInitMetro; gsl_vector *ptX = ptMetroInit->ptX; t_Data *ptData = ptMetroInit->ptData; t_Params *ptParams = 
ptMetroInit->ptParams; gsl_vector *ptXDash = gsl_vector_alloc(3); /*proposal*/ const gsl_rng_type *T; gsl_rng *ptGSLRNG; int nS = 0, nSDash = 0,nIter = 0; double dRand = 0.0, dNLL = 0.0; void *pvRet = nullptr; double dM = 0.0, dV = 0.0; double dMDash = 0.0, dVDash = 0.0; double dXDash = 0.0, dX = 0.0; /*set up random number generator*/ T = gsl_rng_default; ptGSLRNG = gsl_rng_alloc (T); nS = (int) floor(gsl_vector_get(ptX,2)); dNLL = negLogLikelihood1(gsl_vector_get(ptX,0), gsl_vector_get(ptX,1), nS,(void*) ptData); dM = gsl_vector_get(ptX,0); dV = gsl_vector_get(ptX,1); gsl_vector_set(ptX,0,dM + 0.5*dV); string filename = ptParams->szOutFileStub + "_" + toString(ptMetroInit->nThread) + ".sample"; ofstream out; Utils util; util.openOutputFile(filename, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); /*seed random number generator*/ gsl_rng_set(ptGSLRNG, ptMetroInit->lSeed); DiversityUtils dutils("metroln"); /*now perform simple Metropolis algorithm*/ while(nIter < ptParams->nIter){ double dA = 0.0, dNLLDash = 0.0; if (m->getControl_pressed()) { break; } dutils.getProposal(ptGSLRNG, ptXDash, ptX, &nSDash, nS,ptParams); dXDash = gsl_vector_get(ptXDash,0); dVDash = gsl_vector_get(ptXDash,1); dMDash = dXDash - 0.5*dVDash; dNLLDash = negLogLikelihood1(dMDash, dVDash, nSDash, (void*) ptData); dA = exp(dNLL - dNLLDash); if(dA > 1.0){ dA = 1.0; } dRand = gsl_rng_uniform(ptGSLRNG); if(dRand < dA){ ptMetroInit->nAccepted++; gsl_vector_memcpy(ptX, ptXDash); nS = nSDash; dNLL = dNLLDash; } if(nIter % 10 == 0){ dX = gsl_vector_get(ptX,0); dV = gsl_vector_get(ptX,1); dM = dX - 0.5*dV; out << nIter << "," << dM << "," << dV << "," << nS << "," << dNLL << endl; } nIter++; } out.close(); /*free up allocated memory*/ gsl_vector_free(ptXDash); gsl_rng_free(ptGSLRNG); return pvRet; }catch(exception& e) { m->errorOut(e, "MetroLogNormal", "metropolis1"); exit(1); } } #endif /***********************************************************************/ vector MetroLogNormal::getValues(SAbundVector* rank){ try { t_Params tParams; tParams.nIter = nIters; tParams.dSigmaX = sigmaX; tParams.dSigmaY = sigmaY; tParams.dSigmaS = sigmaS; tParams.szOutFileStub = outFileStub; tParams.lSeed = m->getRandomSeed(); t_Data tData; int bestSample = 0; #ifdef USE_GSL DiversityUtils dutils("metroln"); dutils.loadAbundance(&tData, rank); gsl_vector* ptX = gsl_vector_alloc(3); gsl_rng_env_setup(); gsl_set_error_handler_off(); dutils.loadAbundance(&tData, rank); int sampled = rank->getNumSeqs(); //nj int numOTUs = rank->getNumBins(); //nl gsl_vector_set(ptX, 0, 1.0); //INIT_M_DASH gsl_vector_set(ptX, 1, 1.0); //INIT_V gsl_vector_set(ptX, 2, numOTUs*2); double chaoResult = dutils.chao(&tData); m->mothurOut("\nMetroLogNormal - D = " + toString(numOTUs) + " L = " + toString(sampled) + " Chao = " + toString(chaoResult) + "\n"); dutils.minimiseSimplex(ptX, 3, (void*) &tData, &nLogLikelihood1, 1.0, 1.0e-2, 100000); vector parameterResults = dutils.outputResults(ptX, &tData, &nLogLikelihood1); if(tParams.nIter > 0){ vector acceptanceRates = dutils.mcmc(&tParams, &tData, ptX, &metropolis1); //sigmaX 0.1 if (fitIters != 0) { bestSample = dutils.fitSigma(acceptanceRates, parameterResults, fitIters, &tParams, &tData, ptX, &metropolis1); } } /*free up allocated memory*/ gsl_vector_free(ptX); dutils.freeAbundance(&tData); #endif outputs.push_back(outFileStub + "_" + toString(bestSample) + ".sample"); if (bestSample == 0) { outputs.push_back(outFileStub + "_1.sample"); outputs.push_back(outFileStub + "_2.sample"); } else if 
(bestSample == 1) { outputs.push_back(outFileStub + "_0.sample"); outputs.push_back(outFileStub + "_2.sample"); } else if (bestSample == 2) { outputs.push_back(outFileStub + "_0.sample"); outputs.push_back(outFileStub + "_1.sample"); } return outputs; } catch(exception& e) { m->errorOut(e, "MetroLogNormal", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/metrolognormal.hpp000066400000000000000000000016041424121717000225100ustar00rootroot00000000000000// // metrolognormal.h // Mothur // // Created by Sarah Westcott on 4/25/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef metrolognormal_h #define metrolognormal_h #include "diversityutils.hpp" #include "diversitycalc.h" //MetroLogNormal - fits a compound Poisson Log-Normal distn to a sample /***********************************************************************/ class MetroLogNormal : public DiversityCalculator { public: MetroLogNormal(int fi, double sigx, double sigy, double sigS, int n, string st); ~MetroLogNormal() = default; vector getValues(SAbundVector* rank); string getTag() { return "ln"; } private: double sigmaX, sigmaY, sigmaS; int nIters, fitIters; string outFileStub; }; /***********************************************************************/ #endif /* metrolognormal_h */ mothur-1.48.0/source/calculators/metrologstudent.cpp000066400000000000000000000202771424121717000227100ustar00rootroot00000000000000// // metrologstudent.cpp // Mothur // // Created by Sarah Westcott on 5/2/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "metrologstudent.hpp" /***********************************************************************/ MetroLogStudent::MetroLogStudent(int fi, double sigm, double sigv, double sign, double sigS, int n, string st) : sigmaM(sigm), sigmaV(sigv), sigmaN(sign), sigmaS(sigS), nIters(n), outFileStub(st), fitIters(fi), DiversityCalculator(false) {} /***********************************************************************/ #ifdef USE_GSL /***********************************************************************/ double nLogLikelihood2(const gsl_vector * x, void * params) { double dMDash = gsl_vector_get(x,0), dV = gsl_vector_get(x,1); double dNu = gsl_vector_get(x,2); int nS = (int) floor(gsl_vector_get(x, 3)); t_Data *ptData = (t_Data *) params; int i = 0; double dLogL = 0.0; double dLog0 = 0.0, dLog1 = 0.0, dLog2 = 0.0, dLog3 = 0.0; if(dV <= 0.0 || dNu < 1.0){ return PENALTY; } DiversityUtils dutils("metrols"); for(i = 0; i < ptData->nNA; i++){ if (dutils.m->getControl_pressed()) { break; } double dLogP = 0.0; int nA = ptData->aanAbund[i][0]; if(nA < 100){ //MAX_QUAD dLogP = dutils.logLikelihoodQuad(nA, dMDash, dV, dNu); } else{ dLogP = dutils.logLikelihoodRampal(nA, dMDash, dV, dNu); } dLogL += ((double) ptData->aanAbund[i][1])*dLogP; dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]); } dLog0 = dutils.logLikelihoodQuad(0, dMDash, dV, dNu); dLog1 = (nS - ptData->nL)*dLog0; dLog2 = - gsl_sf_lnfact(nS - ptData->nL); dLog3 = gsl_sf_lnfact(nS); dLogL += dLog1 + dLog2 + dLog3; /*return*/ return -dLogL; } /***********************************************************************/ double negLogLikelihood(double dMDash, double dV, double dNu, int nS, void * params) { t_Data *ptData = (t_Data *) params; int i = 0; double dLogL = 0.0; double dLog0 = 0.0, dLog1 = 0.0, dLog2 = 0.0, dLog3 = 0.0; if(dV <= 0.0 || dNu < 1.0){ return PENALTY; } DiversityUtils dutils("metrols"); for(i = 0; i < ptData->nNA; i++){ if 
(dutils.m->getControl_pressed()) { break; } double dLogP = 0.0; int nA = ptData->aanAbund[i][0]; if(nA < 100){ //MAX_QUAD dLogP = dutils.logLikelihoodQuad(nA, dMDash, dV, dNu); } else{ dLogP = dutils.logLikelihoodRampal(nA, dMDash, dV, dNu); } dLogL += ((double) ptData->aanAbund[i][1])*dLogP; dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]); } dLog0 = dutils.logLikelihoodQuad(0, dMDash, dV, dNu); dLog1 = (nS - ptData->nL)*dLog0; dLog2 = - gsl_sf_lnfact(nS - ptData->nL); dLog3 = gsl_sf_lnfact(nS); dLogL += dLog1 + dLog2 + dLog3; /*return*/ return -dLogL; } /***********************************************************************/ void* metropolis2 (void * pvInitMetro) { t_MetroInit *ptMetroInit = (t_MetroInit *) pvInitMetro; gsl_vector *ptX = ptMetroInit->ptX; t_Data *ptData = ptMetroInit->ptData; t_Params *ptParams = ptMetroInit->ptParams; gsl_vector *ptXDash = gsl_vector_alloc(4); /*proposal*/ char *szSampleFile = (char *) malloc(1024*sizeof(char)); const gsl_rng_type *T; gsl_rng *ptGSLRNG; int nS = 0, nSDash = 0, nIter = 0; double dRand = 0.0, dNLL = 0.0; void *pvRet = nullptr; /*set up random number generator*/ T = gsl_rng_default; ptGSLRNG = gsl_rng_alloc (T); nS = (int) floor(gsl_vector_get(ptX,3)); dNLL = negLogLikelihood(gsl_vector_get(ptX,0), gsl_vector_get(ptX,1), gsl_vector_get(ptX,2), nS,(void*) ptData); string filename = ptParams->szOutFileStub + "_" + toString(ptMetroInit->nThread) + ".sample"; ofstream out; Utils util; util.openOutputFile(filename, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); /*seed random number generator*/ gsl_rng_set(ptGSLRNG, ptMetroInit->lSeed); DiversityUtils dutils("metrols"); /*now perform simple Metropolis algorithm*/ while(nIter < ptParams->nIter){ if (dutils.m->getControl_pressed()) { break; } double dA = 0.0, dNLLDash = 0.0; dutils.getProposal(ptGSLRNG, ptXDash, ptX, &nSDash, nS, ptParams); dNLLDash = negLogLikelihood(gsl_vector_get(ptXDash,0), gsl_vector_get(ptXDash,1), gsl_vector_get(ptXDash,2), nSDash, (void*) ptData); dA = exp(dNLL - dNLLDash); if(dA > 1.0){ dA = 1.0; } dRand = gsl_rng_uniform(ptGSLRNG); if(dRand < dA){ gsl_vector_memcpy(ptX, ptXDash); nS = nSDash; dNLL = dNLLDash; ptMetroInit->nAccepted++; } if(nIter % 10 == 0){ out << nIter << "," << gsl_vector_get(ptX, 0) << "," << gsl_vector_get(ptX, 1) << "," << gsl_vector_get(ptX, 2) << "," << nS << "," << dNLL << endl; }else if(nIter % 10000 == 0){ cout << nIter << endl; } nIter++; } out.close(); /*free up allocated memory*/ gsl_vector_free(ptXDash); free(szSampleFile); gsl_rng_free(ptGSLRNG); return pvRet; } #endif /***********************************************************************/ vector MetroLogStudent::getValues(SAbundVector* rank){ try { t_Params tParams; tParams.nIter = nIters; tParams.dSigmaX = sigmaM; tParams.dSigmaY = sigmaV; tParams.dSigmaN = sigmaN; tParams.dSigmaS = sigmaS; tParams.szOutFileStub = outFileStub; tParams.lSeed = m->getRandomSeed(); t_Data tData; int bestSample = 0; #ifdef USE_GSL DiversityUtils dutils("metrols"); dutils.loadAbundance(&tData, rank); int sampled = rank->getNumSeqs(); //nj int numOTUs = rank->getNumBins(); //nl gsl_vector* ptX = gsl_vector_alloc(4); /*parameter estimates*/ gsl_rng_env_setup(); gsl_set_error_handler_off(); /*set initial estimates for parameters*/ gsl_vector_set(ptX, 0, -10.0); //INIT_M gsl_vector_set(ptX, 1, 20.0); //INIT_V gsl_vector_set(ptX, 2, 20.0); //INIT_N gsl_vector_set(ptX, 3, numOTUs*2); double chaoResult = dutils.chao(&tData); m->mothurOut("\nMetroLogStudent - D = " + toString(numOTUs) + " L = 
" + toString(sampled) + " Chao = " + toString(chaoResult) + "\n"); dutils.minimiseSimplex(ptX, 4, (void*) &tData, &nLogLikelihood2, 0.1, 1.0e-3, 100000); vector parameterResults = dutils.outputResults(ptX, &tData, &nLogLikelihood2); if(tParams.nIter > 0){ vector acceptanceRates = dutils.mcmc(&tParams, &tData, ptX, &metropolis2); if (fitIters != 0) { bestSample = dutils.fitSigma(acceptanceRates, parameterResults, fitIters, &tParams, &tData, ptX, &metropolis2); } } /*free up allocated memory*/ gsl_vector_free(ptX); dutils.freeAbundance(&tData); #endif outputs.push_back(outFileStub + "_" + toString(bestSample) + ".sample"); if (bestSample == 0) { outputs.push_back(outFileStub + "_1.sample"); outputs.push_back(outFileStub + "_2.sample"); } else if (bestSample == 1) { outputs.push_back(outFileStub + "_0.sample"); outputs.push_back(outFileStub + "_2.sample"); } else if (bestSample == 2) { outputs.push_back(outFileStub + "_0.sample"); outputs.push_back(outFileStub + "_1.sample"); } return outputs; } catch(exception& e) { m->errorOut(e, "MetroLogStudent", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/metrologstudent.hpp000066400000000000000000000016411424121717000227070ustar00rootroot00000000000000// // metrologstudent.hpp // Mothur // // Created by Sarah Westcott on 5/2/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef metrologstudent_hpp #define metrologstudent_hpp #include "diversityutils.hpp" #include "diversitycalc.h" //MetroLogStudent - Samples the Poisson Log-Student distn to SADs /***********************************************************************/ class MetroLogStudent : public DiversityCalculator { public: MetroLogStudent(int fi, double sigm, double sigv, double sign, double sigS, int n, string st); ~MetroLogStudent() = default; vector getValues(SAbundVector* rank); string getTag() { return "ls"; } private: double sigmaM, sigmaV, sigmaN, sigmaS; int nIters, fitIters; string outFileStub; }; /***********************************************************************/ #endif /* metrologstudent_hpp */ mothur-1.48.0/source/calculators/metrosichel.cpp000066400000000000000000000175751424121717000217760ustar00rootroot00000000000000// // metrosichel.cpp // Mothur // // Created by Sarah Westcott on 5/3/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
// #include "metrosichel.hpp" /***********************************************************************/ MetroSichel::MetroSichel(int af, double siga, double sigb, double sigg, double sigS, int n, string st) : sigmaA(siga), sigmaB(sigb), sigmaG(sigg), sigmaS(sigS), nIters(n), outFileStub(st), fitIters(af), DiversityCalculator(false) {} /***********************************************************************/ #ifdef USE_GSL /***********************************************************************/ double nLogLikelihood3(const gsl_vector * x, void * params) { double dAlpha = gsl_vector_get(x,0), dBeta = gsl_vector_get(x,1); double dGamma = gsl_vector_get(x,2); int nS = (int) floor(gsl_vector_get(x, 3)); t_Data *ptData = (t_Data *) params; int i = 0; double dLogL = 0.0; double dLog0 = 0.0, dLog1 = 0.0, dLog2 = 0.0, dLog3 = 0.0; if(dAlpha <= 0.0 || dBeta <= 0.0){ return PENALTY; } DiversityUtils dutils("metrosichel"); for(i = 0; i < ptData->nNA; i++){ if (dutils.m->getControl_pressed()) { break; } double dLogP = 0.0; int nA = ptData->aanAbund[i][0]; dLogP = dutils.logLikelihood(nA, dAlpha, dBeta, dGamma); dLogL += ((double) ptData->aanAbund[i][1])*dLogP; dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]); } dLog0 = dutils.logLikelihood(0, dAlpha, dBeta, dGamma); dLog1 = (nS - ptData->nL)*dLog0; dLog2 = - gsl_sf_lnfact(nS - ptData->nL); dLog3 = gsl_sf_lnfact(nS); dLogL += dLog1 + dLog2 + dLog3; /*return*/ return -dLogL; } /***********************************************************************/ double negLogLikelihood3(double dAlpha, double dBeta, double dGamma, int nS, void * params) { t_Data *ptData = (t_Data *) params; int i = 0; double dLogL = 0.0; double dLog0 = 0.0, dLog1 = 0.0, dLog2 = 0.0, dLog3 = 0.0; if(dAlpha <= 0.0 || dBeta <= 0.0){ return PENALTY; } DiversityUtils dutils("metrosichel"); for(i = 0; i < ptData->nNA; i++){ if (dutils.m->getControl_pressed()) { break; } double dLogP = 0.0; int nA = ptData->aanAbund[i][0]; dLogP = dutils.logLikelihood(nA, dAlpha, dBeta, dGamma); dLogL += ((double) ptData->aanAbund[i][1])*dLogP; dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]); } dLog0 = dutils.logLikelihood(0, dAlpha, dBeta, dGamma); dLog1 = (nS - ptData->nL)*dLog0; dLog2 = - gsl_sf_lnfact(nS - ptData->nL); dLog3 = gsl_sf_lnfact(nS); dLogL += dLog1 + dLog2 + dLog3; /*return*/ return -dLogL; } /***********************************************************************/ void* metropolis3 (void * pvInitMetro) { t_MetroInit *ptMetroInit = (t_MetroInit *) pvInitMetro; gsl_vector *ptX = ptMetroInit->ptX; t_Data *ptData = ptMetroInit->ptData; t_Params *ptParams = ptMetroInit->ptParams; gsl_vector *ptXDash = gsl_vector_alloc(4); /*proposal*/ char *szSampleFile = (char *) malloc(1024*sizeof(char)); const gsl_rng_type *T; gsl_rng *ptGSLRNG; int nS = 0, nSDash = 0, nIter = 0; double dRand = 0.0, dNLL = 0.0; void *pvRet = nullptr; /*set up random number generator*/ T = gsl_rng_default; ptGSLRNG = gsl_rng_alloc (T); nS = (int) floor(gsl_vector_get(ptX,3)); dNLL = negLogLikelihood3(gsl_vector_get(ptX,0), gsl_vector_get(ptX,1), gsl_vector_get(ptX,2), nS,(void*) ptData); string filename = ptParams->szOutFileStub + "_" + toString(ptMetroInit->nThread) + ".sample"; ofstream out; Utils util; util.openOutputFile(filename, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); /*seed random number generator*/ gsl_rng_set(ptGSLRNG, ptMetroInit->lSeed); DiversityUtils dutils("metrosichel"); /*now perform simple Metropolis algorithm*/ while(nIter < ptParams->nIter){ double dA = 0.0, dNLLDash = 0.0; if 
(dutils.m->getControl_pressed()) { break; } dutils.getProposal(ptGSLRNG, ptXDash, ptX, &nSDash, nS, ptParams); dNLLDash = negLogLikelihood3(gsl_vector_get(ptXDash,0), gsl_vector_get(ptXDash,1), gsl_vector_get(ptXDash,2), nSDash, (void*) ptData); dA = exp(dNLL - dNLLDash); if(dA > 1.0){ dA = 1.0; } dRand = gsl_rng_uniform(ptGSLRNG); if(dRand < dA){ gsl_vector_memcpy(ptX, ptXDash); nS = nSDash; dNLL = dNLLDash; ptMetroInit->nAccepted++; } if(nIter % 10 == 0){ out << nIter << "," << gsl_vector_get(ptX, 0) << "," << gsl_vector_get(ptX, 1) << "," << gsl_vector_get(ptX, 2) << "," << nS << "," << dNLL << endl; } nIter++; } out.close(); /*free up allocated memory*/ gsl_vector_free(ptXDash); free(szSampleFile); gsl_rng_free(ptGSLRNG); return pvRet; } #endif /***********************************************************************/ vector MetroSichel::getValues(SAbundVector* rank){ try { t_Params tParams; tParams.nIter = nIters; tParams.dSigmaX = sigmaA; tParams.dSigmaY = sigmaB; tParams.dSigmaN = sigmaG; tParams.dSigmaS = sigmaS; tParams.szOutFileStub = outFileStub; tParams.lSeed = m->getRandomSeed(); t_Data tData; int bestSample = 0; #ifdef USE_GSL DiversityUtils dutils("metrosichel"); dutils.loadAbundance(&tData, rank); int sampled = rank->getNumSeqs(); //nj int numOTUs = rank->getNumBins(); //nl gsl_vector* ptX = gsl_vector_alloc(4); /*parameter estimates*/ gsl_rng_env_setup(); gsl_set_error_handler_off(); /*set initial estimates for parameters*/ gsl_vector_set(ptX, 0, 0.1); //INIT_A gsl_vector_set(ptX, 1, 1.0); //INIT_B gsl_vector_set(ptX, 2, -0.5); //INIT_G gsl_vector_set(ptX, 3, numOTUs*2); double chaoResult = dutils.chao(&tData); m->mothurOut("\nMetroSichel - D = " + toString(numOTUs) + " L = " + toString(sampled) + " Chao = " + toString(chaoResult) + "\n"); dutils.minimiseSimplex(ptX, 4, (void*) &tData, &nLogLikelihood3, 0.1, 1.0e-5, 100000); vector parameterResults = dutils.outputResults(ptX, &tData, &nLogLikelihood3); if(tParams.nIter > 0){ vector acceptanceRates = dutils.mcmc(&tParams, &tData, ptX, &metropolis3); if (fitIters != 0) { bestSample = dutils.fitSigma(acceptanceRates, parameterResults, fitIters, &tParams, &tData, ptX, &metropolis3); } } /*free up allocated memory*/ gsl_vector_free(ptX); dutils.freeAbundance(&tData); #endif outputs.push_back(outFileStub + "_" + toString(bestSample) + ".sample"); if (bestSample == 0) { outputs.push_back(outFileStub + "_1.sample"); outputs.push_back(outFileStub + "_2.sample"); } else if (bestSample == 1) { outputs.push_back(outFileStub + "_0.sample"); outputs.push_back(outFileStub + "_2.sample"); } else if (bestSample == 2) { outputs.push_back(outFileStub + "_0.sample"); outputs.push_back(outFileStub + "_1.sample"); } return outputs; } catch(exception& e) { m->errorOut(e, "MetroSichel", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/metrosichel.hpp000066400000000000000000000015771424121717000217760ustar00rootroot00000000000000// // metrosichel.hpp // Mothur // // Created by Sarah Westcott on 5/3/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
// #ifndef metrosichel_hpp #define metrosichel_hpp #include "diversityutils.hpp" #include "diversitycalc.h" //MetroSichel - Fits the compound Poisson Sichel dist /***********************************************************************/ class MetroSichel : public DiversityCalculator { public: MetroSichel(int af, double siga, double sigb, double sigg, double sigS, int n, string st); ~MetroSichel() = default; vector getValues(SAbundVector* rank); string getTag() { return "si"; } private: double sigmaA, sigmaB, sigmaG, sigmaS; int nIters, fitIters; string outFileStub; }; /***********************************************************************/ #endif /* metrosichel_hpp */ mothur-1.48.0/source/calculators/npshannon.cpp000077500000000000000000000020711424121717000214460ustar00rootroot00000000000000/* * npshannon.cpp * Dotur * * Created by John Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "npshannon.h" /***********************************************************************/ EstOutput NPShannon::getValues(SAbundVector* rank){ try { data.resize(1,0); float npShannon = 0.0000; double maxRank = (double)rank->getMaxRank(); double sampled = rank->getNumSeqs(); double Chat = 1.0000 - (double)rank->get(1)/(double)sampled; if(Chat>0) { for(int i=1;i<=maxRank;i++){ double pi = ((double) i)/((double)sampled); double ChatPi = Chat*pi; if(ChatPi>0){ npShannon += rank->get(i) * ChatPi*log(ChatPi)/(1-pow(1-ChatPi,(double)sampled)); } } npShannon = -npShannon; } else{ npShannon = 0.000; } data[0] = npShannon; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "NPShannon", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/npshannon.h000077500000000000000000000014061424121717000211140ustar00rootroot00000000000000#ifndef NPSHANNON_H #define NPSHANNON_H /* * npshannon.h * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the NPShannon estimator on single group. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class NPShannon : public Calculator { public: NPShannon() : Calculator("npshannon", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Npshannon"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/npv.cpp000077500000000000000000000014611424121717000202510ustar00rootroot00000000000000// // npv.cpp // Mothur // // Created by Sarah Westcott on 4/11/17. // Copyright © 2017 Schloss Lab. All rights reserved. 
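// NPV - negative predictive value, tn / (tn + fn), used when scoring OTU assignments against true/false positives and negatives.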
// #include "npv.hpp" /***********************************************************************/ double NPV::getValue(double tp, double tn, double fp, double fn) { try { long long nPrime = tn + fn; double negativePredictiveValue = tn / (double) nPrime; if(nPrime == 0) { negativePredictiveValue = 0; } if (isnan(negativePredictiveValue) || isinf(negativePredictiveValue)) { negativePredictiveValue = 0; } return negativePredictiveValue; } catch(exception& e) { m->errorOut(e, "NPV", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/npv.hpp000077500000000000000000000011421424121717000202520ustar00rootroot00000000000000// // npv.hpp // Mothur // // Created by Sarah Westcott on 4/11/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef npv_hpp #define npv_hpp #include "calculator.h" /***********************************************************************/ class NPV : public ClusterMetric { public: NPV() : ClusterMetric("npv") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/NPV"; } private: }; /***********************************************************************/ #endif /* npv_hpp */ mothur-1.48.0/source/calculators/nseqs.h000077500000000000000000000022631424121717000202450ustar00rootroot00000000000000#ifndef NSEQS_H #define NSEQS_H /* * nseqs.h * Mothur * * Created by Sarah Westcott on 3/16/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class NSeqs : public Calculator { public: NSeqs() : Calculator("nseqs", 1, false) {}; EstOutput getValues(SAbundVector* rank){ data.resize(1,0); data[0] = (double)rank->getNumSeqs(); return data; } EstOutput getValues(vector shared) { //return number of sequences in the sharedotus int numGroups = shared.size(); data.clear(); data.resize(numGroups,0); for (int i = 0; i < shared[0]->getNumBins(); i++) { //get bin values and set sharedByAll bool sharedByAll = true; for (int j = 0; j < numGroups; j++) { if (shared[j]->get(i) == 0) { sharedByAll = false; } } //they are shared if (sharedByAll ) { for (int j = 0; j < numGroups; j++) { data[j] += shared[j]->get(i); } } } return data; } string getCitation() { return "http://www.mothur.org/wiki/Nseqs"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/odum.cpp000077500000000000000000000014671424121717000204200ustar00rootroot00000000000000/* * odum.cpp * Mothur * * Created by westcott on 12/14/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "odum.h" /***********************************************************************/ EstOutput Odum::getValues(vector shared) { try { data.resize(1,0); double sumNum = 0.0; double sumDenom = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); sumNum += abs(Aij - Bij); sumDenom += (Aij + Bij); } data[0] = sumNum / sumDenom; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Odum", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/odum.h000077500000000000000000000011501424121717000200520ustar00rootroot00000000000000#ifndef ODUM_H #define ODUM_H /* * odum.h * Mothur * * Created by westcott on 12/14/10. 
* Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Odum : public Calculator { public: Odum() : Calculator("odum", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Odum"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/onegapdist.cpp000066400000000000000000000130341424121717000215770ustar00rootroot00000000000000// // onegapdist.cpp // Mothur // // Created by Sarah Westcott on 3/27/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "onegapdist.h" /***********************************************************************/ double oneGapDist::calcDist(Sequence A, Sequence B){ try { int difference = 0; bool openGapA = false; bool openGapB = false; string seqA = A.getAligned(); string seqB = B.getAligned(); int alignLength = seqA.length(); int start = setStart(seqA, seqB); int end = setEnd(seqA, seqB); int maxMinLength = end - start + 1; for(int i=start;i cutoff) { return 1.0000; } } if(maxMinLength == 0) { dist = 1.0000; } else { dist = (double)difference / maxMinLength; } return dist; } catch(exception& e) { m->errorOut(e, "oneGapDist", "calcDist"); exit(1); } } /***********************************************************************/ vector oneGapDist::calcDist(Sequence A, classifierOTU otu, vector cols){ //this function calcs the distance using only the columns provided try { vector dists; dists.resize(otu.numSeqs, 0.0); //if you didn't select columns, use all columns if (cols.size() == 0) { for (int i = 0; i < otu.otuData.size(); i++) { cols.push_back(i); } } classifierOTU seq(A.getAligned()); vector starts = setStarts(seq, otu, cols); vector ends = setEnds(seq, otu, cols); int alignLength = cols.size(); for (int h = 0; h < otu.numSeqs; h++) { if (m->getControl_pressed()) { break; } int maxMinLength = ends[h] - starts[h] + 1; int difference = 0; bool openGapA = false; bool openGapB = false; for(int i=starts[h];i otuChars = otu.otuData[cols[i]]; char seqB = otuChars[0]; //assume column if identical if (otuChars.size() == otu.numSeqs) { seqB = otuChars[h]; } if((seqA == '-' && seqB == '-') || (seqA == '.' && seqB == '-') || (seqA == '-' && seqB == '.')){ maxMinLength--; } //trailing gaps, quit we already calculated all the diffs else if(seqA == '.' 
&& seqB == '.'){ i+=alignLength; } //break; else if(seqB != '-' && (seqA == '-' || seqA == '.')){ //seqB is a base, seqA is a gap if(!openGapA){ difference++; openGapA = true; openGapB = false; }else { maxMinLength--; } } else if(seqA != '-' && (seqB == '-' || seqB == '.')){ //seqA is a base, seqB is a gap if(!openGapB){ difference++; openGapA = false; openGapB = true; }else { maxMinLength--; } } else if(seqA != '-' && seqB != '-'){ //both bases openGapA = false; openGapB = false; //no match if(seqA != seqB){ difference++; } } double distance = 1.0; distance = (double)difference / maxMinLength; if (distance > cutoff) { dists[h] = 1.0000; i+=alignLength; } //break; } if(maxMinLength == 0) { dists[h] = 1.0000; } else if (dists[h] == 0.0) { dists[h] = (double)difference / maxMinLength; } //not set } return dists; } catch(exception& e) { m->errorOut(e, "oneGapDist", "calcDist"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/onegapdist.h000077500000000000000000000016101424121717000212440ustar00rootroot00000000000000#ifndef ONEGAPDIST_H #define ONEGAPDIST_H /* * onegapdist.h * Mothur * * Created by Sarah Westcott on 5/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /**************************************************************************************************/ class oneGapDist : public DistCalc { public: oneGapDist(double c) : DistCalc(c) {} //finds the distance from A to each seq in otu. //this function calcs the distance using only the columns provided, if cols is empty, use all vector calcDist(Sequence A, classifierOTU otu, vector cols); double calcDist(Sequence A, Sequence B); //calc distance between 2 seqeunces string getCitation() { return "http://mothur.org"; } }; /**************************************************************************************************/ #endif mothur-1.48.0/source/calculators/onegapignore.cpp000066400000000000000000000131601424121717000221170ustar00rootroot00000000000000// // onegapignore.cpp // Mothur // // Created by Sarah Westcott on 4/20/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
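// oneGapIgnoreTermGapDist - applies the same one-gap counting rule as oneGapDist, but positions where either sequence carries only a terminal gap are excluded, and non-overlapping pairs are assigned a distance of 1.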
// #include "onegapignore.h" /***********************************************************************/ double oneGapIgnoreTermGapDist::calcDist(Sequence A, Sequence B){ try { string seqA = A.getAligned(); string seqB = B.getAligned(); bool overlap = false; int start = setStartIgnoreTermGap(seqA, seqB, overlap); int end = setEndIgnoreTermGap(seqA, seqB, overlap); //non-overlapping sequences if (!overlap) { return 1.0000; } int maxMinLength = end - start; int difference = 0; bool openGapA = false; bool openGapB = false; for(int i=start;i<=end;i++){ if(seqA[i] == '-' && seqB[i] == '-'){ maxMinLength--; } //comparing gaps, ignore else if(seqB[i] != '-' && seqA[i] == '-'){ //seqB is a base, seqA is a gap if(!openGapA){ difference++; openGapA = true; openGapB = false; }else { maxMinLength--; } } else if(seqA[i] != '-' && seqB[i] == '-'){ //seqA is a base, seqB is a gap if(!openGapB){ difference++; openGapA = false; openGapB = true; }else { maxMinLength--; } } else if(seqA[i] != '-' && seqB[i] != '-'){ //both bases openGapA = false; openGapB = false; //no match if(seqA[i] != seqB[i]){ difference++; } } dist = (double)difference / maxMinLength; if (dist > cutoff) { return 1.0000; } } if(maxMinLength == 0) { dist = 1.0000; } else { dist = (double)difference / maxMinLength; } return dist; } catch(exception& e) { m->errorOut(e, "oneGapDist", "calcDist"); exit(1); } } /***********************************************************************/ vector oneGapIgnoreTermGapDist::calcDist(Sequence A, classifierOTU otu, vector cols){ //this function calcs the distance using only the columns provided try { vector dists; dists.resize(otu.numSeqs, 0.0); //if you didn't select columns, use all columns if (cols.size() == 0) { for (int i = 0; i < otu.otuData.size(); i++) { cols.push_back(i); } } classifierOTU seq(A.getAligned()); vector starts = setStartsIgnoreTermGap(seq, otu, cols); vector ends = setEndsIgnoreTermGap(seq, otu, cols); int alignLength = cols.size(); for (int h = 0; h < otu.numSeqs; h++) { if (m->getControl_pressed()) { break; } if ((starts[h] == -1) && (ends[h] == -1)) { dists[h] = 1.0000; } //no overlap else { if (starts[h] == -1) { starts[h] = 0; } if (ends[h] == -1) { ends[h] = 0; } int maxMinLength = ends[h] - starts[h]; int difference = 0; bool openGapA = false; bool openGapB = false; for(int i=starts[h];i<=ends[h];i++){ char seqA = seq.otuData[cols[i]][0]; vector otuChars = otu.otuData[cols[i]]; char seqB = otuChars[0]; //assume column if identical if (otuChars.size() == otu.numSeqs) { seqB = otuChars[h]; } if(seqA == '-' && seqB == '-'){ maxMinLength--; } //comparing gaps, ignore else if(seqB != '-' && seqA == '-'){ //seqB is a base, seqA is a gap if(!openGapA){ difference++; openGapA = true; openGapB = false; }else { maxMinLength--; } } else if(seqA != '-' && seqB == '-'){ //seqA is a base, seqB is a gap if(!openGapB){ difference++; openGapA = false; openGapB = true; }else { maxMinLength--; } } else if(seqA != '-' && seqB != '-'){ //both bases openGapA = false; openGapB = false; //no match if(seqA != seqB){ difference++; } } double distance = 1.0; distance = (double)difference / maxMinLength; if (distance > cutoff) { dists[h] = 1.0000; i+=alignLength; } //break; } if(maxMinLength == 0) { dists[h] = 1.0000; } else if (dists[h] == 0.0) { dists[h] = (double)difference / maxMinLength; } //not set } } return dists; } catch(exception& e) { m->errorOut(e, "oneGapDist", "calcDist"); exit(1); } } /***********************************************************************/ 
mothur-1.48.0/source/calculators/onegapignore.h000077500000000000000000000017141424121717000215710ustar00rootroot00000000000000#ifndef ONEIGNOREGAPS_H #define ONEIGNOREGAPS_H /* * onegapignore.h * Mothur * * Created by Sarah Westcott on 5/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /**************************************************************************************************/ class oneGapIgnoreTermGapDist : public DistCalc { public: oneGapIgnoreTermGapDist(double c) : DistCalc(c) {} //finds the distance from A to each seq in otu. //this function calcs the distance using only the columns provided, if cols is empty, use all vector calcDist(Sequence A, classifierOTU otu, vector cols); double calcDist(Sequence A, Sequence B); //calc distance between 2 seqeunces string getCitation() { return "http://mothur.org"; } private: }; /**************************************************************************************************/ #endif mothur-1.48.0/source/calculators/pam.hpp000066400000000000000000000170151424121717000202270ustar00rootroot00000000000000// // pam.hpp // Mothur // // Created by Sarah Westcott on 7/1/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #ifndef pam_hpp #define pam_hpp #include "calculator.h" /* dcmut version of PAM model from www.ebi.ac.uk/goldman-srv/dayhoff/ */ static double pameigs[] = {0,-1.93321786301018,-2.20904642493621,-1.74835983874903, -1.64854548332072,-1.54505559488222,-1.33859384676989,-1.29786201193594, -0.235548517495575,-0.266951066089808,-0.28965813670665,-1.10505826965282, -1.04323310568532,-0.430423720979904,-0.541719761016713,-0.879636093986914, -0.711249353378695,-0.725050487280602,-0.776855937389452,-0.808735559461343}; static double pamprobs[20][20] ={ {0.08712695644, 0.04090397955, 0.04043197978, 0.04687197656, 0.03347398326, 0.03825498087, 0.04952997524, 0.08861195569, 0.03361898319, 0.03688598156, 0.08535695732, 0.08048095976, 0.01475299262, 0.03977198011, 0.05067997466, 0.06957696521, 0.05854197073, 0.01049399475, 0.02991598504, 0.06471796764}, {0.07991048383, 0.006888314018, 0.03857806206, 0.07947073194, 0.004895492884, 0.03815829405, -0.1087562465, 0.008691167141, -0.0140554828, 0.001306404001, -0.001888411299, -0.006921303342, 0.0007655604228, 0.001583298443, 0.006879590446, -0.171806883, 0.04890917949, 0.0006700432804, 0.0002276237277, -0.01350591875}, {-0.01641514483, -0.007233933239, -0.1377830621, 0.1163201333, -0.002305138017, 0.01557250366, -0.07455879489, -0.003225343503, 0.0140630487, 0.005112274204, 0.001405731862, 0.01975833782, -0.001348402973, -0.001085733262, -0.003880514478, 0.0851493313, -0.01163526615, -0.0001197903399, 0.002056153393, 0.0001536095643}, {0.009669278686, -0.006905863869, 0.101083544, 0.01179903104, -0.003780967591, 0.05845105878, -0.09138357299, -0.02850503638, -0.03233951408, 0.008708065876, -0.004700705411, -0.02053221579, 0.001165851398, -0.001366585849, -0.01317695074, 0.1199985703, -0.1146346193, -0.0005953021314, -0.0004297615194, 0.007475695618}, {0.1722243502, -0.003737582995, -0.02964873222, -0.02050116381, -0.0004530478465, -0.02460043205, 0.02280768412, -0.02127364909, 0.01570095258, 0.1027744285, -0.005330539586, 0.0179697651, -0.002904077286, -0.007068126663, -0.0142869583, -0.01444241844, -0.08218861544, 0.0002069181629, 0.001099671379, -0.1063484263}, {-0.1553433627, -0.001169168032, 0.02134785337, 0.0007602305436, 0.0001395330122, 0.03194992019, -0.01290252206, 0.03281720789, -0.01311103735, 0.1177254769, -0.008008783885, 
-0.02375317548, -0.002817809762, -0.008196682776, 0.01731267617, 0.01853526375, 0.08249908546, -2.788771776e-05, 0.001266182191, -0.09902299976}, {-0.03671080341, 0.0274168035, 0.04625877597, 0.07520706414, -0.0001833803619, -0.1207833161, -0.006415807779, -0.005465629648, 0.02778273972, 0.007589688485, -0.02945266034, -0.03797542064, 0.07044042052, -0.002018573865, 0.01845277071, 0.006901513991, -0.02430934639, -0.0005919635873, -0.001266962331, -0.01487591261}, {-0.03060317816, 0.01182361623, 0.04200270053, 0.05406235279, -0.0003920498815, -0.09159709348, -0.009602690652, -0.00382944418, 0.01761361993, 0.01605684317, 0.05198878008, 0.02198696949, -0.09308930025, -0.00102622863, 0.01477637127, 0.0009314065393, -0.01860959472, -0.0005964703968, -0.002694284083, 0.02079767439}, {0.0195976494, -0.005104484936, 0.007406728707, 0.01236244954, 0.0201446796, 0.007039564785, 0.01276942134, 0.02641595685, 0.002764624354, 0.001273314658, -0.01335316035, 0.01105658671, 2.148773499e-05, -0.02692205639, 0.0118684991, 0.01212624708, 0.01127770094, -0.09842754796, -0.01942336432, 0.007105703151}, {-0.01819461888, -0.01509348507, -0.01297636935, -0.01996453439, 0.1715705905, -0.01601550692, -0.02122706144, -0.02854628494, -0.009351082371, -0.001527995472, -0.010198224, -0.03609537551, -0.003153182095, 0.02395980501, -0.01378664626, -0.005992611421, -0.01176810875, 0.003132361603, 0.03018439539, -0.004956065656}, {-0.02733614784, -0.02258066705, -0.0153112506, -0.02475728664, -0.04480525045, -0.01526640341, -0.02438517425, -0.04836914601, -0.00635964824, 0.02263169831, 0.09794101931, -0.04004304158, 0.008464393478, 0.1185443142, -0.02239294163, -0.0281550321, -0.01453581604, -0.0246742804, 0.0879619849, 0.02342867605}, {0.06483718238, 0.1260012082, -0.006496013283, 0.009914915531, -0.004181603532, 0.0003493226286, 0.01408035752, -0.04881663016, -0.03431167356, -0.01768005602, 0.02362447761, -0.1482364784, -0.01289035619, -0.001778893279, -0.05240099752, 0.05536174567, 0.06782165352, -0.003548568717, 0.001125301173, -0.03277489363}, {0.06520296909, -0.0754802543, 0.03139281903, -0.03266449554, -0.004485188002, -0.03389072036, -0.06163274338, -0.06484769882, 0.05722658289, -0.02824079619, 0.01544837349, 0.03909752708, 0.002029218884, 0.003151939572, -0.05471208363, 0.07962008342, 0.125916047, 0.0008696184937, -0.01086027514, -0.05314092355}, {0.004543119081, 0.01935177735, 0.01905511007, 0.02682993409, -0.01199617967, 0.01426278655, 0.02472521255, 0.03864795501, 0.02166224804, -0.04754243479, -0.1921545477, 0.03621321546, -0.02120627881, 0.04928097895, 0.009396088815, 0.01748042052, -6.173742851e-05, -0.003168033098, 0.07723565812, -0.08255529309}, {0.06710378668, -0.09441410284, -0.004801776989, 0.008830272165, -0.01021645042, -0.02764365608, 0.004250361851, 0.1648777542, -0.037446109, 0.004541057635, -0.0296980702, -0.1532325189, -0.008940580901, 0.006998050812, 0.02338809379, 0.03175059182, 0.02033965512, 0.006388075608, 0.001762762044, 0.02616280361}, {0.01915943021, -0.05432967274, 0.01249342683, 0.06836622457, 0.002054462161, -0.01233535859, 0.07087282652, -0.08948637051, -0.1245896013, -0.02204522882, 0.03791481736, 0.06557467874, 0.005529294156, -0.006296644235, 0.02144530752, 0.01664230081, 0.02647078439, 0.001737725271, 0.01414149877, -0.05331990116}, {0.0266659303, 0.0564142853, -0.0263767738, -0.08029726006, -0.006059357163, -0.06317558457, -0.0911894019, 0.05401487057, -0.08178072458, 0.01580699778, -0.05370550396, 0.09798653264, 0.003934944022, 0.01977291947, 0.0441198541, 0.02788220393, 
0.03201877081, -0.00206161759, -0.005101423308, 0.03113033802}, {0.02980360751, -0.009513246268, -0.009543527165, -0.02190644172, -0.006146440672, 0.01207009085, -0.0126989156, -0.1378266418, 0.0275235217, 0.00551720592, -0.03104791544, -0.07111701247, -0.006081754489, -0.01337494521, 0.1783961085, 0.01453225059, 0.01938736048, 0.0004488631071, 0.0110844398, 0.02049339243}, {-0.01433508581, 0.01258858175, -0.004294252236, -0.007146532854, 0.009541628809, 0.008040155729, -0.006857781832, 0.05584120066, 0.007749418365, -0.05867835844, 0.08008131283, -0.004877854222, -0.0007128540743, 0.09489058424, 0.06421121962, 0.00271493526, -0.03229944773, -0.001732026038, -0.08053448316, -0.1241903609}, {-0.009854113227, 0.01294129929, -0.00593064392, -0.03016833115, -0.002018439732, -0.00792418722, -0.03372768732, 0.07828561288, 0.007722254639, -0.05067377561, 0.1191848621, 0.005059475202, 0.004762387166, -0.1029870175, 0.03537190114, 0.001089956203, -0.02139157573, -0.001015245062, 0.08400521847, -0.08273195059}}; /**************************************************************************************************/ //PAM - Dayhoff PAM matrix class PAM : public DistCalc { public: PAM(double c) : DistCalc(c) { name = "PAM (Dayhoff PAM matrix)"; } double calcDist(Protein A, Protein B) { return (makeDists(A, B, pameigs, pamprobs)); } //calc distance between 2 seqeunces string getCitation() { return "https://evolution.gs.washington.edu/phylip/doc/protdist.html, https://evolution.genetics.washington.edu/phylip/credits.html"; } private: }; /**************************************************************************************************/ #endif /* pam_hpp */ mothur-1.48.0/source/calculators/parsimony.cpp000077500000000000000000000127051424121717000214720ustar00rootroot00000000000000/* * parsimony.cpp * Mothur * * Created by Sarah Westcott on 1/26/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "parsimony.h" /**************************************************************************************************/ Parsimony::Parsimony(vector G) : Groups(G) { try { int numGroups = Groups.size(); //calculate number of comparisons i.e. 
with groups A,B,C = AB, AC, BC = 3; for (int i=0; i groups; groups.push_back(Groups[i]); groups.push_back(Groups[l]); namesOfGroupCombos.push_back(groups); } } } catch(exception& e) { m->errorOut(e, "Parsimony", "Parsimony"); exit(1); } } /**************************************************************************************************/ EstOutput Parsimony::getValues(Tree* t, int p, string o) { try { processors = p; outputDir = o; CountTable* ct = t->getCountTable(); Treenames = t->getTreeNames(); return (createProcesses(t, ct)); } catch(exception& e) { m->errorOut(e, "Parsimony", "getValues"); exit(1); } } /**************************************************************************************************/ void driverPars(parsData* params) { try { Tree copyTree(params->ct, params->Treenames); int count = 0; for (int h = params->start; h < (params->start+params->num); h++) { if (params->m->getControl_pressed()) { break; } int score = 0; //groups in this combo vector groups = params->namesOfGroupCombos[h]; //copy users tree so that you can redo pgroups copyTree.getCopy(params->t); //create pgroups that reflect the groups the user want to use for(int i=copyTree.getNumLeaves();im->getControl_pressed()) { break; } int lc = copyTree.tree[i].getLChild(); int rc = copyTree.tree[i].getRChild(); int iSize = copyTree.tree[i].pGroups.size(); int rcSize = copyTree.tree[rc].pGroups.size(); int lcSize = copyTree.tree[lc].pGroups.size(); //if isize are 0 then that branch is to be ignored if (iSize == 0) { } else if ((rcSize == 0) || (lcSize == 0)) { } //if you have more groups than either of your kids then theres been a change. else if(iSize > rcSize || iSize > lcSize){ score++; } } params->results[count] = score; count++; } } catch(exception& e) { params->m->errorOut(e, "Parsimony", "driver"); exit(1); } } /**************************************************************************************************/ EstOutput Parsimony::createProcesses(Tree* t, CountTable* ct) { try { vector lines; int remainingPairs = namesOfGroupCombos.size(); if (remainingPairs < processors) { processors = remainingPairs; } int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { CountTable* copyCount = new CountTable(); copyCount->copy(ct); Tree* copyTree = new Tree(copyCount, Treenames); copyTree->getCopy(t); parsData* dataBundle = new parsData(lines[i+1].start, lines[i+1].end, namesOfGroupCombos, copyTree, copyCount); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverPars, dataBundle)); } parsData* dataBundle = new parsData(lines[0].start, lines[0].end, namesOfGroupCombos, t, ct); driverPars(dataBundle); EstOutput results = dataBundle->results; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); for (int j = 0; j < data[i]->results.size(); j++) { results.push_back(data[i]->results[j]); } delete data[i]->t; delete data[i]->ct; delete data[i]; delete workerThreads[i]; } return results; } catch(exception& e) { m->errorOut(e, "Parsimony", "createProcesses"); exit(1); } } 
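/* A standalone sketch (not mothur code) of the work split used in createProcesses above: each
   processor receives a contiguous block of group-combination indices and the last processor
   takes whatever remains. Note the original calls ceil(remainingPairs / remainingProcessors) on
   two ints, so the division truncates before ceil is applied; the sketch below uses an explicit
   floating-point ceiling to show the intended layout. */
#include <cmath>
#include <utility>
#include <vector>

//returns one (startIndex, numPairs) block per processor, mirroring the linePair layout above
static std::vector< std::pair<int,int> > partitionCombos(int totalCombos, int processors) {
    if (totalCombos < processors) { processors = totalCombos; }
    std::vector< std::pair<int,int> > lines;
    int start = 0; int remaining = totalCombos;
    for (int left = processors; left > 0; left--) {
        int num = remaining;                                          //last processor takes the remainder
        if (left != 1) { num = (int)ceil(remaining / (double)left); }
        lines.push_back(std::make_pair(start, num));
        start += num; remaining -= num;
    }
    return lines;
}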
/**************************************************************************************************/ mothur-1.48.0/source/calculators/parsimony.h000077500000000000000000000062561424121717000211430ustar00rootroot00000000000000#ifndef PARSIMONY_H #define PARSIMONY_H /* * parsimony.h * Mothur * * Created by Sarah Westcott on 1/26/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "treecalculator.h" #include "counttable.h" /***********************************************************************/ class Parsimony : public TreeCalculator { public: Parsimony(vector G); ~Parsimony() = default;; EstOutput getValues(Tree*, int, string); private: vector Groups, Treenames; int processors; string outputDir; Utils util; vector< vector > namesOfGroupCombos; EstOutput createProcesses(Tree*, CountTable*); }; /***********************************************************************/ struct parsData { int start; int num; MothurOut* m; EstOutput results; vector< vector > namesOfGroupCombos; Tree* t; CountTable* ct; Utils util; vector Treenames; parsData(){} parsData(int st, int en, vector< vector > ngc, Tree* tree, CountTable* count) { m = MothurOut::getInstance(); start = st; num = en; namesOfGroupCombos = ngc; t = tree; ct = count; Treenames = t->getTreeNames(); results.resize(num); } }; /**************************************************************************************************/ #if defined NON_WINDOWS #else static DWORD WINAPI MyParsimonyThreadFunction(LPVOID lpParam){ parsData* pDataArray; pDataArray = (parsData*)lpParam; try { pDataArray->results.resize(pDataArray->num); Tree* copyTree = new Tree(pDataArray->ct, pDataArray->Treenames); int count = 0; for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) { if (pDataArray->m->getControl_pressed()) { delete copyTree; return 0; } int score = 0; //groups in this combo vector groups = pDataArray->namesOfGroupCombos[h]; //copy users tree so that you can redo pgroups copyTree->getCopy(pDataArray->t); //create pgroups that reflect the groups the user want to use for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, groups)); } for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ if (pDataArray->m->getControl_pressed()) { return 0; } int lc = copyTree->tree[i].getLChild(); int rc = copyTree->tree[i].getRChild(); int iSize = copyTree->tree[i].pGroups.size(); int rcSize = copyTree->tree[rc].pGroups.size(); int lcSize = copyTree->tree[lc].pGroups.size(); //if isize are 0 then that branch is to be ignored if (iSize == 0) { } else if ((rcSize == 0) || (lcSize == 0)) { } //if you have more groups than either of your kids then theres been a change. else if(iSize > rcSize || iSize > lcSize){ score++; } } pDataArray->results[count] = score; count++; } delete copyTree; return 0; } catch(exception& e) { pDataArray->m->errorOut(e, "Parsimony", "MyParsimonyThreadFunction"); exit(1); } } #endif #endif mothur-1.48.0/source/calculators/pmb.hpp000066400000000000000000000222001424121717000202200ustar00rootroot00000000000000// // pmb.hpp // Mothur // // Created by Sarah Westcott on 6/29/21. // Copyright © 2021 Schloss Lab. All rights reserved. 
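//  Provides the eigenvalue (pmbeigs) and amino-acid probability (pmbprobs) decomposition of the
//  Henikoff/Tillier PMB model; PMB::calcDist below hands both arrays to the shared makeDists
//  routine, just as pam.hpp does with the Dayhoff PAM tables.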
// #ifndef pmb_hpp #define pmb_hpp #include "calculator.h" /* PMB matrix decomposition courtesy of Elisabeth Tillier */ static double pmbeigs[] = {0.0000001586972220,-1.8416770496147100, -1.6025046986139100,-1.5801012515121300, -1.4987794099715900,-1.3520794233801900,-1.3003469390479700,-1.2439503327631300, -1.1962574080244200,-1.1383730501367500,-1.1153278910708000,-0.4934843510654760, -0.5419014550215590,-0.9657997830826700,-0.6276075673757390,-0.6675927795018510, -0.6932641383465870,-0.8897872681859630,-0.8382698977371710,-0.8074694642446040}; static double pmbprobs[20][20] = {{0.0771762457248147,0.0531913844998640,0.0393445076407294,0.0466756566755510, 0.0286348361997465,0.0312327748383639,0.0505410248721427,0.0767106611472993, 0.0258916271688597,0.0673140562194124,0.0965705469252199,0.0515979465932174, 0.0250628079438675,0.0503492018628350,0.0399908189418273,0.0641898881894471, 0.0517539616710987,0.0143507440546115,0.0357994592438322,0.0736218495862984}, {0.0368263046116572,-0.0006728917107827,0.0008590805287740,-0.0002764255356960, 0.0020152937187455,0.0055743720652960,0.0003213317669367,0.0000449190281568, -0.0004226254397134,0.1805040629634510,-0.0272246813586204,0.0005904606533477, -0.0183743200073889,-0.0009194625608688,0.0008173657533167,-0.0262629806302238, 0.0265738757209787,0.0002176606241904,0.0021315644838566,-0.1823229927207580}, {-0.0194800075560895,0.0012068088610652,-0.0008803318319596,-0.0016044273960017, -0.0002938633803197,-0.0535796754602196,0.0155163896648621,-0.0015006360762140, 0.0021601372013703,0.0268513218744797,-0.1085292493742730,0.0149753083138452, 0.1346457366717310,-0.0009371698759829,0.0013501708044116,0.0346352293103622, -0.0276963770242276,0.0003643142783940,0.0002074817333067,-0.0174108903914110}, {0.0557839400850153,0.0023271577185437,0.0183481103396687,0.0023339480096311, 0.0002013267015151,-0.0227406863569852,0.0098644845475047,0.0064721276774396, 0.0001389408104210,-0.0473713878768274,-0.0086984445005797,0.0026913674934634, 0.0283724052562196,0.0001063665179457,0.0027442574779383,-0.1875312134708470, 0.1279864877057640,0.0005103347834563,0.0003155113168637,0.0081451082759554}, {0.0037510125027265,0.0107095920636885,0.0147305410328404,-0.0112351252180332, -0.0001500408626446,-0.1523450933729730,0.0611532413339872,-0.0005496748939503, 0.0048714378736644,-0.0003826320053999,0.0552010244407311,0.0482555671001955, -0.0461664995115847,-0.0021165008617978,-0.0004574454232187,0.0233755883688949, -0.0035484915422384,0.0009090698422851,0.0013840637687758,-0.0073895139302231}, {-0.0111512564930024,0.1025460064723080,0.0396772456883791,-0.0298408501361294, -0.0001656742634733,-0.0079876311843289,0.0712644184507945,-0.0010780604625230, -0.0035880882043592,0.0021070399334252,0.0016716329894279,-0.1810123023850110, 0.0015141703608724,-0.0032700852781804,0.0035503782441679,0.0118634302028026, 0.0044561606458028,-0.0001576678495964,0.0023470722225751,-0.0027457045397157}, {0.1474525743949170,-0.0054432538500293,0.0853848892349828,-0.0137787746207348, -0.0008274830358513,0.0042248844582553,0.0019556229305563,-0.0164191435175148, -0.0024501858854849,0.0120908948084233,-0.0381456105972653,0.0101271614855119, -0.0061945941321859,0.0178841099895867,-0.0014577779202600,-0.0752120602555032, -0.1426985695849920,0.0002862275078983,-0.0081191734261838,0.0313401149422531}, {0.0542034611735289,-0.0078763926211829,0.0060433542506096,0.0033396210615510, 0.0013965072374079,0.0067798903832256,-0.0135291136622509,-0.0089982442731848, 
-0.0056744537593887,-0.0766524225176246,0.1881210263933930,-0.0065875518675173, 0.0416627569300375,-0.0953804133524747,-0.0012559228448735,0.0101622644292547, -0.0304742453119050,0.0011702318499737,0.0454733434783982,-0.1119239362388150}, {0.1069409037912470,0.0805064400880297,-0.1127352030714600,0.1001181253523260, -0.0021480427488769,-0.0332884841459003,-0.0679837575848452,-0.0043812841356657, 0.0153418716846395,-0.0079441315103188,-0.0121766182046363,-0.0381127991037620, -0.0036338726532673,0.0195324059593791,-0.0020165963699984,-0.0061222685010268, -0.0253761448771437,-0.0005246410999057,-0.0112205170502433,0.0052248485517237}, {-0.0325247648326262,0.0238753651653669,0.0203684886605797,0.0295666232678825, -0.0003946714764213,-0.0157242718469554,-0.0511737848084862,0.0084725632040180, -0.0167068828528921,0.0686962159427527,-0.0659702890616198,-0.0014289912494271, -0.0167000964093416,-0.1276689083678200,0.0036575057830967,-0.0205958145531018, 0.0000368919612829,0.0014413626622426,0.1064360941926030,0.0863372661517408}, {-0.0463777468104402,0.0394712148670596,0.1118686750747160,0.0440711686389031, -0.0026076286506751,-0.0268454015202516,-0.1464943067133240,-0.0137514051835380, -0.0094395514284145,-0.0144124844774228,0.0249103379323744,-0.0071832157138676, 0.0035592787728526,0.0415627419826693,0.0027040097365669,0.0337523666612066, 0.0316121324137152,-0.0011350177559026,-0.0349998884574440,-0.0302651879823361}, {0.0142360925194728,0.0413145623127025,0.0324976427846929,0.0580930922002398, -0.0586974207121084,0.0202001168873069,0.0492204086749069,0.1126593173463060, 0.0116620013776662,-0.0780333711712066,-0.1109786767320410,0.0407775100936731, -0.0205013161312652,-0.0653458585025237,0.0347351829703865,0.0304448983224773, 0.0068813748197884,-0.0189002309261882,-0.0334507528405279,-0.0668143558699485}, {-0.0131548829657936,0.0044244322828034,-0.0050639951827271,-0.0038668197633889, -0.1536822386530220,0.0026336969165336,0.0021585651200470,-0.0459233839062969, 0.0046854727140565,0.0393815434593599,0.0619554007991097,0.0027456299925622, 0.0117574347936383,0.0373018612990383,0.0024818527553328,-0.0133956606027299, -0.0020457128424105,0.0154178819990401,0.0246524142683911,0.0275363065682921}, {-0.1542307272455030,0.0364861558267547,-0.0090880407008181,0.0531673937889863, 0.0157585615170580,0.0029986538457297,0.0180194047699875,0.0652152443589317, 0.0266842840376180,0.0388457366405908,0.0856237634510719,0.0126955778952183, 0.0099593861698250,-0.0013941794862563,0.0294065511237513,-0.1151906949298290, -0.0852991447389655,0.0028699120202636,-0.0332087026659522,0.0006811857297899}, {0.0281300736924501,-0.0584072081898638,-0.0178386569847853,-0.0536470338171487, -0.0186881656029960,-0.0240008730656106,-0.0541064820498883,0.2217137098936020, -0.0260500001542033,0.0234505236798375,0.0311127151218573,-0.0494139126682672, 0.0057093465049849,0.0124937286655911,-0.0298322975915689,0.0006520211333102, -0.0061018680727128,-0.0007081999479528,-0.0060523759094034,0.0215845995364623}, {0.0295321046399105,-0.0088296411830544,-0.0065057049917325,-0.0053478115612781, -0.0100646496794634,-0.0015473619084872,0.0008539960632865,-0.0376381933046211, -0.0328135588935604,0.0672161874239480,0.0667626853916552,-0.0026511651464901, 0.0140451514222062,-0.0544836996133137,0.0427485157912094,0.0097455780205802, 0.0177309072915667,-0.0828759701187452,-0.0729504795471370,0.0670731961252313}, {0.0082646581043963,-0.0319918630534466,-0.0188454445200422,-0.0374976353856606, 
0.0037131290686848,-0.0132507796987883,-0.0306958830735725,-0.0044119395527308, -0.0140786756619672,-0.0180512599925078,-0.0208243802903953,-0.0232202769398931, -0.0063135878270273,0.0110442171178168,0.1824538048228460,-0.0006644614422758, -0.0069909097436659,0.0255407650654681,0.0099119399501151,-0.0140911517070698}, {0.0261344441524861,-0.0714454044548650,0.0159436926233439,0.0028462736216688, -0.0044572637889080,-0.0089474834434532,-0.0177570282144517,-0.0153693244094452, 0.1160919467206400,0.0304911481385036,0.0047047513411774,-0.0456535116423972, 0.0004491494948617,-0.0767108879444462,-0.0012688533741441,0.0192445965934123, 0.0202321954782039,0.0281039933233607,-0.0590403018490048,0.0364080426546883}, {0.0115826306265004,0.1340228176509380,-0.0236200652949049,-0.1284484655137340, -0.0004742338006503,0.0127617346949511,-0.0428560878860394,0.0060030732454125, 0.0089182609926781,0.0085353834972860,0.0048464809638033,0.0709740071429510, 0.0029940462557054,-0.0483434904493132,-0.0071713680727884,-0.0036840391887209, 0.0031454003250096,0.0246243550241551,-0.0449551277644180,0.0111449232769393}, {0.0140356721886765,-0.0196518236826680,0.0030517022326582,0.0582672093364850, -0.0000973895685457,0.0021704767224292,0.0341806268602705,-0.0152035987563018, -0.0903198657739177,0.0259623214586925,0.0155832497882743,-0.0040543568451651, 0.0036477631918247,-0.0532892744763217,-0.0142569373662724,0.0104500681408622, 0.0103483945857315,0.0679534422398752,-0.0768068882938636,0.0280289727046158}} ; /**************************************************************************************************/ //PMB (Henikoff/Tillier PMB matrix) class PMB : public DistCalc { public: PMB(double c) : DistCalc(c) { name = "PMB (Henikoff/Tillier PMB matrix)"; } double calcDist(Protein A, Protein B) { return (makeDists(A, B, pmbeigs, pmbprobs)); } //calc distance between 2 seqeunces string getCitation() { return "https://evolution.gs.washington.edu/phylip/doc/protdist.html, https://evolution.genetics.washington.edu/phylip/credits.html"; } private: }; /**************************************************************************************************/ #endif /* pmb_hpp */ mothur-1.48.0/source/calculators/ppv.cpp000077500000000000000000000014611424121717000202530ustar00rootroot00000000000000// // ppv.cpp // Mothur // // Created by Sarah Westcott on 4/11/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "ppv.hpp" /***********************************************************************/ double PPV::getValue(double tp, double tn, double fp, double fn) { try { long long pPrime = tp + fp; double positivePredictiveValue = tp / (double) pPrime; if(pPrime == 0) { positivePredictiveValue = 0; } if (isnan(positivePredictiveValue) || isinf(positivePredictiveValue)) { positivePredictiveValue = 0; } return positivePredictiveValue; } catch(exception& e) { m->errorOut(e, "PPV", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/ppv.hpp000077500000000000000000000011411424121717000202530ustar00rootroot00000000000000// // ppv.hpp // Mothur // // Created by Sarah Westcott on 4/11/17. // Copyright © 2017 Schloss Lab. All rights reserved. 
// #ifndef ppv_hpp #define ppv_hpp #include "calculator.h" /***********************************************************************/ class PPV : public ClusterMetric { public: PPV() : ClusterMetric("ppv") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/PPV"; } private: }; /***********************************************************************/ #endif /* ppv_hpp */ mothur-1.48.0/source/calculators/prng.cpp000077500000000000000000000144541424121717000204220ustar00rootroot00000000000000/* A C-program for MT19937, with initialization improved 2002/1/26. Coded by Takuji Nishimura and Makoto Matsumoto. Before using, initialize the state by using init_genrand(seed) or init_by_array(init_key, key_length). Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The names of its contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Any feedback is very welcome. http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) */ #include #include "prng.h" /* Period parameters */ #define N 624 #define M 397 #define MATRIX_A 0x9908b0dfUL /* constant vector a */ #define UPPER_MASK 0x80000000UL /* most significant w-r bits */ #define LOWER_MASK 0x7fffffffUL /* least significant r bits */ #define NJ_RAND_MAX 0x7fffffffUL static unsigned long mt[N]; /* the array for the state vector */ static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */ /* initializes mt[N] with a seed */ void init_genrand(unsigned long s) { mt[0]= s & 0xffffffffUL; for (mti=1; mti> 30)) + mti); /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ /* In the previous versions, MSBs of the seed affect */ /* only MSBs of the array mt[]. */ /* 2002/01/09 modified by Makoto Matsumoto */ mt[mti] &= 0xffffffffUL; /* for >32 bit machines */ } } /* initialize by an array with array-length */ /* init_key is the array for initializing keys */ /* key_length is its length */ /* slight change for C++, 2004/2/26 */ void init_by_array(unsigned long init_key[], int key_length) { int i, j, k; init_genrand(19650218UL); i=1; j=0; k = (N>key_length ? 
N : key_length); for (; k; k--) { mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL)) + init_key[j] + j; /* non linear */ mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ i++; j++; if (i>=N) { mt[0] = mt[N-1]; i=1; } if (j>=key_length) j=0; } for (k=N-1; k; k--) { mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL)) - i; /* non linear */ mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ i++; if (i>=N) { mt[0] = mt[N-1]; i=1; } } mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ } /* generates a random number on [0,0xffffffff]-interval */ unsigned long genrand_int32(void) { unsigned long y; static unsigned long mag01[2]={0x0UL, MATRIX_A}; /* mag01[x] = x * MATRIX_A for x=0,1 */ if (mti >= N) { /* generate N words at one time */ int kk; if (mti == N+1) /* if init_genrand() has not been called, */ init_genrand(5489UL); /* a default initial seed is used */ for (kk=0;kk> 1) ^ mag01[y & 0x1UL]; } for (;kk> 1) ^ mag01[y & 0x1UL]; } y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK); mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL]; mti = 0; } y = mt[mti++]; /* Tempering */ y ^= (y >> 11); y ^= (y << 7) & 0x9d2c5680UL; y ^= (y << 15) & 0xefc60000UL; y ^= (y >> 18); return y; } /* generates a random number on [0,0x7fffffff]-interval */ long int genrand_int31(void) { return (long)(genrand_int32()>>1); } /* These real versions are due to Isaku Wada, 2002/01/09 added */ /* generates a random number on [0,1]-real-interval */ double genrand_real1(void) { return genrand_int32()*(1.0/4294967295.0); /* divided by 2^32-1 */ } /* generates a random number on [0,1)-real-interval */ double genrand_real2(void) { return genrand_int32()*(1.0/4294967296.0); /* divided by 2^32 */ } /* generates a random number on (0,1)-real-interval */ double genrand_real3(void) { return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0); /* divided by 2^32 */ } /* generates a random number on [0,1) with 53-bit resolution*/ double genrand_res53(void) { unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; return(a*67108864.0+b)*(1.0/9007199254740992.0); } /* * NJ_genrand_int31_top() - Returns an int in the range 0..top * * This function attempts to remove bias in selecting random * integers in a range. * */ long int NJ_genrand_int31_top(long int top) { long int overflow; long int r; long int retval; if(top <= 0) { return(0); } else { overflow = (NJ_RAND_MAX / top) * top; } while(1) { r = genrand_int31(); if(r < overflow) { break; } } retval = r % top; return(retval); } mothur-1.48.0/source/calculators/prng.h000077500000000000000000000047331424121717000200660ustar00rootroot00000000000000/* * prng.h * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * Some function prototypes for the Mersenne Twister PRNG * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #ifndef _INC_PRNG_H_ #define _INC_PRNG_H_ 1 #ifdef __cplusplus extern "C" { #endif #define NJ_RAND_MAX 0x7fffffffUL /* some function prototypes */ void init_genrand(unsigned long s); void init_by_array(unsigned long init_key[], int key_length); unsigned long genrand_int32(void); long int genrand_int31(void); double genrand_real1(void); double genrand_real2(void); double genrand_real3(void); double genrand_res53(void); long int NJ_genrand_int31_top(long int top); #ifdef __cplusplus } #endif #endif /* _INC_PRNG_H_ */ mothur-1.48.0/source/calculators/qstat.cpp000077500000000000000000000033661424121717000206100ustar00rootroot00000000000000/* * qstat.cpp * Mothur * * Created by Thomas Ryabin on 3/4/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "qstat.h" /***********************************************************************/ EstOutput QStat::getValues(SAbundVector* rank){ try { /*test data VVV int dstring[] = {0,0,1,4,2,0,2,1,1,1,1,1,0,1,1,2,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; vector dvec; for(int i = 0; i < 171; i++) dvec.push_back(dstring[i]); int mr = 170; int nb = 29; int ns = 884; SAbundVector rankw = SAbundVector(dvec, mr,nb,ns); SAbundVector *rank = &rankw;*/ data.resize(1,0); int numSpec = rank->getNumBins(); int r1 = -1; int r3 = -1; int r1Ind = 0; int r3Ind = 0; double sumSpec = 0; double iqSum = 0; for(int i = 1; i < rank->size(); i++) { if(r1 != -1 && r3 != -1) i = rank->size(); sumSpec += rank->get(i); if(r1 == -1 && sumSpec >= numSpec*.25) { r1 = rank->get(i); r1Ind = i; } else if(r3 == -1 && sumSpec >= numSpec*.75) { r3 = rank->get(i); r3Ind = i; } else if(sumSpec >= numSpec*.25 && sumSpec < numSpec*.75) iqSum += rank->get(i); } double qstat = (.5*r1 + iqSum + .5*r3)/log((double)r3Ind/r1Ind); data[0] = qstat; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "QStat", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/qstat.h000077500000000000000000000013651424121717000202520ustar00rootroot00000000000000#ifndef QSTAT_H #define QSTAT_H /* * qstat.h * Mothur * * Created by Thomas Ryabin on 3/4/09. 
* Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /*This class implements the q statistic on single group. It is a child of the calculator class.*/ /***********************************************************************/ class QStat : public Calculator { public: QStat() : Calculator("qstat", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Qstat"; } private: RAbundVector rdata; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sensitivity.cpp000077500000000000000000000013701424121717000220370ustar00rootroot00000000000000// // sensitivity.cpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "sensitivity.hpp" /***********************************************************************/ double Sensitivity::getValue(double tp, double tn, double fp, double fn) { try { long long p = tp + fn; double sensitivity = tp / (double) p; if(p == 0) { sensitivity = 0; } if (isnan(sensitivity) || isinf(sensitivity)) { sensitivity = 0; } return sensitivity; } catch(exception& e) { m->errorOut(e, "Sensitivity", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sensitivity.hpp000077500000000000000000000012511424121717000220420ustar00rootroot00000000000000// // sensitivity.hpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef sensitivity_hpp #define sensitivity_hpp #include "calculator.h" /***********************************************************************/ class Sensitivity : public ClusterMetric { public: Sensitivity() : ClusterMetric("sens") {}; double getValue(double tp, double tn, double fp, double fn); //ignores tn, fp string getCitation() { return "http://www.mothur.org/wiki/Sensitivity"; } private: }; /***********************************************************************/ #endif /* sensitivity_hpp */ mothur-1.48.0/source/calculators/shannon.cpp000077500000000000000000000026071424121717000211150ustar00rootroot00000000000000/* * shannon.cpp * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
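 *	Returns Shannon's H' (-sum p_i ln p_i) together with lower and upper 95% confidence limits
 *	derived from the variance approximation computed below.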
* */ #include "shannon.h" /***********************************************************************/ EstOutput Shannon::getValues(SAbundVector* rank){ try { //vector shannonData(3,0); data.resize(3,0); double shannon = 0.0000; //hprime double hvara=0.0000; double maxRank = rank->getMaxRank(); int sampled = rank->getNumSeqs(); int sobs = rank->getNumBins(); for(int i=1;i<=maxRank;i++){ double p = ((double) i)/((double)sampled); shannon += (double)rank->get(i)*p*log(p); //hprime hvara += (double)rank->get(i)*p*pow(log(p),2); } shannon = -shannon; double hvar = (hvara-pow(shannon,2))/(double)sampled+(double)(sobs-1)/(double)(2*sampled*sampled); double ci = 0; if(hvar>0){ ci = 1.96*pow(hvar,0.5); } double shannonhci = shannon + ci; double shannonlci = shannon - ci; data[0] = shannon; data[1] = shannonlci; data[2] = shannonhci; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Shannon", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/shannon.h000077500000000000000000000013731424121717000205610ustar00rootroot00000000000000#ifndef SHANNON_H #define SHANNON_H /* * shannon.h * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the Shannon estimator on single group. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class Shannon : public Calculator { public: Shannon() : Calculator("shannon", 3, false) {}; EstOutput getValues(SAbundVector* rank); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Shannon"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/shannoneven.cpp000077500000000000000000000014521424121717000217700ustar00rootroot00000000000000/* * shannoneven.cpp * Mothur * * Created by Pat Schloss on 8/21/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "shannoneven.h" #include "shannon.h" /***********************************************************************/ EstOutput ShannonEven::getValues(SAbundVector* rank){ try { //vector simpsonData(3,0); data.resize(1,0); vector shanData(3,0); Shannon* shannon = new Shannon(); shanData = shannon->getValues(rank); long int sobs = rank->getNumBins(); if(sobs > 1){ data[0] = shanData[0] / log(sobs); } else{ data[0] = 1; } delete shannon; return data; } catch(exception& e) { m->errorOut(e, "ShannonEven", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/shannoneven.h000077500000000000000000000012101424121717000214250ustar00rootroot00000000000000#ifndef SHANNONEVEN #define SHANNONEVEN /* * shannoneven.h * Mothur * * Created by Pat Schloss on 8/21/10. * Copyright 2010 Schloss Lab. All rights reserved. 
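 *	Shannon evenness: H' divided by ln(Sobs), reported as 1 when only a single OTU is observed
 *	(see shannoneven.cpp above).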
* */ #include "calculator.h" /***********************************************************************/ class ShannonEven : public Calculator { public: ShannonEven() : Calculator("shannoneven", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Shannoneven"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/shannonrange.cpp000077500000000000000000000061001424121717000221220ustar00rootroot00000000000000// // shannonrange.cpp // Mothur // // Created by SarahsWork on 1/3/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. // #include "shannonrange.h" /***********************************************************************/ EstOutput RangeShannon::getValues(SAbundVector* rank){ try { data.resize(3,0); double commSize = 1e20; double sampleSize = rank->getNumSeqs(); vector freqx; vector freqy; for (int i = 1; i <=rank->getMaxRank(); i++) { int abund = rank->get(i); if (abund != 0) { freqx.push_back(i); freqy.push_back(abund); } } double aux = ceil(pow((sampleSize+1), (1/(double)3))); double est0 = max(freqy[0]+1, aux); vector ests; double numr = 0.0; double denr = 0.0; for (int i = 0; i < freqx.size()-1; i++) { if (m->getControl_pressed()) { break; } if (freqx[i+1] == freqx[i]+1) { numr = max(freqy[i+1]+1, aux); } else { numr = aux; } denr = max(freqy[i], aux); ests.push_back((freqx[i]+1)*numr/(double)denr); } numr = aux; denr = max(freqy[freqy.size()-1], aux); ests.push_back((freqx[freqx.size()-1]+1)*numr/(double)denr); double sum = 0.0; for (int i = 0; i < freqy.size(); i++) { sum += (ests[i]*freqy[i]); } double nfac = est0 + sum; est0 /= nfac; for (int i = 0; i < ests.size(); i++) { ests[i] /= nfac; } double abunup = 1 / commSize; double nbrup = est0 / abunup; double abunlow = ests[0]; double nbrlow = est0 / abunlow; if (alpha == 1) { double sum = 0.0; for (int i = 0; i < freqy.size(); i++) { if (m->getControl_pressed()) { break; } sum += (freqy[i] * ests[i] * log(ests[i])); } data[0] = -sum; data[1] = exp(data[0]+nbrlow*(-abunlow*log(abunlow))); data[2] = exp(data[0]+nbrup*(-abunup*log(abunup))); }else { for (int i = 0; i < freqy.size(); i++) { if (m->getControl_pressed()) { break; } data[0] += (freqy[i] * (pow(ests[i],alpha))); } data[1] = pow(data[0]+nbrup*pow(abunup,alpha), (1/(1-alpha))); data[2] = pow(data[0]+nbrlow*pow(abunlow,alpha), (1/(1-alpha))); } //this calc has no data[0], just a lower and upper estimate. set data[0] to lower estimate. data[0] = data[1]; if (data[1] > data[2]) { data[1] = data[2]; data[2] = data[0]; } data[0] = -1.0; //no value if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "RangeShannon", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/shannonrange.h000077500000000000000000000030651424121717000215760ustar00rootroot00000000000000// // shannonrange.h // Mothur // // Created by SarahsWork on 1/3/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. // /* 1] Haegeman, B., Hamelin, J., Moriarty, J., Neal, P., Dushoff, J., & Weitz, J. S. (2013). Robust estimation of microbial diversity in theory and in practice. The ISME journal, 7(6), 1092–1101. [2] Hill, M. O. (1973). Diversity and evenness: A unifying notation and its consequences. Ecology, 54(2), 427–432. [3] Orlitsky, A., Santhanam, N. P., & Zhang, J. (2003). 
Always Good Turing: Asymptoti- cally optimal probability estimation. Science, 302(5644), 427–431. [4] Roesch, L. F., Fulthorpe, R. R., Riva, A., Casella, G., Hadwin, A. K., Kent, A. D., et al. (2007). Pyrosequencing enumerates and contrasts soil microbial diversity. The ISME Journal, 1(4), 283–290. */ #ifndef Mothur_shannonrange_h #define Mothur_shannonrange_h #include "calculator.h" /***********************************************************************/ class RangeShannon : public Calculator { public: RangeShannon(int a) : alpha(a), Calculator("rangeshannon", 3, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "Haegeman, B., Hamelin, J., Moriarty, J., Neal, P., Dushoff, J., & Weitz, J. S. (2013). Robust estimation of microbial diversity in theory and in practice. The ISME journal, 7(6), 1092–1101., http://www.mothur.org/wiki/rangeshannon"; } private: int alpha; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedace.cpp000077500000000000000000000074771424121717000214020ustar00rootroot00000000000000 /* * sharedace.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedace.h" /***********************************************************************/ EstOutput SharedAce::getValues(vector shared) { try { data.resize(1,0); string label; label = shared[0]->getLabel(); double fARare, fBRare, S12Rare, S12Abund, S12, f11, tempA, tempB, t10, t01, t11, t21, t12, t22, C12Numerator; fARare = 0; fBRare = 0; S12Rare = 0; S12Abund = 0; S12 = 0; f11 = 0; t10 = 0; t01 = 0; t11= 0; t21= 0; t12= 0; t22= 0; C12Numerator = 0; double Sharedace, C12, part1, part2, part3, part4, part5, Gamma1, Gamma2, Gamma3; /*fARare = number of OTUs with one individual found in A and less than or equal to 10 in B. fBRare = number of OTUs with one individual found in B and less than or equal to 10 in A. arare = number of sequences from A that contain less than 10 sequences. brare = number of sequences from B that contain less than 10 sequences. 
S12Rare = number of shared OTUs where both of the communities are represented by less than or equal to 10 sequences S12Abund = number of shared OTUs where at least one of the communities is represented by more than 10 sequences S12 = number of shared OTUs in A and B This estimator was changed to reflect Caldwell's changes, eliminating the nrare / nrare - 1 */ for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); if (!util.isEqual(tempA, 0) && !util.isEqual(tempB, 0)) {//they are shared S12++; //do both A and B have one if (util.isEqual(tempA, 1) && util.isEqual(tempB,1)) { f11++; } //is A one and B rare if (util.isEqual(tempA, 1) && (tempB <= abund)) { fARare++; } //is B one and A rare if (util.isEqual(tempB, 1) && (tempA <= abund)) { fBRare++; } if ((tempA <= abund) && (tempB <= abund)) { //shared and both rare S12Rare++; t10 += tempA; //Sum Xi t01 += tempB; //Sum Yi //calculate top of C12 // YiI(Xi = 1) if (util.isEqual(tempA, 1)) { C12Numerator += tempB; } //XiI(Yi = 1) if (util.isEqual(tempB, 1)) { C12Numerator += tempA; } //-I(Xi=Yi=1) if (util.isEqual(tempA, 1) && util.isEqual(tempB, 1)) { C12Numerator--; } //calculate t11 - Sum of XiYi t11 += tempA * tempB; //calculate t21 - Sum of Xi(Xi - 1)Yi t21 += tempA * (tempA - 1) * tempB; //calculate t12 - Sum of Xi(Yi - 1)Yi t12 += tempA * (tempB - 1) * tempB; //calculate t22 - Sum of Xi(Xi - 1)Yi(Yi - 1) t22 += tempA * (tempA - 1) * tempB * (tempB - 1); } if ((tempA > 10) || (tempB > 10)) { S12Abund++; } } } C12 = 1 - (C12Numerator /(float) t11); part1 = S12Rare / (float)C12; part2 = 1 / (float)C12; //calculate gammas Gamma1 = ((S12Rare * t21) / (float)((C12 * t10 * t11)) - 1); Gamma2 = ((S12Rare * t12) / (float)((C12 * t01 * t11)) - 1); Gamma3 = ((S12Rare / C12) * (S12Rare / C12)) * ( t22 / (float)(t10 * t01 * t11)); Gamma3 = Gamma3 - ((S12Rare * t11) / (float)(C12 * t01 * t10)) - Gamma1 - Gamma2; if (isnan(Gamma1) || isinf(Gamma1)) { Gamma1 = 0; } if (isnan(Gamma2) || isinf(Gamma2)) { Gamma2 = 0; } if (isnan(Gamma3) || isinf(Gamma3)) { Gamma3 = 0; } if (isnan(part1) || isinf(part1)) { part1 = 0; } if (isnan(part2) || isinf(part2)) { part2 = 0; } part3 = fARare * Gamma1; part4 = fBRare * Gamma2; part5 = f11 * Gamma3; Sharedace = S12Abund + part1 + (part2 * (part3 + part4 + part5)); data[0] = Sharedace; return data; } catch(exception& e) { m->errorOut(e, "SharedAce", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedace.h000077500000000000000000000014401424121717000210270ustar00rootroot00000000000000#ifndef SHAREDACE_H #define SHAREDACE_H /* * sharedace.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedAce estimator on two groups. It is a child of the calculator class. 
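/* Assembled estimate, as computed above:
   SharedACE = S12Abund + S12Rare/C12 + (1/C12) * (fARare*Gamma1 + fBRare*Gamma2 + f11*Gamma3),
   where C12 = 1 - C12Numerator/t11 estimates the sample coverage of the rare shared OTUs. */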
*/ #include "calculator.h" /***********************************************************************/ class SharedAce : public Calculator { public: SharedAce(int n=10) : abund(n), Calculator("sharedace", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/SharedAce"; } private: int abund; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedanderbergs.cpp000077500000000000000000000023221424121717000227460ustar00rootroot00000000000000/* * sharedanderberg.cpp * Mothur * * Created by Sarah Westcott on 3/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedanderbergs.h" /***********************************************************************/ EstOutput Anderberg::getValues(vector shared) { try { double S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; /*S1, S2 = number of OTUs observed or estimated in A and B S12=number of OTUs shared between A and B */ data.resize(1,0); for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); if (!util.isEqual(tempA, 0)) { S1++; } if (!util.isEqual(tempB, 0)) { S2++; } //they are shared if (!util.isEqual(tempA, 0) && !util.isEqual(tempB, 0)) { S12++; } } data[0] = 1.0 - S12 / ((float)((2 * S1) + (2 * S2) - (3 * S12))); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Anderberg", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedanderbergs.h000077500000000000000000000012451424121717000224160ustar00rootroot00000000000000#ifndef ANDERBERG_H #define ANDERBERG_H /* * sharedanderberg.h * Mothur * * Created by Sarah Westcott on 3/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Anderberg : public Calculator { public: Anderberg() : Calculator("anderberg", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Anderberg"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedbraycurtis.cpp000077500000000000000000000026171424121717000230300ustar00rootroot00000000000000/* * sharedbraycurtis.cpp * Mothur * * Created by Sarah Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedbraycurtis.h" /***********************************************************************/ //This is used by SharedJAbund and SharedSorAbund EstOutput BrayCurtis::getValues(vector shared) { try { data.resize(1,0); double sumSharedA, sumSharedB, sumSharedAB, tempA, tempB; sumSharedA = shared[0]->getNumSeqs(); sumSharedB = shared[1]->getNumSeqs(); sumSharedAB = 0; /*Xi, Yi = abundance of the ith shared OTU in A and B sumSharedA = the number of otus in A sumSharedB = the sum of all shared otus in B sumSharedAB = the sum of the minimum otus int all shared otus in AB. 
*/ for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); //sum the min of tempA and tempB if (tempA < tempB) { sumSharedAB += tempA; } else { sumSharedAB += tempB; } } data[0] = 1.0 - (2 * sumSharedAB) / (float)( sumSharedA + sumSharedB); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "BrayCurtis", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedbraycurtis.h000077500000000000000000000012501424121717000224650ustar00rootroot00000000000000#ifndef BRAYCURTIS_H #define BRAYCURTIS_H /* * sharedbraycurtis.h * Mothur * * Created by Sarah Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class BrayCurtis : public Calculator { public: BrayCurtis() : Calculator("braycurtis", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Braycurtis"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedchao1.cpp000077500000000000000000000163701424121717000216350ustar00rootroot00000000000000/* * sharedchao1.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedchao1.h" /***********************************************************************/ EstOutput SharedChao1::getValues(vector shared){ try { data.resize(1,0); vector temp; int numGroups = shared.size(); float Chao = 0.0; float leftvalue, rightvalue; // IntNode is defined in mothur.h // The tree used here is a binary tree used to represent the f1+++, f+1++, f++1+, f+++1, f11++, f1+1+... // combinations required to solve the chao estimator equation for any number of groups. Conceptually, think // of each node as having a 1 and a + value, or for f2 values a 2 and a + value, and 2 pointers to intnodes, and 2 coeffient values. // The coeffient value is how many times you chose branch 1 to get to that fvalue. // If you choose left you are selecting the 1 or 2 value and right means the + value. For instance, to find // the number of bins that have f1+1+ you would start at the root, go left, right, left, and select the rightvalue. // the coeffient is 2. Note: we only set the coeffient in f2 values. //create and initialize trees to 0. initialTree(numGroups); for (int i = 0; i < shared[0]->getNumBins(); i++) { //get bin values and calc shared bool sharedByAll = true; temp.clear(); for (int j = 0; j < numGroups; j++) { temp.push_back(shared[j]->get(i)); if (temp[j] == 0) { sharedByAll = false; } } //they are shared if (sharedByAll ) { //find f1 and f2values updateTree(temp); } } //calculate chao1, (numleaves-1) because numleaves contains the ++ values. 
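//if any f2 cell is empty, every term uses the bias-corrected form f1(f1-1) / (2^coef * (f2+1));
//otherwise f1^2 / (2^coef * f2) is used. In both cases the last rightvalue (the all '+' cell) adds the observed number of shared OTUs rather than an f1/f2 term.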
bool bias = false; for(int i=0;ilvalue == 0 || f2leaves[i]->rvalue == 0) { bias = true;} } if(bias){ for (int i = 0; i < numLeaves; i++) { leftvalue = (float)(f1leaves[i]->lvalue * (f1leaves[i]->lvalue - 1)) / (float)((pow(2, (float)f2leaves[i]->lcoef)) * (f2leaves[i]->lvalue + 1)); if (i != (numLeaves-1)) { rightvalue = (float)(f1leaves[i]->rvalue * (f1leaves[i]->rvalue - 1)) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * (f2leaves[i]->rvalue + 1)); }else{ //add in sobs rightvalue = (float)(f1leaves[i]->rvalue); } Chao += leftvalue + rightvalue; } } else{ for (int i = 0; i < numLeaves; i++) { leftvalue = (float)(f1leaves[i]->lvalue * f1leaves[i]->lvalue) / (float)((pow(2, (float)f2leaves[i]->lcoef)) * f2leaves[i]->lvalue); if (i != (numLeaves-1)) { rightvalue = (float)(f1leaves[i]->rvalue * f1leaves[i]->rvalue) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * f2leaves[i]->rvalue); }else{ //add in sobs rightvalue = (float)(f1leaves[i]->rvalue); } Chao += leftvalue + rightvalue; } } for (int i = 0; i < numNodes; i++) { delete f1leaves[i]; delete f2leaves[i]; } data[0] = Chao; return data; } catch(exception& e) { m->errorOut(e, "SharedChao1", "getValues"); exit(1); } } /***********************************************************************/ //builds trees structure with n leaf nodes initialized to 0. void SharedChao1::initialTree(int n) { try { // (2^n) / 2. Divide by 2 because each leaf node contains 2 values. One for + and one for 1 or 2. numLeaves = pow(2, (float)n) / 2; numNodes = 2*numLeaves - 1; int countleft = 0; int countright = 1; f1leaves.resize(numNodes); f2leaves.resize(numNodes); //initialize leaf values for (int i = 0; i < numLeaves; i++) { f1leaves[i] = new IntNode(0, 0, nullptr, nullptr); f2leaves[i] = new IntNode(0, 0, nullptr, nullptr); } //set pointers to children for (int j = numLeaves; j < numNodes; j++) { f1leaves[j] = new IntNode(); f1leaves[j]->left = f1leaves[countleft]; f1leaves[j]->right = f1leaves[countright]; f2leaves[j] = new IntNode(); f2leaves[j]->left = f2leaves[countleft]; f2leaves[j]->right =f2leaves[countright]; countleft = countleft + 2; countright = countright + 2; } //point to root f1root = f1leaves[numNodes-1]; //point to root f2root = f2leaves[numNodes-1]; //set coeffients setCoef(f2root, 0); } catch(exception& e) { if ((toString(e.what()) == "vector::_M_fill_insert") || (toString(e.what()) == "St9bad_alloc")) { m->mothurOut("You are using " + toString(n) + " groups which creates 2^" + toString(n+1) + " nodes. Try reducing the number of groups you selected. \n"); exit(1); } m->errorOut(e, "SharedChao1", "initialTree"); exit(1); } } /***********************************************************************/ //take vector containing the abundance info. for a bin and updates trees. 
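//For example, with two groups a shared bin with counts {1, 3} adds one to the f(1+) leaf and one to the
//f(++) leaf of the f1 tree; the f(+1) and f(11) leaves are untouched.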
void SharedChao1::updateTree(vector bin) { try { updateBranchf1(f1root, bin, 0); updateBranchf2(f2root, bin, 0); } catch(exception& e) { m->errorOut(e, "SharedChao1", "updateTree"); exit(1); } } /***********************************************************************/ void SharedChao1::updateBranchf1(IntNode* node, vector bin, int index) { try { //if you have more than one group if (index == (bin.size()-1)) { if (bin[index] == 1) { node->lvalue++; node->rvalue++; } else { node->rvalue++; } }else { if (bin[index] == 1) { //follow path as if you are 1 updateBranchf1(node->left, bin, index+1); } //follow path as if you are + updateBranchf1(node->right, bin, index+1); } } catch(exception& e) { m->errorOut(e, "SharedChao1", "updateBranchf1"); exit(1); } } /***********************************************************************/ void SharedChao1::updateBranchf2(IntNode* node, vector bin, int index) { try { //if you have more than one group if (index == (bin.size()-1)) { if (bin[index] == 2) { node->lvalue++; node->rvalue++; } else { node->rvalue++; } }else { if (bin[index] == 2) { //follow path as if you are 1 updateBranchf2(node->left, bin, index+1); } //follow path as if you are + updateBranchf2(node->right, bin, index+1); } } catch(exception& e) { m->errorOut(e, "SharedChao1", "updateBranchf2"); exit(1); } } /***********************************************************************/ void SharedChao1::setCoef(IntNode* node, int coef) { try { if (node->left != nullptr) { setCoef(node->left, coef+1); setCoef(node->right, coef); }else { node->lcoef = coef+1; node->rcoef = coef; } } catch(exception& e) { m->errorOut(e, "SharedChao1", "setCoef"); exit(1); } } /***********************************************************************/ //for debugging purposes void SharedChao1::printTree() { m->mothurOut("F1 leaves\n"); printBranch(f1root); m->mothurOut("F2 leaves\n"); printBranch(f2root); } /*****************************************************************/ void SharedChao1::printBranch(IntNode* node) { try { // you are not a leaf if (node->left != nullptr) { printBranch(node->left); printBranch(node->right); }else { //you are a leaf m->mothurOut(toString(node->lvalue)+"\n"); m->mothurOut(toString(node->rvalue)+"\n"); } } catch(exception& e) { m->errorOut(e, "SharedChao1", "printBranch"); exit(1); } } /*****************************************************************/ mothur-1.48.0/source/calculators/sharedchao1.h000077500000000000000000000026321424121717000212760ustar00rootroot00000000000000#ifndef SHAREDCHAO1_H #define SHAREDCHAO1_H /* * sharedchao1.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the Sharedchao1 estimator on two groups. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class SharedChao1 : public Calculator { public: SharedChao1() : Calculator("sharedchao", 1, true) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Sharedchao"; } private: IntNode* f1root; IntNode* f2root; vector f1leaves; vector f2leaves; int numLeaves; int numNodes; void initialTree(int); //builds trees structure with n leaf nodes initialized to 0. void setCoef(IntNode*, int); void updateTree(vector); //take vector containing the abundance info. for a bin and updates trees. 
void updateBranchf1(IntNode*, vector, int); //pointer, vector of abundance values, index into vector void updateBranchf2(IntNode*, vector, int); //pointer, vector of abundance values, index into vector //for debugging void printTree(); void printBranch(IntNode*); }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedjabund.cpp000077500000000000000000000015441424121717000221020ustar00rootroot00000000000000/* * sharedjabund.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedjabund.h" #include "uvest.h" /***********************************************************************/ EstOutput JAbund::getValues(vector shared) { try { EstOutput UVest; UVest.resize(2,0); data.resize(1,0); UVEst uv; UVest = uv.getUVest(shared); //UVest[0] is Uest UVest[1] is Vest data[0] = 1.0-(UVest[0] * UVest[1]) / ((float)(UVest[0] + UVest[1] - (UVest[0] * UVest[1]))); if(data[0] > 1){data[0] = 0; } if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "JAbund", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedjabund.h000077500000000000000000000013731424121717000215470ustar00rootroot00000000000000#ifndef JABUND_H #define JABUND_H /* * sharedjabund.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedJAbund estimator on two groups. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class JAbund : public Calculator { public: JAbund() : Calculator("jabund", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Jabund"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedjackknife.cpp000077500000000000000000000133451424121717000225660ustar00rootroot00000000000000/* * sharedjackknife.cpp * Mothur * * Created by Thomas Ryabin on 3/30/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
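 *	The calculator below roughly jackknifes the inverse Simpson index (1/D): it computes the pooled
 *	estimate, pseudovalues of the form n*D_pooled - (n-1)*D_partial, and an approximate confidence
 *	interval from the pseudovalue variance with a t (n <= 30) or normal critical value.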
* */ #include "sharedjackknife.h" /*************************************************************************************** ***************************************************************************************/ double SharedJackknife::simpson(vector abunds, double numInd, int numBins){ double denom = numInd*(numInd-1); double sum = 0; for(int i = 0; i < numBins; i++) sum += (double)abunds[i]*((double)abunds[i]-1)/denom; return sum; } /*****************************************************************************************/ double* SharedJackknife::jackknife(){ int numBins = groups.at(0)->getNumBins()-1; vector cArray(numBins); for(int i = 0; i < numBins; i++) cArray[i] = 0; double numInd = 0; for(int i = 0; i < numGroups; i++) for(int j = 0; j < numBins; j++) { int curAbund = groups.at(i)->get(j+1); cArray[j] += curAbund; numInd += (double)curAbund; } double baseD = 1/simpson(cArray, numInd, numBins); vector pseudoVals(numBins); double jackknifeEstimate = 0; for(int i = 0; i < numGroups; i++) { for(int j = 0; j < numBins-1; j++) { int abundDiff = -groups.at(i)->get(j+1); if(i > 0) abundDiff += groups.at(i-1)->get(j+1); cArray[j] += abundDiff; numInd += abundDiff; } double curD = 1/simpson(cArray, numInd, numBins); pseudoVals[i] = (double)numGroups*(baseD - curD) + curD; jackknifeEstimate += pseudoVals[i]; } jackknifeEstimate /= (double)numGroups; double variance = 0; for(int i = 0; i < numGroups; i++) variance += pow(pseudoVals[i]-jackknifeEstimate, 2); variance /= (double)numGroups*((double)numGroups-1); double stErr = sqrt(variance); double confLimit = 0; if(numGroups <= 30) confLimit = getConfLimit(numGroups-1, 1); else confLimit = 1.645; confLimit *= stErr; double* rdata = new double[3]; rdata[0] = baseD; rdata[1] = jackknifeEstimate - confLimit; rdata[2] = jackknifeEstimate + confLimit; return rdata; } /***********************************************************************/ double SharedJackknife::getConfLimit(int row, int col) //Rows are the degrees of freedom { //Found on http://www.vgtu.lt/leidiniai/elektroniniai/Probability.pdf/Table%203.pdf //Confidence Level .90 .95 .975 .99 .995 .999 .9995 double values[30][7] = {{3.078, 6.314, 12.706, 31.821, 63.656, 318.289, 636.578}, {1.886, 2.920, 4.303, 6.965, 9.925, 22.328, 31.600}, {1.638, 2.353, 3.182, 4.541, 5.841, 10.214, 12.924}, {1.533, 2.132, 2.776, 3.747, 4.604, 7.173, 8.610}, {1.476, 2.015, 2.571, 3.365, 4.032, 5.894, 6.869}, {1.440, 1.943, 2.447, 3.143, 3.707, 5.208, 5.959}, {1.415, 1.895, 2.365, 2.998, 3.499, 4.785, 5.408}, {1.397, 1.860, 2.306, 2.896, 3.355, 4.501, 5.041}, {1.383, 1.833, 2.262, 2.821, 3.250, 4.297, 4.781}, {1.372, 1.812, 2.228, 2.764, 3.169, 4.144, 4.587}, {1.363, 1.796, 2.201, 2.718, 3.106, 4.025, 4.437}, {1.356, 1.782, 2.179, 2.681, 3.055, 3.930, 4.318}, {1.350, 1.771, 2.160, 2.650, 3.012, 3.852, 4.221}, {1.345, 1.761, 2.145, 2.624, 2.977, 3.787, 4.140}, {1.341, 1.753, 2.131, 2.602, 2.947, 3.733, 4.073}, {1.337, 1.746, 2.120, 2.583, 2.921, 3.686, 4.015}, {1.333, 1.740, 2.110, 2.567, 2.898, 3.646, 3.965}, {1.330, 1.734, 2.101, 2.552, 2.878, 3.610, 3.922}, {1.328, 1.729, 2.093, 2.539, 2.861, 3.579, 3.883}, {1.325, 1.725, 2.086, 2.528, 2.845, 3.552, 3.850}, {1.323, 1.721, 2.080, 2.518, 2.831, 3.527, 3.819}, {1.321, 1.717, 2.074, 2.508, 2.819, 3.505, 3.792}, {1.319, 1.714, 2.069, 2.500, 2.807, 3.485, 3.768}, {1.318, 1.711, 2.064, 2.492, 2.797, 3.467, 3.745}, {1.316, 1.708, 2.060, 2.485, 2.787, 3.450, 3.725}, {1.315, 1.706, 2.056, 2.479, 2.779, 3.435, 3.707}, {1.314, 1.703, 2.052, 2.473, 2.771, 3.421, 3.689}, 
{1.313, 1.701, 2.048, 2.467, 2.763, 3.408, 3.674}, {1.311, 1.699, 2.045, 2.462, 2.756, 3.396, 3.660}, {1.310, 1.697, 2.042, 2.457, 2.750, 3.385, 3.646}}; return values[row][col]; } /***********************************************************************/ /************************************************************************************************/ EstOutput SharedJackknife::getValues(vector vectorShared){ //Fix this for collect, mistake was that it was made with summary in mind. try { if(callCount == numGroups*(numGroups-1)/2) { currentCallDone = true; callCount = 0; } callCount++; if(currentCallDone) { groups.clear(); currentCallDone = false; } if(groups.size() != numGroups) { if(groups.size() == 0) groups.push_back(vectorShared[0]); groups.push_back(vectorShared[1]); } if(groups.size() == numGroups && callCount < numGroups) { data.resize(3,0); double* rdata = jackknife(); data[0] = rdata[0]; data[1] = rdata[1]; data[2] = rdata[2]; delete[] rdata; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[0])) { data[2] = 0; } return data; } data.resize(3,0); data[0] = 0; data[1] = 0; data[2] = 0; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } catch(exception& e) { m->errorOut(e, "SharedJackknife", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedjackknife.h000077500000000000000000000020571424121717000222310ustar00rootroot00000000000000#ifndef SHAREDJACKKNIFE_H #define SHAREDJACKKNIFE_H /* * sharedjackknife.h * Mothur * * Created by Thomas Ryabin on 3/30/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /*This class implements the SharedJackknife estimator. It is a child of the calculator class.*/ /***********************************************************************/ class SharedJackknife : public Calculator { public: SharedJackknife() : numGroups(-1), callCount(0), count(0), currentCallDone(true), Calculator("sharedjackknife", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Sharedjackknife"; } private: int numGroups, callCount, count; bool currentCallDone; vector groups; double simpson(vector, double, int); double* jackknife(); double getConfLimit(int row, int col); }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedjclass.cpp000077500000000000000000000023421424121717000221130ustar00rootroot00000000000000/* * sharedjclass.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
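 *	The value returned below is the classic incidence-based Jaccard dissimilarity
 *	1 - S12 / (S1 + S2 - S12), where S1 and S2 are the numbers of OTUs observed in each group
 *	and S12 is the number of OTUs shared by both.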
* */ #include "sharedjclass.h" /***********************************************************************/ EstOutput Jclass::getValues(vector shared) { try { double S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; /*S1, S2 = number of OTUs observed or estimated in A and B S12=number of OTUs shared between A and B */ data.resize(1,0); for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); //find number of bins in shared1 and shared2 if (!util.isEqual(tempA, 0)) { S1++; } if (!util.isEqual(tempB, 0)) { S2++; } //they are shared if (!util.isEqual(tempA, 0) && !util.isEqual(tempB, 0)) { S12++; } } data[0] = 1.0 - S12 / (float)(S1 + S2 - S12); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Jclass", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedjclass.h000077500000000000000000000013701424121717000215600ustar00rootroot00000000000000#ifndef JCLASS_H #define JCLASS_H /* * sharedjclass.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedJclass estimator on two groups. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class Jclass : public Calculator { public: Jclass() : Calculator("jclass", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Jclass"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedjest.cpp000077500000000000000000000030501424121717000215760ustar00rootroot00000000000000/* * sharedjest.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sharedchao1.h" #include "chao1.h" #include "sharedjest.h" /***********************************************************************/ EstOutput Jest::getValues(vector shared) { try { EstOutput S1, S2, S12; S12.resize(1,0); S1.resize(3,0); S2.resize(3,0); /*S1, S2 = number of OTUs estimated in A and B using the Chao estimator S12 = estimated number of OTUs shared between A and B using the SharedChao estimator*/ data.resize(1,0); SharedChao1* sharedChao = new SharedChao1(); Chao1* chaoS1 = new Chao1(); Chao1* chaoS2 = new Chao1(); SAbundVector* chaoS1Sabund = new SAbundVector(); SAbundVector* chaoS2Sabund = new SAbundVector(); *chaoS1Sabund = shared[0]->getSAbundVector(); *chaoS2Sabund = shared[1]->getSAbundVector(); S12 = sharedChao->getValues(shared); S1 = chaoS1->getValues(chaoS1Sabund); S2 = chaoS2->getValues(chaoS2Sabund); data[0] = 1.0 - S12[0] / (float)(S1[0] + S2[0] - S12[0]); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (data[0] < 0) { data[0] = 0; } if (data[0] > 1) { data[0] = 1; } delete sharedChao; delete chaoS1; delete chaoS2; delete chaoS1Sabund; delete chaoS2Sabund; return data; } catch(exception& e) { m->errorOut(e, "Jest", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedjest.h000077500000000000000000000013511424121717000212450ustar00rootroot00000000000000#ifndef JEST_H #define JEST_H /* * sharedjest.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedJest estimator on two groups. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class Jest : public Calculator { public: Jest() : Calculator("jest", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Jest"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedjsd.cpp000077500000000000000000000026671424121717000214260ustar00rootroot00000000000000// // sharedjsd.cpp // Mothur // // Created by SarahsWork on 12/9/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
// #include "sharedjsd.h" /***********************************************************************/ //KLD <- function(x,y) sum(x *log(x/y)) //JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2)) EstOutput JSD::getValues(vector shared) { try { data.resize(1,0); double KLD1 = 0.0; double KLD2 = 0.0; double totalA = shared[0]->getNumSeqs(); double totalB = shared[1]->getNumSeqs(); for (int i = 0; i < shared[0]->getNumBins(); i++) { double tempA = shared[0]->get(i) / totalA; double tempB = shared[1]->get(i) / totalB; if (util.isEqual(tempA, 0)) { tempA = 0.000001; } if (util.isEqual(tempB, 0)) { tempB = 0.000001; } double denom = (tempA+tempB)/(double)2.0; if (!util.isEqual(tempA, 0)) { KLD1 += tempA * log(tempA/denom); } //KLD(x,m) if (!util.isEqual(tempB, 0)) { KLD2 += tempB * log(tempB/denom); } //KLD(y,m) } data[0] = ((0.5*KLD1) + (0.5*KLD2)); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "JSD", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedjsd.h000077500000000000000000000012411424121717000210560ustar00rootroot00000000000000// // sharedjsd.h // Mothur // // Created by SarahsWork on 12/9/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_sharedjsd_h #define Mothur_sharedjsd_h #include "calculator.h" /***********************************************************************/ //Jensen-Shannon divergence (JSD) class JSD : public Calculator { public: JSD() : Calculator("jsd", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/JSD"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedkstest.cpp000077500000000000000000000034421424121717000221530ustar00rootroot00000000000000/* * kstest.cpp * Mothur * * Created by Thomas Ryabin on 3/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sharedkstest.h" /***********************************************************************/ EstOutput KSTest::getValues(vector shared){ try { data.resize(3,0); //Must return shared1 and shared2 to original order at conclusion of kstest vector sortedA = shared[0]->get(); sort(sortedA.begin()+1, sortedA.end()); vector sortedB = shared[1]->get(); sort(sortedB.begin()+1, sortedB.end()); int numNZ1 = 0; for(int i = 0; i < shared[0]->getNumBins(); i++) { if(shared[0]->get(i) != 0) { numNZ1++; } } //shared[0]->numNZ(); int numNZ2 = 0; for(int i = 0; i < shared[1]->getNumBins(); i++) { if(shared[1]->get(i) != 0) { numNZ2++; } } //shared[1]->numNZ(); double numInd1 = (double)shared[0]->getNumSeqs(); double numInd2 = (double)shared[1]->getNumSeqs(); double maxDiff = -1; double sum1 = 0; double sum2 = 0; for(int i = 1; i < shared[0]->getNumBins(); i++) { sum1 += sortedA[i]; sum2 += sortedB[i]; double diff = fabs((double)sum1/numInd1 - (double)sum2/numInd2); if(diff > maxDiff) maxDiff = diff; } double DStatistic = maxDiff*numNZ1*numNZ2; double a = pow((double)(numNZ1 + numNZ2)/(numNZ1*numNZ2),.5); //double pVal = exp(-2*pow(maxDiff/a,2)); double critVal = 1.36*a*numNZ1*numNZ2; data[0] = DStatistic; data[1] = critVal; data[2] = 0; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } return data; } catch(exception& e) { m->errorOut(e, "KSTest", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedkstest.h000077500000000000000000000013451424121717000216200ustar00rootroot00000000000000#ifndef KSTEST_H #define KSTEST_H /* * kstest.h * Mothur * * Created by Thomas Ryabin on 3/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /*This class implements the KSTest estimator on 2 groups. It is a child of the calculator class.*/ /***********************************************************************/ class KSTest : public Calculator { public: KSTest() : Calculator("kstest", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Kstest"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedkulczynski.cpp000077500000000000000000000023071424121717000230430ustar00rootroot00000000000000/* * sharedkulczynski.cpp * Mothur * * Created by Sarah Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sharedkulczynski.h" /***********************************************************************/ EstOutput Kulczynski::getValues(vector shared) { try { double S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; /*S1, S2 = number of OTUs observed or estimated in A and B S12=number of OTUs shared between A and B */ data.resize(1,0); for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); if (!util.isEqual(tempA, 0)) { S1++; } if (!util.isEqual(tempB, 0)) { S2++; } //they are shared if (!util.isEqual(tempA, 0) && !util.isEqual(tempB, 0)) { S12++; } } data[0] = 1.0 - S12 / (float)(S1 + S2 - (2 * S12)); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Kulczynski", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedkulczynski.h000077500000000000000000000012471424121717000225120ustar00rootroot00000000000000#ifndef KULCZYNSKI_H #define KULCZYNSKI_H /* * sharedkulczynski.h * Mothur * * Created by John Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Kulczynski : public Calculator { public: Kulczynski() : Calculator("kulczynski", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Kulczynski"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedkulczynskicody.cpp000077500000000000000000000023421424121717000237210ustar00rootroot00000000000000/* * sharedkulczynskicody.cpp * Mothur * * Created by Sarah Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedkulczynskicody.h" /***********************************************************************/ EstOutput KulczynskiCody::getValues(vector shared) { try { double S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; /*S1, S2 = number of OTUs observed or estimated in A and B S12=number of OTUs shared between A and B */ data.resize(1,0); for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); if (!util.isEqual(tempA, 0)) { S1++; } if (!util.isEqual(tempB, 0)) { S2++; } //they are shared if (!util.isEqual(tempA, 0) && !util.isEqual(tempB, 0)) { S12++; } } data[0] = 1.0 - 0.5 * ((S12 / (float)S1) + (S12 / (float)S2)); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "KulczynskiCody", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedkulczynskicody.h000077500000000000000000000013051424121717000233640ustar00rootroot00000000000000#ifndef KULCZYNSKICODY_H #define KULCZYNSKICODY_H /* * sharedkulczynskicody.h * Mothur * * Created by Sarah Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "calculator.h" /***********************************************************************/ class KulczynskiCody : public Calculator { public: KulczynskiCody() : Calculator("kulczynskicody", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Kulczynskicody"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedlennon.cpp000077500000000000000000000024501424121717000221250ustar00rootroot00000000000000/* * sharedlennon.cpp * Mothur * * Created by Sarah Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedlennon.h" /***********************************************************************/ EstOutput Lennon::getValues(vector shared) { try { double S1, S2, S12, tempA, tempB, min; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; min = 0; /*S1, S2 = number of OTUs observed or estimated in A and B S12=number of OTUs shared between A and B */ data.resize(1,0); for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); if (!util.isEqual(tempA, 0)) { S1++; } if (!util.isEqual(tempB, 0)) { S2++; } //they are shared if (!util.isEqual(tempA, 0) && !util.isEqual(tempB, 0)) { S12++; } } tempA = S1 - S12; tempB = S2 - S12; if (tempA < tempB) { min = tempA; } else { min = tempB; } data[0] = 1.0 - S12 / (float)(S12 + min); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Lennon", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedlennon.h000077500000000000000000000012151424121717000215700ustar00rootroot00000000000000#ifndef LENNON_H #define LENNON_H /* * sharedlennon.h * Mothur * * Created by Sarah Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Lennon : public Calculator { public: Lennon() : Calculator("lennon", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Lennon"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedmarczewski.cpp000077500000000000000000000017311424121717000230140ustar00rootroot00000000000000/* * sharedmarczewski.cpp * Mothur * * Created by Thomas Ryabin on 4/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sharedmarczewski.h" EstOutput SharedMarczewski::getValues(vector shared){ try { //SharedRAbundVector* shared1 = vectorShared[0]; //SharedRAbundVector* shared2 = vectorShared[1]; data.resize(1,0); double a = 0; double b = 0; double c = 0; for(int i = 1; i < shared[0]->getNumBins(); i++) { int abund1 = shared[0]->get(i); int abund2 = shared[1]->get(i); if(abund1 > 0 && abund2 > 0) a++; else if(abund1 > 0 && abund2 == 0) b++; else if(abund1 == 0 && abund2 > 0) c++; } data[0] = (b+c)/(a+b+c); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "SharedMarczewski", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedmarczewski.h000077500000000000000000000013071424121717000224600ustar00rootroot00000000000000#ifndef SHAREDMARCZEWSKI_H #define SHAREDMARCZEWSKI_H /* * sharedmarczewski.h * Mothur * * Created by Thomas Ryabin on 4/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class SharedMarczewski : public Calculator { public: SharedMarczewski() : Calculator("sharedmarczewski", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Sharedmarczewski"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedmorisitahorn.cpp000077500000000000000000000026771424121717000233650ustar00rootroot00000000000000/* * sharedmorisitahorn.cpp * Mothur * * Created by Sarah Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedmorisitahorn.h" /***********************************************************************/ EstOutput MorHorn::getValues(vector shared) { try { data.resize(1,0); double Atotal, Btotal, tempA, tempB; Atotal = 0; Btotal = 0; double morhorn, sumSharedA, sumSharedB, a, b, d; morhorn = 0.0; sumSharedA = 0.0; sumSharedB = 0.0; a = 0.0; b = 0.0; d = 0.0; //get the total values we need to calculate the theta denominator sums for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls Atotal += shared[0]->get(i); Btotal += shared[1]->get(i); } //calculate the denominator sums for (int j = 0; j < shared[0]->getNumBins(); j++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(j); tempB = shared[1]->get(j); float relA = tempA / Atotal; float relB = tempB / Btotal; a += relA * relA; b += relB * relB; d += relA * relB; } morhorn = 1- (2 * d) / (a + b); if (isnan(morhorn) || isinf(morhorn)) { morhorn = 1; } data[0] = morhorn; return data; } catch(exception& e) { m->errorOut(e, "MorHorn", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedmorisitahorn.h000077500000000000000000000012431424121717000230160ustar00rootroot00000000000000#ifndef MORHORN_H #define MORHORN_H /* * sharedmorisitahorn.h * Mothur * * Created by Sarah Westcott on 3/24/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "calculator.h" /***********************************************************************/ class MorHorn : public Calculator { public: MorHorn() : Calculator("morisitahorn", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Morisitahorn"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharednseqs.h000077500000000000000000000014351424121717000214340ustar00rootroot00000000000000#ifndef SHAREDNSEQS_H #define SHAREDNSEQS_H /* * sharednseqs.h * Mothur * * Created by Sarah Westcott on 3/16/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class SharedNSeqs : public Calculator { public: SharedNSeqs() : Calculator("sharednseqs", 1, false) {}; EstOutput getValues(SAbundVector* rank){ return data; }; EstOutput getValues(vector shared) { data.resize(1,0); data[0] = (double)shared[0]->getNumSeqs() + (double)shared[1]->getNumSeqs(); return data; } string getCitation() { return "http://www.mothur.org/wiki/Sharednseqs"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedochiai.cpp000077500000000000000000000022611424121717000220700ustar00rootroot00000000000000/* * sharedochiai.cpp * Mothur * * Created by Sarah Westcott on 3/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedochiai.h" /***********************************************************************/ EstOutput Ochiai::getValues(vector shared) { try { double S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; /*S1, S2 = number of OTUs observed or estimated in A and B S12=number of OTUs shared between A and B */ data.resize(1,0); for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); if (!util.isEqual(tempA, 0)) { S1++; } if (!util.isEqual(tempB, 0)) { S2++; } //they are shared if (!util.isEqual(tempA, 0) && !util.isEqual(tempB, 0)) { S12++; } } data[0] = S12 / ((float)pow((S1 * S2), 0.5)); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Ochiai", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedochiai.h000077500000000000000000000012141424121717000215320ustar00rootroot00000000000000#ifndef OCHIAI_H #define OCHIAI_H /* * sharedochiai.h * Mothur * * Created by Sarah Westcott on 3/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Ochiai : public Calculator { public: Ochiai() : Calculator("ochiai", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/ochiai"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedrjsd.cpp000077500000000000000000000030211424121717000215710ustar00rootroot00000000000000// // sharedrjsd.cpp // Mothur // // Created by Sarah Westcott on 1/21/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. 
// #include "sharedrjsd.h" /***********************************************************************/ //KLD <- function(x,y) sum(x *log(x/y)) //JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2)) EstOutput RJSD::getValues(vector shared) { try { data.resize(1,0); double KLD1 = 0.0; double KLD2 = 0.0; double totalA = shared[0]->getNumSeqs(); double totalB = shared[1]->getNumSeqs(); for (int i = 0; i < shared[0]->getNumBins(); i++) { double tempA = shared[0]->get(i) / totalA; double tempB = shared[1]->get(i) / totalB; //tempA = shared[0]->get(i) / totalA; //tempB = shared[1]->get(i) / totalB; if (util.isEqual(tempA, 0)) { tempA = 0.000001; } if (util.isEqual(tempB, 0)) { tempB = 0.000001; } double denom = (tempA+tempB)/(double)2.0; if (!util.isEqual(tempA, 0)) { KLD1 += tempA * log(tempA/denom); } //KLD(x,m) if (!util.isEqual(tempB, 0)) { KLD2 += tempB * log(tempB/denom); } //KLD(y,m) } data[0] = sqrt((0.5*KLD1) + (0.5*KLD2)); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "RJSD", "getValues"); exit(1); } } mothur-1.48.0/source/calculators/sharedrjsd.h000077500000000000000000000012551424121717000212450ustar00rootroot00000000000000// // sharedrjsd.h // Mothur // // Created by Sarah Westcott on 1/21/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. // #ifndef Mothur_sharedrjsd_h #define Mothur_sharedrjsd_h #include "calculator.h" /***********************************************************************/ //Jensen-Shannon divergence (JSD) class RJSD : public Calculator { public: RJSD() : Calculator("rjsd", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/RJSD"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedsobs.cpp000077500000000000000000000017071424121717000216060ustar00rootroot00000000000000/* * sharedsobs.cpp * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedsobs.h" /***********************************************************************/ //This returns the number of unique species observed in several groups. //The shared vector is each groups sharedrabundvector. EstOutput SharedSobs::getValues(vector shared){ try { data.resize(1,0); double observed = 0; //loop through the species in each group for (int k = 0; k < shared[0]->getNumBins(); k++) { //if you have found a new species if (shared[0]->get(k) != 0) { observed++; } else if ((shared[0]->get(k) == 0) && (shared[1]->get(k) != 0)) { observed++; } } data[0] = observed; return data; } catch(exception& e) { m->errorOut(e, "SharedSobs", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedsobs.h000077500000000000000000000014501424121717000212460ustar00rootroot00000000000000#ifndef SHAREDSOBS_H #define SHAREDSOBS_H /* * sharedsobs.h * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedSobs estimator on two groups for the shared rarefaction command. It is a child of the calculator class. 
*/ #include "calculator.h" /***********************************************************************/ class SharedSobs : public Calculator { public: SharedSobs() : Calculator("sharedsobs", 1, false) {}; EstOutput getValues(SAbundVector* rank){ return data; }; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/sharedsobs"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedsobscollectsummary.cpp000077500000000000000000000040731424121717000245710ustar00rootroot00000000000000/* * sharedsobscollectsummary.cpp * Mothur * * Created by Sarah Westcott on 2/12/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedsobscollectsummary.h" /***********************************************************************/ //This returns the number of shared species observed in several groups. //The shared vector is each groups sharedrabundvector. EstOutput SharedSobsCS::getValues(vector shared){ try { data.resize(1,0); double observed = 0; int numGroups = shared.size(); for (int i = 0; i < shared[0]->getNumBins(); i++) { //get bin values and set sharedByAll bool sharedByAll = true; for (int j = 0; j < numGroups; j++) { if (shared[j]->get(i) == 0) { sharedByAll = false; } } //they are shared if (sharedByAll ) { observed++; } } data[0] = observed; return data; } catch(exception& e) { m->errorOut(e, "SharedSobsCS", "getValues"); exit(1); } } /***********************************************************************/ //This returns the number of shared species observed in several groups. //The shared vector is each groups sharedrabundvector. EstOutput SharedSobsCS::getValues(vector shared, vector& labels, vector otuNames){ try { data.resize(1,0); double observed = 0; int numGroups = shared.size(); labels.clear(); for (int i = 0; i < shared[0]->getNumBins(); i++) { //get bin values and set sharedByAll bool sharedByAll = true; for (int j = 0; j < numGroups; j++) { if (shared[j]->get(i) == 0) { sharedByAll = false; } } //they are shared if (sharedByAll == true) { observed++; labels.push_back(otuNames[i]); } } data[0] = observed; return data; } catch(exception& e) { m->errorOut(e, "SharedSobsCS", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedsobscollectsummary.h000077500000000000000000000015531424121717000242360ustar00rootroot00000000000000#ifndef SHAREDSOBSCOLLECTSUMMARY_H #define SHAREDSOBSCOLLECTSUMMARY_H /* * sharedsobscollectsummary.h * Mothur * * Created by Sarah Westcott on 2/12/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This calculator returns the number of shared species between 2 groups. */ #include "calculator.h" /***********************************************************************/ class SharedSobsCS : public Calculator { public: SharedSobsCS() : Calculator("sharedsobs", 1, true) {}; EstOutput getValues(SAbundVector* rank){ return data; }; EstOutput getValues(vector); EstOutput getValues(vector, vector&, vector); string getCitation() { return "http://www.mothur.org/wiki/Sharedsobs"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedsorabund.cpp000077500000000000000000000015171424121717000224540ustar00rootroot00000000000000/* * sharedsorabund.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sharedsorabund.h" #include "uvest.h" /***********************************************************************/ EstOutput SorAbund::getValues(vector shared) { try { EstOutput UVest; UVest.resize(2,0); data.resize(1,0); UVEst uv; UVest = uv.getUVest(shared); //UVest[0] is Uest, UVest[1] is Vest data[0] = (2 * UVest[0] * UVest[1]) / ((float)(UVest[0] + UVest[1])); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } data[0] = 1-data[0]; return data; } catch(exception& e) { m->errorOut(e, "SorAbund", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedsorabund.h000077500000000000000000000014121424121717000221130ustar00rootroot00000000000000#ifndef SORABUND_H #define SORABUND_H /* * sharedsorabund.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedSorAbund estimator on two groups. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class SorAbund : public Calculator { public: SorAbund() : Calculator("sorabund", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Sorabund"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedsorclass.cpp000077500000000000000000000023451424121717000224700ustar00rootroot00000000000000/* * sharedsorclass.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedsorclass.h" /***********************************************************************/ EstOutput SorClass::getValues(vector shared) { try { double S1, S2, S12, tempA, tempB; S1 = 0; S2 = 0; S12 = 0; tempA = 0; tempB = 0; /*S1, S2 = number of OTUs observed or estimated in A and B S12=number of OTUs shared between A and B */ data.resize(1,0); for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); //find number of bins in shared1 and shared2 if (!util.isEqual(tempA, 0)) { S1++; } if (!util.isEqual(tempB, 0)) { S2++; } //they are shared if (!util.isEqual(tempA, 0) && !util.isEqual(tempB, 0)) { S12++; } } data[0] = 1.0-(2 * S12) / (float)(S1 + S2); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "SorClass", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedsorclass.h000077500000000000000000000014151424121717000221320ustar00rootroot00000000000000#ifndef SORCLASS_H #define SORCLASS_H /* * sharedsorclass.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedSorClass estimator on two groups. It is a child of the calculator class. 
*/ #include "calculator.h" /***********************************************************************/ class SorClass : public Calculator { public: SorClass() : Calculator("sorclass", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Sorclass"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedsorest.cpp000077500000000000000000000027021424121717000221530ustar00rootroot00000000000000/* * sharedsorest.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedsorest.h" #include "chao1.h" #include "sharedchao1.h" /***********************************************************************/ EstOutput SorEst::getValues(vector shared) { try { EstOutput S1, S2, S12; S12.resize(1,0); S1.resize(3,0); S2.resize(3,0); /*S1, S2 = number of OTUs estimated in A and B using the Chao estimator S12 = estimated number of OTUs shared between A and B using the SharedChao estimator*/ data.resize(1,0); SharedChao1* sharedChao = new SharedChao1(); Chao1* chaoS1 = new Chao1(); Chao1* chaoS2 = new Chao1(); SAbundVector* chaoS1Sabund = new SAbundVector(); SAbundVector* chaoS2Sabund = new SAbundVector(); *chaoS1Sabund = shared[0]->getSAbundVector(); *chaoS2Sabund = shared[1]->getSAbundVector(); S12 = sharedChao->getValues(shared); S1 = chaoS1->getValues(chaoS1Sabund); S2 = chaoS2->getValues(chaoS2Sabund); data[0] = 1.0-(2 * S12[0]) / (float)(S1[0] + S2[0]); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } delete sharedChao; delete chaoS1; delete chaoS2; delete chaoS1Sabund; delete chaoS2Sabund; return data; } catch(exception& e) { m->errorOut(e, "SorEst", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedsorest.h000077500000000000000000000013711424121717000216210ustar00rootroot00000000000000#ifndef SOREST_H #define SOREST_H /* * sharedsorest.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedSorEst estimator on two groups. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class SorEst : public Calculator { public: SorEst() : Calculator("sorest", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Sorest"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedthetan.cpp000077500000000000000000000032261424121717000221210ustar00rootroot00000000000000/* * sharedthetan.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sharedthetan.h" /***********************************************************************/ EstOutput ThetaN::getValues(vector shared) { try { data.resize(1,0); double Atotal, Btotal, tempA, tempB; Atotal = 0; Btotal = 0; double numerator, denominator, thetaN, sumSharedA, sumSharedB, a, b, d; numerator = 0.0; denominator = 0.0; thetaN = 0.0; sumSharedA = 0.0; sumSharedB = 0.0; a = 0.0; b = 0.0; d = 0.0; //get the total values we need to calculate the theta denominator sums for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls Atotal += shared[0]->get(i); Btotal += shared[1]->get(i); } //calculate the theta denominator sums for (int j = 0; j < shared[0]->getNumBins(); j++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(j); tempB = shared[1]->get(j); //they are shared if (!util.isEqual(tempA, 0) && !util.isEqual(tempB, 0)) { if (!util.isEqual(Atotal, 0)) { sumSharedA = (tempA / (float)Atotal); } if (!util.isEqual(Btotal, 0)) { sumSharedB = (tempB / (float)Btotal); } a += sumSharedA; b += sumSharedB; } } thetaN = (a * b) / (a + b - (a * b)); if (isnan(thetaN) || isinf(thetaN)) { thetaN = 0; } data[0] = 1.0 - thetaN; return data; } catch(exception& e) { m->errorOut(e, "ThetaN", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedthetan.h000077500000000000000000000013711424121717000215650ustar00rootroot00000000000000#ifndef THETAN_H #define THETAN_H /* * sharedthetan.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedThetaN estimator on two groups. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class ThetaN : public Calculator { public: ThetaN() : Calculator("thetan", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Thetan"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sharedthetayc.cpp000077500000000000000000000043061424121717000222770ustar00rootroot00000000000000/* * sharedthetayc.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sharedthetayc.h" /***********************************************************************/ EstOutput ThetaYC::getValues(vector shared) { try { data.resize(3,0.0000); double Atotal = (double)shared[0]->getNumSeqs(); double Btotal = (double)shared[1]->getNumSeqs(); double thetaYC = 0; double pi = 0; double qi = 0; double a = 0; double b = 0; double d = 0; double sumPcubed = 0; double sumQcubed = 0; double sumPQsq = 0; double sumPsqQ = 0; //calculate the theta denominator sums for (int j = 0; j < shared[0]->getNumBins(); j++) { //store in temps to avoid multiple repetitive function calls pi = shared[0]->get(j) / Atotal; qi = shared[1]->get(j) / Btotal; a += pi * pi; b += qi * qi; d += pi * qi; sumPcubed += pi * pi * pi; sumQcubed += qi * qi * qi; sumPQsq += pi * qi * qi; sumPsqQ += pi * pi * qi; } thetaYC = d / (a + b - d); if (isnan(thetaYC) || isinf(thetaYC)) { thetaYC = 0; } double varA = 4 / Atotal * (sumPcubed - a * a); double varB = 4 / Btotal * (sumQcubed - b * b); double varD = sumPQsq / Atotal + sumPsqQ / Btotal - d * d * (1/Atotal + 1/Btotal); double covAD = 2 / Atotal * (sumPsqQ - a * d); double covBD = 2 / Btotal * (sumPQsq - b* d); double varT = d * d * (varA + varB) / pow(a + b - d, (double)4.0) + pow(a+b, (double)2.0) * varD / pow(a+b-d, (double)4.0) - 2.0 * (a + b) * d / pow(a + b - d, (double)4.0) * (covAD + covBD); double ci = 1.95 * sqrt(varT); data[0] = thetaYC; data[1] = thetaYC - ci; data[2] = thetaYC + ci; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } data[0] = 1.0 - data[0]; double hold = data[1]; data[1] = 1.0 - data[2]; data[2] = 1.0 - hold; return data; } catch(exception& e) { m->errorOut(e, "ThetaYC", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sharedthetayc.h000077500000000000000000000014021424121717000217360ustar00rootroot00000000000000#ifndef THETAYC_H #define THETAYC_H /* * sharedthetayc.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the SharedThetaYC estimator on two groups. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class ThetaYC : public Calculator { public: ThetaYC() : Calculator("thetayc", 3, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Thetayc"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/shen.cpp000077500000000000000000000014061424121717000204020ustar00rootroot00000000000000/* * shen.cpp * Mothur * * Created by Thomas Ryabin on 5/18/09. * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "shen.h" #include "ace.h" /***********************************************************************/ EstOutput Shen::getValues(SAbundVector* rank){ try { data.resize(1,0); double n = (double)rank->getNumSeqs(); double f1 = (double)rank->get(1); Ace* calc = new Ace(abund); EstOutput ace = calc->getValues(rank); double f0 = ace[0]-rank->getNumBins(); data[0] = f0 * (1 - pow(1 - f1/n/f0, f)); delete calc; return data; } catch(exception& e) { m->errorOut(e, "Shen", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/shen.h000077500000000000000000000014271424121717000200520ustar00rootroot00000000000000#ifndef SHEN_H #define SHEN_H /* * shen.h * Mothur * * Created by Thomas Ryabin on 5/18/09. * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /* This class implements the shen calculator on single group. It is a child of the calculator class. */ /***********************************************************************/ class Shen : public Calculator { public: Shen(int size, int n) : f(size), abund(n), Calculator("shen", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Shen"; } private: int f; int abund; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/siabundance.cpp000066400000000000000000000034131424121717000217160ustar00rootroot00000000000000// // siabundance.cpp // Mothur // // Created by Sarah Westcott on 5/22/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "siabundance.hpp" /***********************************************************************/ SIAbundance::SIAbundance() : DiversityCalculator(true) {} /***********************************************************************/ vector SIAbundance::getValues(int nMax, vector& sampling) { //int nMax = rank->getMaxRank(); try { nMax = floor(pow(2.0,ceil(log((double) nMax)/log(2.0)) + 2.0) + 1.0e-7); results.resize(nMax, 0.0); int nSamples = sampling.size(); if (nSamples == 0) { return results; } #ifdef USE_GSL DiversityUtils dutils("siabund"); gsl_set_error_handler_off(); for(int i = 0; i < sampling.size(); i++) { if (m->getControl_pressed()) { break; } for (int j = 1; j <= nMax; j++) { int nA = j; double dLog = 0.0, dP = 0.0; dLog = dutils.logLikelihood(nA, sampling[i].alpha, sampling[i].beta, sampling[i].dNu); dP = exp(dLog); results[j - 1] += dP*sampling[i].ns; } } for (int i = 1; i<=nMax; i++) { results[i-1] /= (double)nSamples; if (isnan(results[i-1]) || isinf(results[i-1])) { results[i-1] = 0.0; } } #endif return results; } catch(exception& e) { m->errorOut(e, "SIAbundance", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/siabundance.hpp000066400000000000000000000012211424121717000217160ustar00rootroot00000000000000// // siabundance.hpp // Mothur // // Created by Sarah Westcott on 5/22/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
// #ifndef siabundance_hpp #define siabundance_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class SIAbundance : public DiversityCalculator { public: SIAbundance(); vector getValues(int mr, vector& sampling); string getTag() { return "si"; } private: }; /***********************************************************************/ #endif /* siabundance_hpp */ mothur-1.48.0/source/calculators/simpson.cpp000077500000000000000000000030761424121717000211420ustar00rootroot00000000000000/* * simpson.cpp * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "simpson.h" /***********************************************************************/ EstOutput Simpson::getValues(SAbundVector* rank){ try { //vector simpsonData(3,0); data.resize(3,0); double simpson = 0.0000; double ci = 0; double maxRank = (double)rank->getMaxRank(); double sampled = (double)rank->getNumSeqs(); double sobs = (double)rank->getNumBins(); double firstTerm = 0; double secondTerm = 0; if(!util.isEqual(sobs, 0)){ double simnum=0.0000; for(unsigned long long i=1;i<=maxRank;i++){ simnum += (double)(rank->get(i)*i*(i-1)); } simpson = simnum / (sampled*(sampled-1)); for(unsigned long long i=1;i<=maxRank;i++){ double piI = (double) i / (double)sampled; firstTerm += rank->get(i) * pow(piI, 3); secondTerm += rank->get(i) * pow(piI, 2); } double var = (4.0 / sampled) * (firstTerm - secondTerm*secondTerm); ci = 1.95 * pow(var, 0.5); } double simpsonlci = simpson - ci; double simpsonhci = simpson + ci; data[0] = simpson; data[1] = simpsonlci; data[2] = simpsonhci; if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Simpson", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/simpson.h000077500000000000000000000013521424121717000206020ustar00rootroot00000000000000#ifndef SIMPSON_H #define SIMPSON_H /* * simpson.h * Dotur * * Created by Sarah Westcott on 1/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the Simpson estimator on single group. It is a child of the calculator class. */ #include "calculator.h" /***********************************************************************/ class Simpson : public Calculator { public: Simpson() : Calculator("simpson", 3, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Simpson"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/simpsoneven.cpp000077500000000000000000000012521424121717000220120ustar00rootroot00000000000000/* * simpsoneven.cpp * Mothur * * Created by Pat Schloss on 8/21/10. * Copyright 2010 Schloss Lab. All rights reserved. 
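 *
 *	Simpson evenness, computed in getValues() below as the inverse Simpson
 *	diversity (1/D, obtained from the InvSimpson calculator) divided by the
 *	number of observed OTUs:  E_1/D = (1/D) / Sobs.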
* */ #include "simpsoneven.h" #include "invsimpson.h" /***********************************************************************/ EstOutput SimpsonEven::getValues(SAbundVector* rank){ try { data.resize(1,0); InvSimpson* simp = new InvSimpson(); vector invSimpData = simp->getValues(rank); data[0] = invSimpData[0] / double(rank->getNumBins()); return data; } catch(exception& e) { m->errorOut(e, "SimpsonEven", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/simpsoneven.h000077500000000000000000000012151424121717000214560ustar00rootroot00000000000000#ifndef SIMPSONEVEN_H #define SIMPSONEVEN_H /* * simpsoneven.h * Mothur * * Created by Pat Schloss on 8/21/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class SimpsonEven : public Calculator { public: SimpsonEven() : Calculator("simpsoneven", 1, false) {}; EstOutput getValues(SAbundVector*); EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/Simpsoneven"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/sirarefaction.cpp000066400000000000000000000055231424121717000222770ustar00rootroot00000000000000// // sirarefaction.cpp // Mothur // // Created by Sarah Westcott on 5/23/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "sirarefaction.hpp" /***********************************************************************/ SIRarefaction::SIRarefaction(double c) : coverage(c), DiversityCalculator(true) {} /***********************************************************************/ int compare_doubles3(const void* a, const void* b) { double* arg1 = (double *) a; double* arg2 = (double *) b; if( *arg1 < *arg2 ) return -1; else if( *arg1 == *arg2 ) return 0; else return 1; } /***********************************************************************/ vector SIRarefaction::getValues(int numSeqs, vector& sampling){ //int sampled = rank->getNumSeqs(); //nj try { #ifdef USE_GSL DiversityUtils dutils("sirarefaction"); int nSamples = sampling.size(); double* adMu = nullptr; double dLower = 0.0, dMedian = 0.0, dUpper = 0.0; gsl_set_error_handler_off(); t_LSParams* atLSParams; atLSParams = (t_LSParams *) malloc(nSamples*sizeof(t_LSParams)); //MAX_SAMPLES //load sampling data for (int i = 0; i < nSamples; i++) { if (m->getControl_pressed()) { free(atLSParams); return results; } atLSParams[i].dMDash = sampling[i].alpha; atLSParams[i].dV = sampling[i].beta; atLSParams[i].dNu = sampling[i].dNu; atLSParams[i].dC = coverage; atLSParams[i].n = 0; } adMu = (double *) malloc(sizeof(double)*nSamples); for(int i = 0; i < nSamples; i++){ adMu[i] = ((double) numSeqs)*dutils.calcMu(&atLSParams[i]); } qsort(adMu, nSamples, sizeof(double), compare_doubles3); dLower = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.025); dMedian = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.5); dUpper = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.975); if (isnan(dLower) || isinf(dLower)) { dLower = 0; } if (isnan(dMedian) || isinf(dMedian)) { dMedian = 0; } if (isnan(dUpper) || isinf(dUpper)) { dUpper = 0; } m->mothurOut("\nSIRarefaction - d_Lower = " + toString(dLower) + " d_Median = " + toString(dMedian) + " d_Upper = " + toString(dUpper) + "\n\n"); results.push_back(dLower); results.push_back(dMedian); results.push_back(dUpper); free(adMu); 
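            //adMu holds one rarefaction estimate per MCMC sample; the 2.5%, 50%
            //and 97.5% quantiles reported above give the lower, median and upper
            //values.  Free the per-sample estimates and parameter structs.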
free(atLSParams); #endif return results; } catch(exception& e) { m->errorOut(e, "SIRarefaction", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sirarefaction.hpp000066400000000000000000000013561424121717000223040ustar00rootroot00000000000000// // sirarefaction.hpp // Mothur // // Created by Sarah Westcott on 5/23/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef sirarefaction_hpp #define sirarefaction_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class SIRarefaction : public DiversityCalculator { public: SIRarefaction(double c); // : coverage(c) { m = MothurOut::getInstance(); } vector getValues(int ns, vector& sampling); string getTag() { return "si"; } private: double coverage; }; /***********************************************************************/ #endif /* sirarefaction_hpp */ mothur-1.48.0/source/calculators/sishift.cpp000066400000000000000000000035121424121717000211130ustar00rootroot00000000000000// // sishift.cpp // Mothur // // Created by Sarah Westcott on 5/23/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "sishift.hpp" /***********************************************************************/ SIShift::SIShift() : DiversityCalculator(true) {} /***********************************************************************/ vector SIShift::getValues(int numSeqs, vector& sampling) { try { int nMax = 1000; //nMax nMax = floor(pow(2.0,ceil(log((double) nMax)/log(2.0)) + 2.0) + 1.0e-7); results.resize(nMax, 0.0); int nSamples = (int)sampling.size(); if (nSamples == 0) { return results; } #ifdef USE_GSL DiversityUtils dutils("sishift"); gsl_set_error_handler_off(); double dShift = 5.0e5/(double) numSeqs; for(int i = 0; i < nSamples; i++) { if (m->getControl_pressed()) { break; } for (int j = 1; j <= nMax; j++) { int nA = j; double dLog = 0.0, dP = 0.0; dLog = dutils.logLikelihood(nA, sampling[i].alpha*sqrt(dShift), sampling[i].beta*dShift, sampling[i].dNu); dP = exp(dLog); results[j - 1] += dP*sampling[i].ns; } } for (int i = 1; i<=nMax; i++) { results[i-1] /= (double)nSamples; if (isnan(results[i-1]) || isinf(results[i-1])) { results[i-1] = 0.0; } } #endif return results; } catch(exception& e) { m->errorOut(e, "SIShift", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/sishift.hpp000066400000000000000000000011731424121717000211210ustar00rootroot00000000000000// // sishift.hpp // Mothur // // Created by Sarah Westcott on 5/23/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef sishift_hpp #define sishift_hpp #include "diversityutils.hpp" #include "diversitycalc.h" /***********************************************************************/ class SIShift : public DiversityCalculator { public: SIShift(); vector getValues(int ns, vector& sampling); string getTag() { return "si"; } private: }; /***********************************************************************/ #endif /* sishift_hpp */ mothur-1.48.0/source/calculators/smithwilson.cpp000077500000000000000000000017071424121717000220310ustar00rootroot00000000000000/* * smithwilson.cpp * Mothur * * Created by Pat Schloss on 8/21/10. * Copyright 2010 Schloss Lab. All rights reserved. 
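 *
 *	Smith and Wilson's evenness index.  getValues() below computes the variance
 *	of the log-transformed OTU abundances from the SAbundVector's abundance
 *	classes and passes it through an arctangent transform to obtain the
 *	evenness score; a variance of zero (a perfectly even community) returns 1.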
 *
 */

#include "smithwilson.h"

/***********************************************************************/

EstOutput SmithWilson::getValues(SAbundVector* rank){
	try {
		data.resize(1,0);

		double maxRank = rank->getMaxRank();
		double sobs = rank->getNumBins();

		double innerSum = 0;
		for(int i=1;i<=maxRank;i++){	innerSum += rank->get(i) * log(i);	}
		innerSum /= sobs;

		double outerSum = 0;
		for(int i=1;i<=maxRank;i++){	outerSum += rank->get(i) * (log(i) - innerSum) * (log(i) - innerSum);	}
		outerSum /= sobs;

		if(outerSum > 0){	data[0] = 1.0000 - 2.0000 / (3.14159 * atan(outerSum));	}
		else{	data[0] = 1.0000;	}

		return data;
	}
	catch(exception& e) {
		m->errorOut(e, "SmithWilson", "getValues");
		exit(1);
	}
}

/***********************************************************************/
mothur-1.48.0/source/calculators/smithwilson.h

#ifndef SMITHWILSON
#define SMITHWILSON

/*
 *  smithwilson.h
 *  Mothur
 *
 *  Created by Pat Schloss on 8/21/10.
 *  Copyright 2010 Schloss Lab. All rights reserved.
 *
 */

#include "calculator.h"

/***********************************************************************/

class SmithWilson : public Calculator {

public:
	SmithWilson() : Calculator("smithwilson", 1, false) {};
	EstOutput getValues(SAbundVector*);
	EstOutput getValues(vector<SharedRAbundVector*>) {return data;};
	string getCitation() { return "http://www.mothur.org/wiki/Smithwilson"; }
};

/***********************************************************************/

#endif
mothur-1.48.0/source/calculators/sobs.h

#ifndef SOBS_H
#define SOBS_H

/*
 *  sobs.h
 *  Dotur
 *
 *  Created by Sarah Westcott on 1/7/09.
 *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
 *
 */

/* This class implements the Sobs estimator on single group.
It is a child of the calculator class. */

#include "calculator.h"

/***********************************************************************/

class Sobs : public Calculator {

public:
	Sobs() : Calculator("sobs", 1, false) {};
	EstOutput getValues(SAbundVector* rank){
		data.resize(1,0);
		data[0] = (double)rank->getNumBins();
		return data;
	}
	EstOutput getValues(vector<SharedRAbundVector*>) {return data;};
	string getCitation() { return "http://www.mothur.org/wiki/Sobs"; }
};

/***********************************************************************/

#endif
mothur-1.48.0/source/calculators/soergel.cpp

/*
 *  soergel.cpp
 *  Mothur
 *
 *  Created by westcott on 12/15/10.
 *  Copyright 2010 Schloss Lab. All rights reserved.
 *
 */

#include "soergel.h"

/***********************************************************************/

EstOutput Soergel::getValues(vector<SharedRAbundVector*> shared) {
	try {
		data.resize(1,0);

		double sumNum = 0.0;
		double sumMax = 0.0;

		//calc the numerator and denominator sums
		for (int i = 0; i < shared[0]->getNumBins(); i++) {
			int Aij = shared[0]->get(i);
			int Bij = shared[1]->get(i);

			sumNum += abs((Aij - Bij));
			sumMax += max(Aij, Bij);
		}

		data[0] = sumNum / sumMax;

		if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }

		return data;
	}
	catch(exception& e) {
		m->errorOut(e, "Soergel", "getValues");
		exit(1);
	}
}

/***********************************************************************/
mothur-1.48.0/source/calculators/soergel.h

#ifndef SOERGEL_H
#define SOERGEL_H

/*
 *  soergel.h
 *  Mothur
 *
 *  Created by westcott on 12/15/10.
 *  Copyright 2010 Schloss Lab. All rights reserved.
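 *
 *	The Soergel dissimilarity between two communities, computed in soergel.cpp
 *	from the shared OTU abundances A_i and B_i as
 *
 *	    d = sum_i |A_i - B_i| / sum_i max(A_i, B_i)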
 *
 */

#include "calculator.h"

/***********************************************************************/

class Soergel : public Calculator {

public:
	Soergel() : Calculator("soergel", 1, false) {};
	EstOutput getValues(SAbundVector*) {return data;};
	EstOutput getValues(vector<SharedRAbundVector*>);
	string getCitation() { return "http://www.mothur.org/wiki/Soergel"; }

private:

};

/***********************************************************************/

#endif
mothur-1.48.0/source/calculators/solow.cpp

/*
 *  solow.cpp
 *  Mothur
 *
 *  Created by Thomas Ryabin on 5/13/09.
 *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
 *
 */

#include "solow.h"
#include <math.h>

/***********************************************************************/

EstOutput Solow::getValues(SAbundVector* rank){
	try {
		data.resize(1,0);

		double n = (double)rank->getNumSeqs();
		double f1 = (double)rank->get(1);
		double f2 = (double)rank->get(2);

		data[0] = f1*f1/2/f2 * (1 - pow(1 - 2*f2/n/f1, f));

		return data;
	}
	catch(exception& e) {
		m->errorOut(e, "Solow", "getValues");
		exit(1);
	}
}

/***********************************************************************/
mothur-1.48.0/source/calculators/solow.h

#ifndef SOLOW_H
#define SOLOW_H

/*
 *  solow.h
 *  Mothur
 *
 *  Created by Thomas Ryabin on 5/13/09.
 *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
 *
 */

#include "calculator.h"

/* This class implements the solow calculator on single group.
It is a child of the calculator class. */

/***********************************************************************/

class Solow : public Calculator {

public:
	Solow(int size) : f(size), Calculator("solow", 1, false) {};
	EstOutput getValues(SAbundVector*);
	EstOutput getValues(vector<SharedRAbundVector*>) {return data;};
	string getCitation() { return "http://www.mothur.org/wiki/Solow"; }

private:
	int f;
};

/***********************************************************************/

#endif
mothur-1.48.0/source/calculators/spearman.cpp

/*
 *  spearman.cpp
 *  Mothur
 *
 *  Created by westcott on 12/15/10.
 *  Copyright 2010 Schloss Lab. All rights reserved.
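 *
 *	Spearman rank correlation between two communities.  getValues() below ranks
 *	the OTUs of each community by abundance (ties, including the OTUs absent
 *	from a community, receive the average of the ranks they span) and computes
 *
 *	    rho = 1 - 6 * sum_i d_i^2 / (S * (S^2 - 1))
 *
 *	where d_i is the difference between the two ranks of OTU i and S is the
 *	number of OTUs.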
* */ #include "spearman.h" /***********************************************************************/ EstOutput Spearman::getValues(vector shared) { try { data.resize(1,0); SAbundVector savA = shared[0]->getSAbundVector(); SAbundVector savB = shared[1]->getSAbundVector(); double sumRanks = 0.0; int numOTUS = shared[0]->getNumBins(); vector rankVectorA(savA.getMaxRank()+1, 0); int currentRankA = 0; for(int i=savA.getMaxRank();i>0;i--){ int numWithAbundanceI = savA.get(i); if(numWithAbundanceI > 1){ rankVectorA[i] = (currentRankA + 1 + currentRankA + numWithAbundanceI) / 2.0; } else { rankVectorA[i] = currentRankA+numWithAbundanceI; } currentRankA += numWithAbundanceI; } rankVectorA[0] = (numOTUS + currentRankA + 1) / 2.0; vector rankVectorB(savB.getMaxRank()+1, 0); int currentRankB = 0; for(int i=savB.getMaxRank();i>0;i--){ int numWithAbundanceI = savB.get(i); if(numWithAbundanceI > 1){ rankVectorB[i] = (currentRankB + 1 + currentRankB + numWithAbundanceI) / 2.0; } else { rankVectorB[i] = currentRankB+numWithAbundanceI; } currentRankB += numWithAbundanceI; } rankVectorB[0] = (numOTUS + currentRankB + 1) / 2.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); float rankA = rankVectorA[Aij]; float rankB = rankVectorB[Bij]; sumRanks += ((rankA - rankB) * (rankA - rankB)); } data[0] = 1.0 - ((6 * sumRanks) / (float) (numOTUS * ((numOTUS*numOTUS)-1))); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "Spearman", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/spearman.h000077500000000000000000000012101424121717000207110ustar00rootroot00000000000000#ifndef SPEARMAN_H #define SPEARMAN_H /* * spearman.h * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class Spearman : public Calculator { public: Spearman() : Calculator("spearman", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Spearman"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/speciesprofile.cpp000077500000000000000000000017401424121717000224620ustar00rootroot00000000000000/* * speciesprofile.cpp * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. 
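 *
 *	Species profile distance: the Euclidean distance between the relative
 *	abundance profiles of the two communities,
 *
 *	    d = sqrt( sum_i (A_i/sumA - B_i/sumB)^2 )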
* */ #include "speciesprofile.h" /***********************************************************************/ EstOutput SpeciesProfile::getValues(vector shared) { try { data.resize(1,0); double sumA = 0.0; double sumB = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { sumA += shared[0]->get(i); sumB += shared[1]->get(i); } double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int A = shared[0]->get(i); int B = shared[1]->get(i); sum += (((A / sumA) - (B / sumB)) * ((A / sumA) - (B / sumB))); } data[0] = sqrt(sum); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "SpeciesProfile", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/speciesprofile.h000077500000000000000000000012561424121717000221310ustar00rootroot00000000000000#ifndef SPECIESPROFILE_H #define SPECIESPROFILE_H /* * speciesprofile.h * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class SpeciesProfile : public Calculator { public: SpeciesProfile() : Calculator("speciesprofile", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Speciesprofile"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/specificity.cpp000077500000000000000000000013611424121717000217600ustar00rootroot00000000000000// // specificity.cpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "specificity.hpp" /***********************************************************************/ double Specificity::getValue(double tp, double tn, double fp, double fn) { try { long long n = fp + tn; double specificity = tn / (double) n; if(n == 0) { specificity = 0; } if (isnan(specificity) || isinf(specificity)) { specificity = 0; } return specificity; } catch(exception& e) { m->errorOut(e, "Specificity", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/specificity.hpp000077500000000000000000000012531424121717000217650ustar00rootroot00000000000000// // specificity.hpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef specificity_hpp #define specificity_hpp #include "calculator.h" /***********************************************************************/ class Specificity : public ClusterMetric { public: Specificity() : ClusterMetric("spec") {}; double getValue(double tp, double tn, double fp, double fn); //ignores tp, fn string getCitation() { return "http://www.mothur.org/wiki/Specificity"; } private: }; /***********************************************************************/ #endif /* specificity_hpp */ mothur-1.48.0/source/calculators/structchi2.cpp000077500000000000000000000025271424121717000215440ustar00rootroot00000000000000/* * structchi2.cpp * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. 
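 *
 *	Chi-squared distance between two communities.  Because each OTU is weighted
 *	by its total abundance summed over every group in the shared file, this
 *	calculator needs all groups (the extra 'true' flag in structchi2.h):
 *
 *	    d = sqrt( N_total * sum_i (1/c_i) * (A_i/sumA - B_i/sumB)^2 )
 *
 *	where c_i is the abundance of OTU i across all groups and N_total is the
 *	total number of sequences across all groups.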
* */ #include "structchi2.h" /***********************************************************************/ EstOutput StructChi2::getValues(vector shared) { try { data.resize(1,0); double sumA = shared[0]->getNumSeqs(); double sumB = shared[1]->getNumSeqs(); double totalSum = 0.0; for (int i = 0; i < shared.size(); i++) { totalSum += shared[i]->getNumSeqs(); } vector sumOtus; sumOtus.resize(shared[0]->getNumBins(), 0); //for each otu for (int i = 0; i < shared[0]->getNumBins(); i++) { //for each group for (int j = 0; j < shared.size(); j++) { sumOtus[i] += shared[j]->get(i); } } double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int A = shared[0]->get(i); int B = shared[1]->get(i); double totalTerm = 1 / (float) sumOtus[i]; double Aterm = A / sumA; double Bterm = B / sumB; sum += (totalTerm * ((Aterm-Bterm)*(Aterm-Bterm))); } data[0] = sqrt((totalSum * sum)); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "StructChi2", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/structchi2.h000077500000000000000000000013511424121717000212030ustar00rootroot00000000000000#ifndef STRUCTCHI2_H #define STRUCTCHI2_H /* * structchi2.h * Mothur * * Created by westcott on 12/17/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class StructChi2 : public Calculator { public: StructChi2() : Calculator("structchi2", 1, false, true) {}; //the true means this calculator needs all groups to calculate the pair value EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Structchi2"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/structchord.cpp000077500000000000000000000022341424121717000220110ustar00rootroot00000000000000/* * structchord.cpp * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "structchord.h" /***********************************************************************/ EstOutput StructChord::getValues(vector shared) { try { data.resize(1,0); double sumAj2 = 0.0; double sumBj2 = 0.0; //calc the 2 denominators for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); //(Aij) ^ 2 sumAj2 += (Aij * Aij); sumBj2 += (Bij * Bij); } sumAj2 = sqrt(sumAj2); sumBj2 = sqrt(sumBj2); //calc sum double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); sum += (((Aij / sumAj2) - (Bij / sumBj2)) * ((Aij / sumAj2) - (Bij / sumBj2))); } data[0] = sqrt(sum); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "StructChord", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/structchord.h000077500000000000000000000012321424121717000214530ustar00rootroot00000000000000#ifndef STRUCTCHORD_H #define STRUCTCHORD_H /* * structchord.h * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. 
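 *
 *	Chord distance: each abundance vector is scaled to unit length and the
 *	Euclidean distance between the scaled vectors is reported,
 *
 *	    d = sqrt( sum_i (A_i/||A|| - B_i/||B||)^2 )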
* */ #include "calculator.h" /***********************************************************************/ class StructChord : public Calculator { public: StructChord() : Calculator("structchord", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Structchord"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/structeuclidean.cpp000077500000000000000000000015011424121717000226370ustar00rootroot00000000000000/* * structeuclidean.cpp * Mothur * * Created by westcott on 12/14/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "structeuclidean.h" /***********************************************************************/ EstOutput StructEuclidean::getValues(vector shared) { try { data.resize(1,0); double sum = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); //(Aij - Bij) ^ 2 sum += ((Aij - Bij) * (Aij - Bij)); } data[0] = sqrt(sum); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "StructEuclidean", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/structeuclidean.h000077500000000000000000000012661424121717000223140ustar00rootroot00000000000000#ifndef STRUCTEUCLIDEAN_H #define STRUCTEUCLIDEAN_H /* * structeuclidean.h * Mothur * * Created by westcott on 12/14/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class StructEuclidean : public Calculator { public: StructEuclidean() : Calculator("structeuclidean", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Structeuclidean"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/structkulczynski.cpp000077500000000000000000000016171424121717000231240ustar00rootroot00000000000000/* * structkulczynski.cpp * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "structkulczynski.h" /***********************************************************************/ EstOutput StructKulczynski::getValues(vector shared) { try { data.resize(1,0); double sumA = 0.0; double sumB = 0.0; double sumMin = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int A = shared[0]->get(i); int B = shared[1]->get(i); sumA += A; sumB += B; sumMin += min(A, B); } data[0] = 1.0 - (0.5 * ((sumMin / sumA) + (sumMin / sumB))); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "StructKulczynski", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/structkulczynski.h000077500000000000000000000012751424121717000225710ustar00rootroot00000000000000#ifndef STRUCTKULCZYNSKI_H #define STRUCTKULCZYNSKI_H /* * structkulczynski.h * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. 
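 *
 *	Abundance-based Kulczynski dissimilarity, computed in structkulczynski.cpp as
 *
 *	    d = 1 - 0.5 * ( sum_i min(A_i,B_i)/sumA + sum_i min(A_i,B_i)/sumB )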
* */ #include "calculator.h" /***********************************************************************/ class StructKulczynski : public Calculator { public: StructKulczynski() : Calculator("structkulczynski", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Structkulczynski"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/structpearson.cpp000077500000000000000000000024021424121717000223560ustar00rootroot00000000000000/* * structpearson.cpp * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "structpearson.h" /***********************************************************************/ EstOutput StructPearson::getValues(vector shared) { try { data.resize(1,0); int numOTUS = shared[0]->getNumBins(); double averageA = shared[0]->getNumSeqs() / (float) numOTUS; double averageB = shared[1]->getNumSeqs() / (float) numOTUS; double numTerm = 0.0; double denomTerm1 = 0.0; double denomTerm2 = 0.0; for (int i = 0; i < shared[0]->getNumBins(); i++) { int Aij = shared[0]->get(i); int Bij = shared[1]->get(i); numTerm += ((Aij - averageA) * (Bij - averageB)); denomTerm1 += ((Aij - averageA) * (Aij - averageA)); denomTerm2 += ((Bij - averageB) * (Bij - averageB)); } denomTerm1 = sqrt(denomTerm1); denomTerm2 = sqrt(denomTerm2); double denom = denomTerm1 * denomTerm2; data[0] = (numTerm / denom); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } return data; } catch(exception& e) { m->errorOut(e, "StructPearson", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/structpearson.h000077500000000000000000000012501424121717000220230ustar00rootroot00000000000000#ifndef STRUCTPEARSON_H #define STRUCTPEARSON_H /* * structpearson.h * Mothur * * Created by westcott on 12/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "calculator.h" /***********************************************************************/ class StructPearson : public Calculator { public: StructPearson() : Calculator("structpearson", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Structpearson"; } private: }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/tn.cpp000077500000000000000000000011671424121717000200720ustar00rootroot00000000000000// // tn.cpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "tn.hpp" /***********************************************************************/ double TN::getValue(double tp, double tn, double fp, double fn) { try { double tnmax = tn / (double)(tp + tn + fp + fn); if (isnan(tnmax) || isinf(tnmax)) { tnmax = 0; } return tnmax; } catch(exception& e) { m->errorOut(e, "TN", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/tn.hpp000077500000000000000000000011321424121717000200670ustar00rootroot00000000000000// // tn.hpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. 
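//
//  TN reports the fraction of sequence pairs that are true negatives when an
//  OTU assignment is scored pair by pair against the distance matrix:
//  tn / (tp + tn + fp + fn).  The tp, fp and fn arguments only contribute to
//  the denominator.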
// #ifndef tn_hpp #define tn_hpp #include "calculator.h" /***********************************************************************/ class TN : public ClusterMetric { public: TN() : ClusterMetric("tn") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/TN"; } private: }; /***********************************************************************/ #endif /* tn_hpp */ mothur-1.48.0/source/calculators/tp.cpp000077500000000000000000000011671424121717000200740ustar00rootroot00000000000000// // tp.cpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "tp.hpp" /***********************************************************************/ double TP::getValue(double tp, double tn, double fp, double fn) { try { double tpmax = tp / (double)(tp + tn + fp + fn); if (isnan(tpmax) || isinf(tpmax)) { tpmax = 0; } return tpmax; } catch(exception& e) { m->errorOut(e, "TP", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/tp.hpp000077500000000000000000000011311424121717000200700ustar00rootroot00000000000000// // tp.hpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef tp_hpp #define tp_hpp #include "calculator.h" /***********************************************************************/ class TP : public ClusterMetric { public: TP() : ClusterMetric("tp") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/TP"; } private: }; /***********************************************************************/ #endif /* tp_hpp */ mothur-1.48.0/source/calculators/tptn.cpp000077500000000000000000000012301424121717000204250ustar00rootroot00000000000000// // tptn.cpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "tptn.hpp" /***********************************************************************/ double TPTN::getValue(double tp, double tn, double fp, double fn) { try { long long p = tp + tn; double tptn = p / (double)(tp + tn + fp + fn); if (isnan(tptn) || isinf(tptn)) { tptn = 0; } return tptn; } catch(exception& e) { m->errorOut(e, "TPTN", "getValue"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/tptn.hpp000077500000000000000000000011521424121717000204350ustar00rootroot00000000000000// // tptn.hpp // Mothur // // Created by Sarah Westcott on 4/10/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef tptn_hpp #define tptn_hpp #include "calculator.h" /***********************************************************************/ class TPTN : public ClusterMetric { public: TPTN() : ClusterMetric("tptn") {}; double getValue(double tp, double tn, double fp, double fn); string getCitation() { return "http://www.mothur.org/wiki/TPTN"; } private: }; /***********************************************************************/ #endif /* tptn_hpp */ mothur-1.48.0/source/calculators/treecalculator.h000077500000000000000000000021311424121717000221170ustar00rootroot00000000000000#ifndef TREECALCULATOR_H #define TREECALCULATOR_H /* * treecalculator.h * Mothur * * Created by Sarah Westcott on 1/26/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
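 *
 *	Abstract parent of the tree calculators (for example the Unweighted and
 *	Weighted UniFrac calculators that follow).  Each child overrides one of the
 *	getValues() overloads and returns an EstOutput, a vector of doubles holding
 *	one score per comparison.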
* */ #include "mothur.h" #include "tree.h" #include "mothurout.h" #include "utils.hpp" /* The tree calculator class is the parent class for tree calculators in mothur. */ typedef vector EstOutput; /***********************************************************************/ class TreeCalculator { public: TreeCalculator(){ m = MothurOut::getInstance(); } TreeCalculator(string n) : name(n) {}; virtual ~TreeCalculator(){}; virtual EstOutput getValues(Tree*) { return data; } virtual EstOutput getValues(Tree*, int, string) { return data; } virtual EstOutput getValues(Tree*, string, string) { return data; } virtual EstOutput getValues(Tree*, string, string, vector&) { return data; } virtual string getName() { return name; } protected: EstOutput data; string name; MothurOut* m; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/unweighted.cpp000077500000000000000000000435321424121717000216160ustar00rootroot00000000000000/* * unweighted.cpp * Mothur * * Created by Sarah Westcott on 2/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "unweighted.h" /**************************************************************************************************/ Unweighted::Unweighted(bool r, vector G) : includeRoot(r), Groups(G) { try { int numGroups = Groups.size(); //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; for (int i=0; i groups; groups.push_back(Groups[i]); groups.push_back(Groups[l]); namesOfGroupCombos.push_back(groups); } } } catch(exception& e) { m->errorOut(e, "Unweighted", "Unweighted"); exit(1); } } /**************************************************************************************************/ EstOutput Unweighted::getValues(Tree* t, int p) { try { processors = p; return (createProcesses(t)); } catch(exception& e) { m->errorOut(e, "Unweighted", "getValues"); exit(1); } } /***********************************************************************/ struct unweightedData { int start; int num, count; MothurOut* m; EstOutput results; vector< vector > namesOfGroupCombos; vector > randomizedTreeNodes; Tree* t; CountTable* ct; bool includeRoot; vector Treenames; Utils util; unweightedData(){} unweightedData(int st, int en, vector< vector > ngc, Tree* tree, CountTable* count, bool ir) { m = MothurOut::getInstance(); start = st; num = en; namesOfGroupCombos = ngc; t = tree; ct = count; includeRoot = ir; Treenames = tree->getTreeNames(); results.resize(num); count = 0; } unweightedData(int st, int en, vector< vector > ngc, Tree* tree, CountTable* count, bool ir, vector > randomTreeNodes) { m = MothurOut::getInstance(); start = st; num = en; namesOfGroupCombos = ngc; randomizedTreeNodes = randomTreeNodes; t = tree; ct = count; includeRoot = ir; Treenames = tree->getTreeNames(); results.resize(num); count = 0; } }; /**************************************************************************************************/ int findNodeBelonging(MothurOut* m, vector& namesOfGroupCombos, map< string, vector >& groupNodeInfo) { try { int nodeBelonging = -1; for (int g = 0; g < namesOfGroupCombos.size(); g++) { if (groupNodeInfo[namesOfGroupCombos[g]].size() != 0) { nodeBelonging = groupNodeInfo[namesOfGroupCombos[g]][0]; break; } } //sanity check if (nodeBelonging == -1) { m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); for (int g = 0; g < namesOfGroupCombos.size()-1; g++) { m->mothurOut(namesOfGroupCombos[g] + "-"); } 
m->mothurOut(namesOfGroupCombos[namesOfGroupCombos.size()-1]); m->mothurOut(", skipping.\n"); } return nodeBelonging; } catch(exception& e) { m->errorOut(e, "Weighted", "findNodeBelongingToThisComparison"); exit(1); } } /**************************************************************************************************/ void getRoot2(MothurOut* m, Tree* t, int v, vector grouping, set& rootForGrouping) { try { //you are a leaf so get your parent int index = t->tree[v].getParent(); //my parent is a potential root rootForGrouping.insert(index); //while you aren't at root while(t->tree[index].getParent() != -1){ if (m->getControl_pressed()) { return; } //am I the root for this grouping? if so I want to stop "early" //does my sibling have descendants from the users groups? //if so I am not the root int parent = t->tree[index].getParent(); int lc = t->tree[parent].getLChild(); int rc = t->tree[parent].getRChild(); int sib = lc; if (lc == index) { sib = rc; } map::iterator itGroup; int pcountSize = 0; for (int j = 0; j < grouping.size(); j++) { map::iterator itGroup = t->tree[sib].pcount.find(grouping[j]); if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } //if yes, I am not the root if (pcountSize != 0) { rootForGrouping.clear(); rootForGrouping.insert(parent); } index = parent; } //get all nodes above the root to add so we don't add their u values above index = *(rootForGrouping.begin()); while(t->tree[index].getParent() != -1){ int parent = t->tree[index].getParent(); rootForGrouping.insert(parent); index = parent; } return; } catch(exception& e) { m->errorOut(e, "Weighted", "getRoot2"); exit(1); } } /**************************************************************************************************/ void driverUnweighted(unweightedData* params) { try { params->count = 0; for (int h = params->start; h < (params->start+params->num); h++) { if (params->m->getControl_pressed()) { break; } double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group double totalBL = 0.00; //all branch lengths double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; set rootBranches; //if not including root this will hold branches that are "above" the root for this comparison int nodeBelonging = findNodeBelonging(params->m, params->namesOfGroupCombos[h], params->t->groupNodeInfo); if (nodeBelonging != -1) { //fills rootBranches to exclude, if including the root then rootBranches should be empty. 
if (!params->includeRoot) { getRoot2(params->m, params->t, nodeBelonging, params->namesOfGroupCombos[h], rootBranches); } for(int i=0;it->getNumNodes();i++){ if (params->m->getControl_pressed()) { break; } //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want //pcountSize = 2, not unique to one group //pcountSize = 1, unique to one group int pcountSize = 0; for (int j = 0; j < params->namesOfGroupCombos[h].size(); j++) { map::iterator itGroup = params->t->tree[i].pcount.find(params->namesOfGroupCombos[h][j]); if (itGroup != params->t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } //unique calc if (pcountSize == 0) { } else if (!params->util.isEqual(params->t->tree[i].getBranchLength(), -1) && (pcountSize == 1) && (rootBranches.count(i) == 0)) { //you have a unique branch length and you are not the root UniqueBL += abs(params->t->tree[i].getBranchLength()); } //total calc if (pcountSize == 0) { } else if (!params->util.isEqual(params->t->tree[i].getBranchLength(), -1) && (pcountSize != 0) && (rootBranches.count(i) == 0)) { //you have a branch length and you are not the root totalBL += abs(params->t->tree[i].getBranchLength()); } } UW = (UniqueBL / totalBL); if (isnan(UW) || isinf(UW)) { UW = 0; } params->results[params->count] = UW; } params->count++; } } catch(exception& e) { params->m->errorOut(e, "Unweighted", "driverUnweighted"); exit(1); } } /**************************************************************************************************/ EstOutput Unweighted::createProcesses(Tree* t) { try { vector lines; int remainingPairs = namesOfGroupCombos.size(); if (remainingPairs < processors) { processors = remainingPairs; } int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } //create array of worker threads vector workerThreads; vector data; vector Treenames; Treenames = t->getTreeNames(); CountTable* ct = t->getCountTable(); //Lauch worker threads for (int i = 0; i < processors-1; i++) { CountTable* copyCount = new CountTable(); copyCount->copy(ct); Tree* copyTree = new Tree(copyCount, Treenames); copyTree->getCopy(t); unweightedData* dataBundle = new unweightedData(lines[i+1].start, lines[i+1].end, namesOfGroupCombos, copyTree, copyCount, includeRoot); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverUnweighted, dataBundle)); } CountTable* copyCount = new CountTable(); copyCount->copy(ct); Tree* copyTree = new Tree(copyCount, Treenames); copyTree->getCopy(t); unweightedData* dataBundle = new unweightedData(lines[0].start, lines[0].end, namesOfGroupCombos, t, ct, includeRoot); driverUnweighted(dataBundle); EstOutput results = dataBundle->results; delete copyTree; delete copyCount; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); for (int j = 0; j < data[i]->results.size(); j++) { results.push_back(data[i]->results[j]); } if (data[i]->count != data[i]->num) { //you didn't complete your tasks m->mothurOut("[ERROR]: thread " + toString(i+1) + " failed to complete it's tasks, quitting.\n"); m->setControl_pressed(true); } delete data[i]->t; delete data[i]->ct; delete data[i]; delete workerThreads[i]; } return 
results; } catch(exception& e) { m->errorOut(e, "Unweighted", "createProcesses"); exit(1); } } /**************************************************************************************************/ EstOutput Unweighted::getValues(Tree* t, vector >& randomTreeNodes, int p) { try { processors = p; return (createProcesses(t, randomTreeNodes)); } catch(exception& e) { m->errorOut(e, "Unweighted", "getValues"); exit(1); } } /**************************************************************************************************/ void driverRandomCalcs(unweightedData* params) { try { params->count = 0; vector Treenames = params->t->getTreeNames(); Tree* copyTree = new Tree(params->ct, Treenames); for (int h = params->start; h < (params->start+params->num); h++) { if (params->m->getControl_pressed()) { break; } //copy random tree passed in copyTree->getCopy(params->t); //swap labels in the groups you want to compare copyTree->assembleRandomUnifracTree(params->randomizedTreeNodes[h]); double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group double totalBL = 0.00; //all branch lengths double UW = 0.00; //Unweighted Value = UniqueBL / totalBL; //find a node that belongs to one of the groups in this combo set rootBranches; //if not including root this will hold branches that are "above" the root for this comparison int nodeBelonging = findNodeBelonging(params->m, params->namesOfGroupCombos[h], params->t->groupNodeInfo); if (nodeBelonging != -1) { //fills rootBranches to exclude, if including the root then rootBranches should be empty. if (!params->includeRoot) { getRoot2(params->m, params->t, nodeBelonging, params->namesOfGroupCombos[h], rootBranches); } for(int i=0;igetNumNodes();i++){ if (params->m->getControl_pressed()) { break; } //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want //pcountSize = 2, not unique to one group //pcountSize = 1, unique to one group int pcountSize = 0; for (int j = 0; j < params->namesOfGroupCombos[h].size(); j++) { map::iterator itGroup = copyTree->tree[i].pcount.find(params->namesOfGroupCombos[h][j]); if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } //unique calc if (pcountSize == 0) { } else if (!params->util.isEqual(copyTree->tree[i].getBranchLength(), -1) && (pcountSize == 1) && (rootBranches.count(i) == 0)) { //you have a unique branch length and you are not the root UniqueBL += abs(copyTree->tree[i].getBranchLength()); } //total calc if (pcountSize == 0) { } else if (!params->util.isEqual(copyTree->tree[i].getBranchLength(), -1) && (pcountSize != 0) && (rootBranches.count(i) == 0)) { //you have a branch length and you are not the root totalBL += abs(copyTree->tree[i].getBranchLength()); } } UW = (UniqueBL / totalBL); if (isnan(UW) || isinf(UW)) { UW = 0; } params->results[params->count] = UW; } params->count++; } delete copyTree; } catch(exception& e) { params->m->errorOut(e, "Unweighted", "driverRandomCalcs"); exit(1); } } /**************************************************************************************************/ EstOutput Unweighted::createProcesses(Tree* t, vector >& randomTreeNodes) { try { vector lines; int remainingPairs = namesOfGroupCombos.size(); if (remainingPairs < processors) { processors = remainingPairs; } int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = 
ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } //create array of worker threads vector workerThreads; vector data; vector Treenames; Treenames = t->getTreeNames(); CountTable* ct = t->getCountTable(); //Lauch worker threads for (int i = 0; i < processors-1; i++) { CountTable* copyCount = new CountTable(); copyCount->copy(ct); Tree* copyTree = new Tree(copyCount, Treenames); copyTree->getCopy(t); unweightedData* dataBundle = new unweightedData(lines[i+1].start, lines[i+1].end, namesOfGroupCombos, copyTree, copyCount, includeRoot, randomTreeNodes); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverRandomCalcs, dataBundle)); } unweightedData* dataBundle = new unweightedData(lines[0].start, lines[0].end, namesOfGroupCombos, t, ct, includeRoot, randomTreeNodes); driverRandomCalcs(dataBundle); EstOutput results = dataBundle->results; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); for (int j = 0; j < data[i]->results.size(); j++) { results.push_back(data[i]->results[j]); } if (data[i]->count != data[i]->num) { //you didn't complete your tasks m->mothurOut("[ERROR]: thread " + toString(i+1) + " failed to complete it's tasks, quitting.\n"); m->setControl_pressed(true); } delete data[i]->t; delete data[i]->ct; delete data[i]; delete workerThreads[i]; } return results; } catch(exception& e) { m->errorOut(e, "Unweighted", "createProcesses"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/calculators/unweighted.h000077500000000000000000000015701424121717000212570ustar00rootroot00000000000000#ifndef UNWEIGHTED_H #define UNWEIGHTED_H /* * unweighted.h * Mothur * * Created by Sarah Westcott on 2/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "treecalculator.h" #include "counttable.h" /***********************************************************************/ class Unweighted : public TreeCalculator { public: Unweighted(bool r, vector g); ~Unweighted() = default; EstOutput getValues(Tree*, int); EstOutput getValues(Tree*, vector >&, int); private: vector< vector > namesOfGroupCombos; vector Groups; int processors; bool includeRoot; EstOutput createProcesses(Tree*); EstOutput createProcesses(Tree*, vector >&); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/calculators/uvest.cpp000077500000000000000000000053701424121717000206170ustar00rootroot00000000000000/* * uvest.cpp * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
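 *
 *	Computes the U and V estimators used by the abundance-based Jaccard and
 *	Sorenson calculators (SharedJAbund and SharedSorAbund).  U estimates the
 *	fraction of the sequences in community A that belong to OTUs shared with
 *	community B, plus a correction for shared OTUs that were probably missed
 *	because they are rare in B (based on B's shared singleton and doubleton
 *	counts f1B and f2B); V is the mirror image for community B.  Both values
 *	are capped at 1.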
* */ #include "uvest.h" /***********************************************************************/ //This is used by SharedJAbund and SharedSorAbund EstOutput UVEst::getUVest(vector shared) { try { EstOutput results; results.resize(2,0); int S12, Atotal, Btotal, f1A, f2A, f1B, f2B, sumSharedA, sumSharedB, sumSharedA1, sumSharedB1, tempA, tempB; S12 = 0; Atotal = 0; Btotal = 0; f1A = 0; f2A = 0; f1B = 0; f2B = 0; sumSharedA = 0; sumSharedB = 0; sumSharedA1 = 0; sumSharedB1 = 0; float Upart1, Upart2, Upart3, Vpart1, Vpart2, Vpart3, Uest, Vest; Upart1 = 0.0; Upart2 = 0.0; Upart3 = 0.0; Vpart1 = 0.0; Vpart2 = 0.0; Vpart3 = 0.0; /*Xi, Yi = abundance of the ith shared OTU in A and B ntotal, mtotal = total number of sequences sampled in A and B I(•) = if the argument, •, is true then I(•) is 1; otherwise it is 0. sumSharedA = the sum of all shared otus in A sumSharedB = the sum of all shared otus in B sumSharedA1 = the sum of all shared otus in A where B = 1 sumSharedB1 = the sum of all shared otus in B where A = 1 */ for (int i = 0; i < shared[0]->getNumBins(); i++) { //store in temps to avoid multiple repetitive function calls tempA = shared[0]->get(i); tempB = shared[1]->get(i); Atotal += tempA; Btotal += tempB; if ((tempA != 0) && (tempB != 0)) {//they are shared sumSharedA += tempA; sumSharedB += tempB; //does A have one or two if (tempA == 1) { f1A++; sumSharedB1 += tempB;} else if (tempA == 2) { f2A++; } //does B have one or two if (tempB == 1) { f1B++; sumSharedA1 += tempA;} else if (tempB == 2) { f2B++; } } } Upart1 = sumSharedA / (float) Atotal; Upart2 = ((Btotal - 1) * f1B) / (float) (Btotal * 2 * f2B); Upart3 = sumSharedA1 / (float) Atotal; if (isnan(Upart1) || isinf(Upart1)) { Upart1 = 0; } if (isnan(Upart2) || isinf(Upart2)) { Upart2 = 0; } if (isnan(Upart3) || isinf(Upart3)) { Upart3 = 0; } Uest = Upart1 + (Upart2 * Upart3); Vpart1 = sumSharedB / (float) Btotal; Vpart2 = ((Atotal - 1) * f1A) / (float) (Atotal * 2 * f2A); Vpart3 = sumSharedB1 / (float) Btotal; if (isnan(Vpart1) || isinf(Vpart1)) { Vpart1 = 0; } if (isnan(Vpart2) || isinf(Vpart2)) { Vpart2 = 0; } if (isnan(Vpart3) || isinf(Vpart3)) { Vpart3 = 0; } Vest = Vpart1 + (Vpart2 * Vpart3); if (Uest > 1) { Uest = 1; } if (Vest > 1) { Vest = 1; } results[0] = Uest; results[1] = Vest; return results; } catch(exception& e) { m->errorOut(e, "UVEst", "getUVest"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/uvest.h000077500000000000000000000013201424121717000202530ustar00rootroot00000000000000#ifndef UVEST_H #define UVEST_H /* * uvest.h * Dotur * * Created by Sarah Westcott on 1/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class implements the UVEst estimator on two groups. It is used by sharedJAbund and SharedSorensonAbund. */ #include "mothurout.h" #include "calculator.h" typedef vector EstOutput; /***********************************************************************/ class UVEst { public: UVEst() { m = MothurOut::getInstance(); } ~UVEst() = default; EstOutput getUVest(vector); private: MothurOut* m; }; /***********************************************************************/ #endif mothur-1.48.0/source/calculators/weighted.cpp000077500000000000000000000376011424121717000212530ustar00rootroot00000000000000/* * weighted.cpp * Mothur * * Created by Sarah Westcott on 2/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
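 *
 *	Weighted UniFrac.  For a pair of groups A and B the numerator is a sum over
 *	the branches of the tree,
 *
 *	    W = sum_b | l_b * (a_b/A_T - b_b/B_T) |
 *
 *	(findNumerator() below), where l_b is the branch length, a_b and b_b are
 *	the numbers of sequences from A and B that descend from branch b, and A_T,
 *	B_T are the group totals.  W is divided by D, the abundance-weighted sum of
 *	the leaf-to-root branch lengths of both groups (findWeightedSums()), to
 *	give the normalised score.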
* */ #include "weighted.h" /**************************************************************************************************/ Weighted::Weighted(bool r, vector G) : includeRoot(r), Groups(G) { try { int numGroups = Groups.size(); //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; for (int i=0; i groups; groups.push_back(Groups[i]); groups.push_back(Groups[l]); namesOfGroupCombos.push_back(groups); } } } catch(exception& e) { m->errorOut(e, "Weighted", "getValues"); exit(1); } } /**************************************************************************************************/ EstOutput Weighted::getValues(Tree* t, int p) { try { processors = p; return (createProcesses(t)); } catch(exception& e) { m->errorOut(e, "Weighted", "getValues"); exit(1); } } /***********************************************************************/ struct weightedData { int start; int num, count; MothurOut* m; EstOutput results; vector< vector > namesOfGroupCombos; Tree* t; CountTable* ct; bool includeRoot; weightedData(){} weightedData(int st, int en, vector< vector > ngc, Tree* tree, CountTable* count, bool ir) { m = MothurOut::getInstance(); start = st; num = en; namesOfGroupCombos = ngc; t = tree; ct = count; includeRoot = ir; count = 0; } }; /**************************************************************************************************/ void getRoot(MothurOut* m, Tree* t, int v, vector grouping, set& rootForGrouping) { try { //you are a leaf so get your parent int index = t->tree[v].getParent(); //my parent is a potential root rootForGrouping.insert(index); //while you aren't at root while(t->tree[index].getParent() != -1){ if (m->getControl_pressed()) { return; } //am I the root for this grouping? if so I want to stop "early" //does my sibling have descendants from the users groups? 
//if so I am not the root int parent = t->tree[index].getParent(); int lc = t->tree[parent].getLChild(); int rc = t->tree[parent].getRChild(); int sib = lc; if (lc == index) { sib = rc; } map::iterator itGroup; int pcountSize = 0; for (int j = 0; j < grouping.size(); j++) { map::iterator itGroup = t->tree[sib].pcount.find(grouping[j]); if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } } //if yes, I am not the root if (pcountSize != 0) { rootForGrouping.clear(); rootForGrouping.insert(parent); } index = parent; } //get all nodes above the root to add so we don't add their u values above index = *(rootForGrouping.begin()); while(t->tree[index].getParent() != -1){ int parent = t->tree[index].getParent(); rootForGrouping.insert(parent); index = parent; } return; } catch(exception& e) { m->errorOut(e, "Weighted", "getRoot"); exit(1); } } /**************************************************************************************************/ double getLengthToRoot(MothurOut* m, Tree* t, int v, set roots, vector& leafToTreeRoot) { try { double sum = 0.0; int index = v; Utils util; //find length to complete tree root and save if (util.isEqual(leafToTreeRoot[v], 0.0)) { //you are a leaf if(!util.isEqual(t->tree[index].getBranchLength(), -1)){ leafToTreeRoot[v] += abs(t->tree[index].getBranchLength()); } index = t->tree[index].getParent(); //while you aren't at root while(t->tree[index].getParent() != -1){ if (m->getControl_pressed()) { return sum; } int parent = t->tree[index].getParent(); if (!util.isEqual(t->tree[index].getBranchLength(), -1)) { leafToTreeRoot[v] += abs(t->tree[index].getBranchLength()); } index = parent; } } index = v; sum = leafToTreeRoot[v]; //subtract excess root for this grouping for (set::iterator it = roots.begin(); it != roots.end(); it++) { if (!util.isEqual(t->tree[*it].getBranchLength(), -1)) { sum -= abs(t->tree[*it].getBranchLength()); } } return sum; } catch(exception& e) { m->errorOut(e, "Weighted", "getLengthToRoot"); exit(1); } } /**************************************************************************************************/ int findNodeBelongingToThisComparison(MothurOut* m, vector& namesOfGroupCombos, map< string, vector >& groupNodeInfo) { try { int nodeBelonging = -1; for (int g = 0; g < namesOfGroupCombos.size(); g++) { if (groupNodeInfo[namesOfGroupCombos[g]].size() != 0) { nodeBelonging = groupNodeInfo[namesOfGroupCombos[g]][0]; break; } } //sanity check if (nodeBelonging == -1) { m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping "); for (int g = 0; g < namesOfGroupCombos.size()-1; g++) { m->mothurOut(namesOfGroupCombos[g] + "-"); } m->mothurOut(namesOfGroupCombos[namesOfGroupCombos.size()-1]); m->mothurOut(", skipping.\n"); } return nodeBelonging; } catch(exception& e) { m->errorOut(e, "Weighted", "findNodeBelongingToThisComparison"); exit(1); } } /**************************************************************************************************/ double findNumerator(MothurOut* m, Tree* t, set& rootBranches, string groupA, string groupB, int groupACount, int groupBCount) { try { Utils util; double WScore = 0.0; for(int i=0;igetNumNodes();i++){ if (m->getControl_pressed()) { break; } double u = 0.00; //does this node have descendants from groupA map::iterator it = t->tree[i].pcount.find(groupA); //if it does u = # of its descendants with a certain group / total number in tree with a certain group if (it != t->tree[i].pcount.end()) { u = (double) it->second / (double) groupACount; } //does this 
node have descendants from group l it = t->tree[i].pcount.find(groupB); //if it does subtract their percentage from u if (it != t->tree[i].pcount.end()) { u -= (double) it->second / (double) groupBCount; } if (!util.isEqual(t->tree[i].getBranchLength(), -1)) { //if this is not the root then add it if (rootBranches.count(i) == 0) { u = abs(u * t->tree[i].getBranchLength()); WScore += u; } } } return WScore; } catch(exception& e) { m->errorOut(e, "Weighted", "findNumerator"); exit(1); } } /**************************************************************************************************/ double findWeightedSums(MothurOut* m, Tree* t, set& rootBranches, vector& nodeToRootLength, string groupA, int groupACount) { try { double D = 0.0; //adding the wieghted sums from groupA for (int j = 0; j < t->groupNodeInfo[groupA].size(); j++) { //the leaf nodes that have seqs from groupA map::iterator it = t->tree[t->groupNodeInfo[groupA][j]].pcount.find(groupA); int numSeqsInGroupI = it->second; double sum = getLengthToRoot(m, t, t->groupNodeInfo[groupA][j], rootBranches, nodeToRootLength); double weightedSum = ((numSeqsInGroupI * sum) / (double) groupACount); D += weightedSum; } return D; } catch(exception& e) { m->errorOut(e, "Weighted", "findNumerator"); exit(1); } } /**************************************************************************************************/ void driverWeighted(weightedData* params) { try { Utils util; params->count = 0; vector nodeToRootLength; //length from leaf to tree root, grouping root maybe smaller. Used as reference and excess root is deducted if neccasary nodeToRootLength.resize(params->t->getNumLeaves(), 0.0); //set all leaf nodes length to root to zero for (int h = params->start; h < (params->start+params->num); h++) { if (params->m->getControl_pressed()) { break; } //initialize weighted score string groupA = params->namesOfGroupCombos[h][0]; string groupB = params->namesOfGroupCombos[h][1]; int groupACount = params->ct->getGroupCount(groupA); int groupBCount = params->ct->getGroupCount(groupB); set rootBranches; //if not including root this will hold branches that are "above" the root for this comparison double WScore = 0.0; double D = 0.0; //find a node that belongs to one of the groups in this combo int nodeBelonging = findNodeBelongingToThisComparison(params->m, params->namesOfGroupCombos[h], params->t->groupNodeInfo); if (nodeBelonging != -1) { //fills rootBranches to exclude, if including the root then rootBranches should be empty. if (!params->includeRoot) { getRoot(params->m, params->t, nodeBelonging, params->namesOfGroupCombos[h], rootBranches); } WScore = findNumerator(params->m, params->t, rootBranches, groupA, groupB, groupACount, groupBCount); D += findWeightedSums(params->m, params->t, rootBranches, nodeToRootLength, groupA, groupACount); D += findWeightedSums(params->m, params->t, rootBranches, nodeToRootLength, groupB, groupBCount); double result = (WScore / D); if (isnan(result) || isinf(result)) { result = 0; } params->results.push_back(result); }else { params->results.push_back(0.0); } params->count++; } } catch(exception& e) { params->m->errorOut(e, "Weighted", "driverWeighted"); exit(1); } } /**************************************************************************************************/ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) { try { EstOutput data; CountTable* ct = t->getCountTable(); vector nodeToRootLength; //length from leaf to tree root, grouping root maybe smaller. 
Used as reference and excess root is deducted if neccesary nodeToRootLength.resize(t->getNumLeaves(), 0.0); //set all leaf nodes length to root to zero vector grouping; grouping.push_back(groupA); grouping.push_back(groupB); int groupACount = ct->getGroupCount(groupA); int groupBCount = ct->getGroupCount(groupB); set rootBranches; //if not including root this will hold branches that are "above" the root for this comparison double WScore = 0.0; double D = 0.0; //find a node that belongs to one of the groups in this combo int nodeBelonging = findNodeBelongingToThisComparison(m, grouping, t->groupNodeInfo); if (nodeBelonging != -1) { if (!includeRoot) { getRoot(m, t, nodeBelonging, grouping, rootBranches); } WScore = findNumerator(m, t, rootBranches, groupA, groupB, groupACount, groupBCount); D += findWeightedSums(m, t, rootBranches, nodeToRootLength, groupA, groupACount); D += findWeightedSums(m, t, rootBranches, nodeToRootLength, groupB, groupBCount); double result = (WScore / D); if (isnan(result) || isinf(result)) { result = 0; } data.push_back(result); }else { data.push_back(0.0); } return data; } catch(exception& e) { m->errorOut(e, "Weighted", "getValues"); exit(1); } } /**************************************************************************************************/ EstOutput Weighted::createProcesses(Tree* t) { try { vector lines; int remainingPairs = namesOfGroupCombos.size(); if (remainingPairs < processors) { processors = remainingPairs; } int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } //create array of worker threads vector workerThreads; vector data; vector Treenames; Treenames = t->getTreeNames(); CountTable* ct = t->getCountTable(); //Lauch worker threads for (int i = 0; i < processors-1; i++) { CountTable* copyCount = new CountTable(); copyCount->copy(ct); Tree* copyTree = new Tree(copyCount, Treenames); copyTree->getCopy(t); weightedData* dataBundle = new weightedData(lines[i+1].start, lines[i+1].end, namesOfGroupCombos, copyTree, copyCount, includeRoot); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverWeighted, dataBundle)); } CountTable* copyCount = new CountTable(); copyCount->copy(ct); Tree* copyTree = new Tree(copyCount, Treenames); copyTree->getCopy(t); weightedData* dataBundle = new weightedData(lines[0].start, lines[0].end, namesOfGroupCombos, copyTree, copyCount, includeRoot); driverWeighted(dataBundle); EstOutput results = dataBundle->results; delete copyTree; delete copyCount; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); for (int j = 0; j < data[i]->results.size(); j++) { results.push_back(data[i]->results[j]); } if (data[i]->count != data[i]->num) { //you didn't complete your tasks m->mothurOut("[ERROR]: thread " + toString(i+1) + " failed to complete it's tasks, quitting.\n"); m->setControl_pressed(true); } delete data[i]->t; delete data[i]->ct; delete data[i]; delete workerThreads[i]; } return results; } catch(exception& e) { m->errorOut(e, "Weighted", "createProcesses"); exit(1); } } /**************************************************************************************************/ 
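/*
 * Editor's note (illustrative sketch, not part of the mothur sources): the value returned by
 * driverWeighted()/getValues() above is the weighted UniFrac score
 *
 *     W = sum_over_branches( |A_i/A - B_i/B| * b_i ) / D
 *
 * where A_i and B_i are the numbers of group-A and group-B sequences descending from branch i,
 * A and B are the group totals, b_i is the branch length, and D (findWeightedSums) is the sum of
 * abundance-weighted leaf-to-root path lengths for both groups. The standalone snippet below
 * recomputes only the numerator for a flat list of branches; the struct and function names are
 * hypothetical, and it omits the root-trimming, masking, and threading logic of the real code.
 */
#include <vector>
#include <cmath>

struct BranchSketch { double length; int aCount; int bCount; };   // hypothetical toy branch record

// Numerator of the weighted UniFrac score over a flat branch list (sketch only).
double weightedUnifracNumeratorSketch(const std::vector<BranchSketch>& branches, int aTotal, int bTotal) {
    double w = 0.0;
    for (const BranchSketch& br : branches) {
        double u = (double)br.aCount / aTotal - (double)br.bCount / bTotal;  // signed group imbalance on this branch
        w += std::fabs(u) * br.length;                                       // weight the imbalance by branch length
    }
    return w;   // divide by the abundance-weighted depth sum D to obtain the final score
}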
mothur-1.48.0/source/calculators/weighted.h000077500000000000000000000015111424121717000207070ustar00rootroot00000000000000#ifndef WEIGHTED_H #define WEIGHTED_H /* * weighted.h * Mothur * * Created by Sarah Westcott on 2/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "treecalculator.h" #include "counttable.h" /***********************************************************************/ class Weighted : public TreeCalculator { public: Weighted( bool r, vector G); ~Weighted() = default; EstOutput getValues(Tree*, string, string); EstOutput getValues(Tree*, int); private: Utils util; int processors; bool includeRoot; vector Groups; vector< vector > namesOfGroupCombos; EstOutput createProcesses(Tree*); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/calculators/whittaker.cpp000077500000000000000000000013651424121717000214530ustar00rootroot00000000000000/* * whittaker.cpp * Mothur * * Created by Pat Schloss on 4/23/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "whittaker.h" /***********************************************************************/ EstOutput Whittaker::getValues(vector shared){ try{ data.resize(1); int countA = 0; int countB = 0; int sTotal = shared[0]->getNumBins(); for(int i=0;iget(i) != 0){ countA++; } if(shared[1]->get(i) != 0){ countB++; } } data[0] = 2-2*sTotal/(float)(countA+countB); return data; } catch(exception& e) { m->errorOut(e, "Whittaker", "getValues"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/calculators/whittaker.h000077500000000000000000000013671424121717000211220ustar00rootroot00000000000000#ifndef WHITTAKER_H #define WHITTAKER_H /* * whittaker.h * Mothur * * Created by Thomas Ryabin on 3/13/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" /*This class implements the Whittaker estimator on 2 groups. It is a child of the calculator class.*/ /***********************************************************************/ class Whittaker : public Calculator { public: Whittaker() : Calculator("whittaker", 1, false) {}; EstOutput getValues(SAbundVector*) {return data;}; EstOutput getValues(vector); string getCitation() { return "http://www.mothur.org/wiki/Whittaker"; } }; /***********************************************************************/ #endif mothur-1.48.0/source/checked.h000077500000000000000000000276411424121717000161750ustar00rootroot00000000000000// Copyright 2006-2016 Nemanja Trifunovic /* Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #include "core.h" #include namespace utf8 { // Base for the exceptions that may be thrown from the library class exception : public ::std::exception { }; // Exceptions that may be thrown from the library functions. class invalid_code_point : public exception { uint32_t cp; public: invalid_code_point(uint32_t codepoint) : cp(codepoint) {} virtual const char* what() const throw() { return "Invalid code point"; } uint32_t code_point() const {return cp;} }; class invalid_utf8 : public exception { uint8_t u8; public: invalid_utf8 (uint8_t u) : u8(u) {} virtual const char* what() const throw() { return "Invalid UTF-8"; } uint8_t utf8_octet() const {return u8;} }; class invalid_utf16 : public exception { uint16_t u16; public: invalid_utf16 (uint16_t u) : u16(u) {} virtual const char* what() const throw() { return "Invalid UTF-16"; } uint16_t utf16_word() const {return u16;} }; class not_enough_room : public exception { public: virtual const char* what() const throw() { return "Not enough space"; } }; /// The library API - functions intended to be called by the users template octet_iterator append(uint32_t cp, octet_iterator result) { if (!utf8::internal::is_code_point_valid(cp)) throw invalid_code_point(cp); if (cp < 0x80) // one octet *(result++) = static_cast(cp); else if (cp < 0x800) { // two octets *(result++) = static_cast((cp >> 6) | 0xc0); *(result++) = static_cast((cp & 0x3f) | 0x80); } else if (cp < 0x10000) { // three octets *(result++) = static_cast((cp >> 12) | 0xe0); *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(result++) = static_cast((cp & 0x3f) | 0x80); } else { // four octets *(result++) = static_cast((cp >> 18) | 0xf0); *(result++) = static_cast(((cp >> 12) & 0x3f) | 0x80); *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(result++) = static_cast((cp & 0x3f) | 0x80); } return result; } template output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) { while (start != end) { octet_iterator sequence_start = start; internal::utf_error err_code = utf8::internal::validate_next(start, end); switch (err_code) { case internal::UTF8_OK : for (octet_iterator it = sequence_start; it != start; ++it) *out++ = *it; break; case internal::NOT_ENOUGH_ROOM: throw not_enough_room(); case internal::INVALID_LEAD: out = utf8::append (replacement, out); ++start; break; case internal::INCOMPLETE_SEQUENCE: case internal::OVERLONG_SEQUENCE: case internal::INVALID_CODE_POINT: out = utf8::append (replacement, out); ++start; // just one replacement mark for the sequence while (start != end && utf8::internal::is_trail(*start)) ++start; break; } } return out; } template inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) { static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); return utf8::replace_invalid(start, end, out, replacement_marker); } template uint32_t 
next(octet_iterator& it, octet_iterator end) { uint32_t cp = 0; internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); switch (err_code) { case internal::UTF8_OK : break; case internal::NOT_ENOUGH_ROOM : throw not_enough_room(); case internal::INVALID_LEAD : case internal::INCOMPLETE_SEQUENCE : case internal::OVERLONG_SEQUENCE : throw invalid_utf8(*it); case internal::INVALID_CODE_POINT : throw invalid_code_point(cp); } return cp; } template uint32_t peek_next(octet_iterator it, octet_iterator end) { return utf8::next(it, end); } template uint32_t prior(octet_iterator& it, octet_iterator start) { // can't do much if it == start if (it == start) throw not_enough_room(); octet_iterator end = it; // Go back until we hit either a lead octet or start while (utf8::internal::is_trail(*(--it))) if (it == start) throw invalid_utf8(*it); // error - no lead byte in the sequence return utf8::peek_next(it, end); } /// Deprecated in versions that include "prior" template uint32_t previous(octet_iterator& it, octet_iterator pass_start) { octet_iterator end = it; while (utf8::internal::is_trail(*(--it))) if (it == pass_start) throw invalid_utf8(*it); // error - no lead byte in the sequence octet_iterator temp = it; return utf8::next(temp, end); } template void advance (octet_iterator& it, distance_type n, octet_iterator end) { for (distance_type i = 0; i < n; ++i) utf8::next(it, end); } template typename std::iterator_traits::difference_type distance (octet_iterator first, octet_iterator last) { typename std::iterator_traits::difference_type dist; for (dist = 0; first < last; ++dist) utf8::next(first, last); return dist; } template octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) { while (start != end) { uint32_t cp = utf8::internal::mask16(*start++); // Take care of surrogate pairs first if (utf8::internal::is_lead_surrogate(cp)) { if (start != end) { uint32_t trail_surrogate = utf8::internal::mask16(*start++); if (utf8::internal::is_trail_surrogate(trail_surrogate)) cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; else throw invalid_utf16(static_cast(trail_surrogate)); } else throw invalid_utf16(static_cast(cp)); } // Lone trail surrogate else if (utf8::internal::is_trail_surrogate(cp)) throw invalid_utf16(static_cast(cp)); result = utf8::append(cp, result); } return result; } template u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) { while (start < end) { uint32_t cp = utf8::next(start, end); if (cp > 0xffff) { //make a surrogate pair *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); } else *result++ = static_cast(cp); } return result; } template octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) { while (start != end) result = utf8::append(*(start++), result); return result; } template u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) { while (start < end) (*result++) = utf8::next(start, end); return result; } // The iterator class template class iterator : public std::iterator { octet_iterator it; octet_iterator range_start; octet_iterator range_end; public: iterator () = default; explicit iterator (const octet_iterator& octet_it, const octet_iterator& rangestart, const octet_iterator& rangeend) : it(octet_it), range_start(rangestart), range_end(rangeend) { if (it < range_start || it > range_end) throw 
std::out_of_range("Invalid utf-8 iterator position"); } // the default "big three" are OK octet_iterator base () const { return it; } uint32_t operator * () const { octet_iterator temp = it; return utf8::next(temp, range_end); } bool operator == (const iterator& rhs) const { if (range_start != rhs.range_start || range_end != rhs.range_end) throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); return (it == rhs.it); } bool operator != (const iterator& rhs) const { return !(operator == (rhs)); } iterator& operator ++ () { utf8::next(it, range_end); return *this; } iterator operator ++ (int) { iterator temp = *this; utf8::next(it, range_end); return temp; } iterator& operator -- () { utf8::prior(it, range_start); return *this; } iterator operator -- (int) { iterator temp = *this; utf8::prior(it, range_start); return temp; } }; // class iterator } // namespace utf8 #endif //header guard mothur-1.48.0/source/chimera/000077500000000000000000000000001424121717000160315ustar00rootroot00000000000000mothur-1.48.0/source/chimera/README.txt000066400000000000000000000011441424121717000175270ustar00rootroot00000000000000The MothurChimera class is parent to the classes in this grouping. Chimera checking preferred methods (not in this grouping, implemented ): * chimera.uchime which is a wrapper for the uchime program * chimera.vsearch which is a wrapper for the vsearch program /********************************************************************/ Bellerophon class implements the oldest chimera checking method. It is legacy code. Other methods are much faster and more accurate. /********************************************************************/ /********************************************************************/ mothur-1.48.0/source/chimera/bellerophon.cpp000066400000000000000000000354701424121717000210570ustar00rootroot00000000000000/* * bellerophon.cpp * Mothur * * Created by Sarah Westcott on 7/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "bellerophon.h" #include "eachgapdist.h" #include "ignoregaps.h" #include "onegapdist.h" /***************************************************************************************************************/ Bellerophon::Bellerophon(string name, bool filterSeqs, bool c, int win, int inc, string o) : MothurChimera() { try { fastafile = name; correction = c; outputDir = o; window = win; increment = inc; //read in sequences seqs = readSeqs(fastafile); numSeqs = seqs.size(); if (numSeqs == 0) { m->mothurOut("Error in reading you sequences.\n"); exit(1); } //do soft filter if (filterSeqs) { createFilter(seqs, 0.5); for (int i = 0; i < seqs.size(); i++) { runFilter(seqs[i]); } } distCalculator = new eachGapDist(1.0); //set default window to 25% of sequence length string seq0 = seqs[0]->getAligned(); if (window == 0) { window = seq0.length() / 4; } else if (window > (seq0.length() / 2)) { m->mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2)) + "\n"); window = (seq0.length() / 2); } if (increment > (seqs[0]->getAlignLength() - (2*window))) { if (increment != 10) { m->mothurOut("You have selected a increment that is too large. 
I will use the default.\n"); increment = 10; if (increment > (seqs[0]->getAlignLength() - (2*window))) { increment = 0; } }else{ increment = 0; } } if (increment == 0) { iters = 1; } else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); } //initialize pref pref.resize(iters); for (int i = 0; i < iters; i++) { Preference temp; for (int j = 0; j < numSeqs; j++) { pref[i].push_back(temp); } } } catch(exception& e) { m->errorOut(e, "Bellerophon", "Bellerophon"); exit(1); } } //*************************************************************************************************************** int Bellerophon::print(ostream& out, ostream& outAcc, string s) { try { int above1 = 0; //sorted "best" preference scores for all seqs vector best = getBestPref(); if (m->getControl_pressed()) { return numSeqs; } out << "Name\tScore\tLeft\tRight\t" << endl; //output prefenence structure to .chimeras file for (int i = 0; i < best.size(); i++) { if (m->getControl_pressed()) { return numSeqs; } out << best[i].name << '\t' << setprecision(3) << best[i].score << '\t' << best[i].leftParent << '\t' << best[i].rightParent << endl; //calc # of seqs with preference above 1.0 if (best[i].score > 1.0) { above1++; outAcc << best[i].name << endl; m->mothurOut(best[i].name + " is a suspected chimera at breakpoint " + toString(best[i].midpoint) + "\n"); m->mothurOut("It's score is " + toString(best[i].score) + " with suspected left parent " + best[i].leftParent + " and right parent " + best[i].rightParent + "\n"); } } //output results to screen m->mothurOut("\nSequence with preference score above 1.0: " + toString(above1) + "\n"); int spot; spot = best.size()-1; m->mothurOut("Minimum:\t" + toString(best[spot].score) + "\n"); spot = best.size() * 0.975; m->mothurOut("2.5%-tile:\t" + toString(best[spot].score) + "\n"); spot = best.size() * 0.75; m->mothurOut("25%-tile:\t" + toString(best[spot].score) + "\n"); spot = best.size() * 0.50; m->mothurOut("Median: \t" + toString(best[spot].score) + "\n"); spot = best.size() * 0.25; m->mothurOut("75%-tile:\t" + toString(best[spot].score) + "\n"); spot = best.size() * 0.025; m->mothurOut("97.5%-tile:\t" + toString(best[spot].score) + "\n"); spot = 0; m->mothurOut("Maximum:\t" + toString(best[spot].score) + "\n"); return numSeqs; } catch(exception& e) { m->errorOut(e, "Bellerophon", "print"); exit(1); } } //******************************************************************************************************************** //sorts highest score to lowest inline bool comparePref(Preference left, Preference right){ return (left.score > right.score); } //*************************************************************************************************************** int Bellerophon::getChimeras() { try { //create breaking points vector midpoints; midpoints.resize(iters, window); for (int i = 1; i < iters; i++) { midpoints[i] = midpoints[i-1] + increment; } //fill pref with scores driverChimeras(midpoints); return 0; } catch(exception& e) { m->errorOut(e, "Bellerophon", "getChimeras"); exit(1); } } //*************************************************************************************************************** int Bellerophon::driverChimeras(vector midpoints) { try { for (int h = 0; h < iters; h++) { count = h; int midpoint = midpoints[h]; //initialize pref[count] for (int i = 0; i < numSeqs; i++ ) { pref[count][i].name = seqs[i]->getName(); pref[count][i].midpoint = midpoint; } if (m->getControl_pressed()) { return 0; } //create 2 vectors of sequences, 1 for left side and one 
for right side vector left; vector right; for (int i = 0; i < seqs.size(); i++) { if (m->getControl_pressed()) { return 0; } //save left side string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window); Sequence tempLeft; tempLeft.setName(seqs[i]->getName()); tempLeft.setAligned(seqLeft); left.push_back(tempLeft); //save right side string seqRight = seqs[i]->getAligned().substr(midpoint, window); Sequence tempRight; tempRight.setName(seqs[i]->getName()); tempRight.setAligned(seqRight); right.push_back(tempRight); } //this should be parallelized //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | ) //create a matrix containing the distance from left to left and right to right //calculate distances SparseMatrix* SparseLeft = new SparseMatrix(); SparseMatrix* SparseRight = new SparseMatrix(); createSparseMatrix(0, left.size(), SparseLeft, left); if (m->getControl_pressed()) { delete SparseLeft; delete SparseRight; return 0; } createSparseMatrix(0, right.size(), SparseRight, right); if (m->getControl_pressed()) { delete SparseLeft; delete SparseRight; return 0; } left.clear(); right.clear(); vector distMapRight; vector distMapLeft; // Create a data structure to quickly access the distance information. //this is from thallingers reimplementation on get.oturep // It consists of a vector of distance maps, where each map contains // all distances of a certain sequence. Vector and maps are accessed // via the index of a sequence in the distance matrix distMapRight = vector(numSeqs); distMapLeft = vector(numSeqs); for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) { distMapLeft[currentCell->row][currentCell->column] = currentCell->dist; if (m->getControl_pressed()) { delete SparseLeft; delete SparseRight; return 0; } } for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) { distMapRight[currentCell->row][currentCell->column] = currentCell->dist; if (m->getControl_pressed()) { delete SparseLeft; delete SparseRight; return 0; } } delete SparseLeft; delete SparseRight; //fill preference structure generatePreferences(distMapLeft, distMapRight, midpoint); if (m->getControl_pressed()) { return 0; } //report progress if((h+1) % 10 == 0){ m->mothurOutJustToScreen("Processing sliding window: " + toString(h+1) + "\n") ; } } //report progress if((iters) % 10 != 0){ m->mothurOutJustToScreen("Processing sliding window: " + toString(iters) + "\n") ; } return 0; } catch(exception& e) { m->errorOut(e, "Bellerophon", "driverChimeras"); exit(1); } } /***************************************************************************************************************/ int Bellerophon::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector s){ try { for(int i=startSeq; igetControl_pressed()) { return 0; } double dist = distCalculator->calcDist(s[i], s[j]); PCell temp(i, j, dist); sparse->addCell(temp); } } return 1; } catch(exception& e) { m->errorOut(e, "Bellerophon", "createSparseMatrix"); exit(1); } } /***************************************************************************************************************/ int Bellerophon::generatePreferences(vector left, vector right, int mid){ try { SeqMap::iterator itR; SeqMap::iterator itL; for (int i = 0; i < left.size(); i++) { SeqMap currentLeft = left[i]; //example i = 3; currentLeft is a map of 0 to the distance of sequence 3 to sequence 0, // 1 to the distance of sequence 3 to sequence 1, // 2 
to the distance of sequence 3 to sequence 2. SeqMap currentRight = right[i]; // same as left but with distances on the right side. for (int j = 0; j < i; j++) { if (m->getControl_pressed()) { return 0; } itL = currentLeft.find(j); itR = currentRight.find(j); //if you can find this entry update the preferences if ((itL != currentLeft.end()) && (itR != currentRight.end())) { if (!correction) { pref[count][i].score += abs((itL->second - itR->second)); pref[count][j].score += abs((itL->second - itR->second)); }else { pref[count][i].score += abs((sqrt(itL->second) - sqrt(itR->second))); pref[count][j].score += abs((sqrt(itL->second) - sqrt(itR->second))); } //are you the closest left sequence if (itL->second < pref[count][i].closestLeft) { pref[count][i].closestLeft = itL->second; pref[count][i].leftParent = seqs[j]->getName(); } if (itL->second < pref[count][j].closestLeft) { pref[count][j].closestLeft = itL->second; pref[count][j].leftParent = seqs[i]->getName(); } //are you the closest right sequence if (itR->second < pref[count][i].closestRight) { pref[count][i].closestRight = itR->second; pref[count][i].rightParent = seqs[j]->getName(); } if (itR->second < pref[count][j].closestRight) { pref[count][j].closestRight = itR->second; pref[count][j].rightParent = seqs[i]->getName(); } } } } return 1; } catch(exception& e) { m->errorOut(e, "Bellerophon", "generatePreferences"); exit(1); } } /**************************************************************************************************/ vector Bellerophon::getBestPref() { try { vector best; //for each sequence for (int i = 0; i < numSeqs; i++) { //set best pref score to first one Preference temp = pref[0][i]; if (m->getControl_pressed()) { return best; } //for each window for (int j = 1; j < pref.size(); j++) { //is this a better score if (pref[j][i].score > temp.score) { temp = pref[j][i]; } } best.push_back(temp); } //rank preference score to eachother float dme = 0.0; float expectedPercent = 1 / (float) (best.size()); for (int i = 0; i < best.size(); i++) { dme += best[i].score; } for (int i = 0; i < best.size(); i++) { if (m->getControl_pressed()) { return best; } //gives the actual percentage of the dme this seq adds best[i].score = best[i].score / dme; //how much higher or lower is this than expected best[i].score = best[i].score / expectedPercent; } //sort Preferences highest to lowest sort(best.begin(), best.end(), comparePref); return best; } catch(exception& e) { m->errorOut(e, "Bellerophon", "getBestPref"); exit(1); } } /**************************************************************************************************/ int Bellerophon::writePrefs(string file) { try { ofstream outTemp; Utils util; util.openOutputFile(file, outTemp); //lets you know what part of the pref matrix you are writing outTemp << 0 << '\t' << iters << endl; for (int i = 0; i < iters; i++) { for (int j = 0; j < numSeqs; j++) { if (m->getControl_pressed()) { outTemp.close(); util.mothurRemove(file); return 0; } outTemp << pref[i][j].name << '\t' << pref[i][j].leftParent << '\t' << pref[i][j].rightParent << '\t'; outTemp << pref[i][j].score << '\t' << pref[i][j].closestLeft << '\t' << pref[i][j].closestRight << '\t' << pref[i][j].midpoint << endl; } } outTemp.close(); return 0; } catch(exception& e) { m->errorOut(e, "Bellerophon", "writePrefs"); exit(1); } } /**************************************************************************************************/ int Bellerophon::readPrefs(string file) { try { ifstream inTemp; Utils util; util.openInputFile(file, 
inTemp); int start, num; //lets you know what part of the pref matrix you are writing inTemp >> start >> num; gobble(inTemp); for (int i = start; i < num; i++) { for (int j = 0; j < numSeqs; j++) { if (m->getControl_pressed()) { inTemp.close(); util.mothurRemove(file); return 0; } inTemp >> pref[i][j].name >> pref[i][j].leftParent >> pref[i][j].rightParent; inTemp >> pref[i][j].score >> pref[i][j].closestLeft >> pref[i][j].closestRight >> pref[i][j].midpoint; gobble(inTemp); } } inTemp.close(); util.mothurRemove(file); return 0; } catch(exception& e) { m->errorOut(e, "Bellerophon", "writePrefs"); exit(1); } } /**************************************************************************************************/ vector Bellerophon::getBestWindow() { try { vector best; //for each sequence for (int i = 0; i < numSeqs; i++) { //set best pref score to first one Preference temp = pref[0][i]; if (m->getControl_pressed()) { return best; } //for each window - is this a better score for (int j = 1; j < iters; j++) { if (pref[j][i].score > temp.score) { temp = pref[j][i]; } } string tempString = temp.name + '\t' + temp.leftParent + '\t' + temp.rightParent + '\t' + toString(temp.score); best.push_back(tempString); } return best; } catch(exception& e) { m->errorOut(e, "Bellerophon", "getBestWindow"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/chimera/bellerophon.h000077500000000000000000000027211424121717000205200ustar00rootroot00000000000000#ifndef BELLEROPHON_H #define BELLEROPHON_H /* * bellerophon.h * Mothur * * Created by Sarah Westcott on 7/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothurchimera.h" #include "sparsematrix.hpp" #include "sequence.hpp" #include "calculator.h" typedef list::iterator MatData; typedef map SeqMap; //maps sequence to all distance for that seqeunce /***********************************************************/ class Bellerophon : public MothurChimera { public: Bellerophon(string, bool, bool, int, int, string); //fastafile, filter, correction, window, increment, outputDir); ~Bellerophon() { delete distCalculator; for (int i = 0; i < seqs.size(); i++) { delete seqs[i]; } seqs.clear(); } int getChimeras(); int print(ostream&, ostream&, string); private: DistCalc* distCalculator; vector seqs; vector< vector > pref; //pref[0] = preference scores for all seqs in window 0. string fastafile; int iters, count, window, increment, numSeqs; //iters = number of windows bool correction; int generatePreferences(vector, vector, int); int createSparseMatrix(int, int, SparseMatrix*, vector); vector getBestPref(); int driverChimeras(vector); int writePrefs(string); int readPrefs(string); vector getBestWindow(); }; /***********************************************************/ #endif mothur-1.48.0/source/chimera/ccode.cpp000077500000000000000000000744201424121717000176240ustar00rootroot00000000000000/* * ccode.cpp * Mothur * * Created by westcott on 8/24/09. * Copyright 2009 Schloss Lab. All rights reserved. 
* */ #include "ccode.h" #include "ignoregaps.h" #include "eachgapdist.h" //*************************************************************************************************************** Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : MothurChimera() { try { fastafile = filename; outputDir = o; templateFileName = temp; templateSeqs = readSeqs(temp); setMask(mask); filter = f; window = win; numWanted = numW; distCalc = new eachGapDist(1.0); decalc = new DeCalculator(); Utils util; mapInfo = outputDir + util.getRootName(util.getSimpleName(fastafile)) + "mapinfo"; ofstream out2; util.openOutputFile(mapInfo, out2); out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl; out2.close(); } catch(exception& e) { m->errorOut(e, "Ccode", "Ccode"); exit(1); } } //*************************************************************************************************************** Ccode::~Ccode() { delete distCalc; delete decalc; } //*************************************************************************************************************** Sequence Ccode::print(ostream& out, ostream& outAcc) { try { ofstream out2; Utils util; util.openOutputFileAppend(mapInfo, out2); out2 << querySeq->getName() << endl; for (it = spotMap.begin(); it!= spotMap.end(); it++) { out2 << it->first << '\t' << it->second << endl; } out2.close(); out << querySeq->getName() << endl << endl << "Reference sequences used and distance to query:" << endl; for (int j = 0; j < closest.size(); j++) { out << closest[j].seq->getName() << '\t' << closest[j].dist << endl; } out << endl << endl; //for each window //window mapping info. out << "Mapping information: "; //you mask and did not filter if ((seqMask != "") && (!filter)) { out << "mask and trim."; } //you filtered and did not mask if ((seqMask == "") && (filter)) { out << "filter and trim."; } //you masked and filtered if ((seqMask != "") && (filter)) { out << "mask, filter and trim."; } out << endl << "Window\tStartPos\tEndPos" << endl; it = trim.begin(); for (int k = 0; k < windows.size()-1; k++) { out << k+1 << '\t' << spotMap[windows[k]-it->first] << '\t' << spotMap[windows[k]-it->first+windowSizes] << endl; } out << windows.size() << '\t' << spotMap[windows[windows.size()-1]-it->first] << '\t' << spotMap[it->second-it->first-1] << endl; out << endl; out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl; for (int k = 0; k < windows.size(); k++) { float ds = averageQuery[k] / averageRef[k]; out << k+1 << '\t' << averageQuery[k] << '\t' << sdQuery[k] << '\t' << averageRef[k] << '\t'<< sdRef[k] << '\t' << ds << '\t' << anova[k] << endl; } out << endl; //varRef //varQuery /* F test for differences among variances. 
* varQuery is expected to be higher or similar than varRef */ //float fs = varQuery[query] / varRef[query]; /* F-Snedecor, test for differences of variances */ bool results = false; //confidence limit, t - Student, anova out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl; for (int k = 0; k < windows.size(); k++) { string temp = ""; if (isChimericConfidence[k]) { temp += "*\t"; } else { temp += "\t"; } if (isChimericTStudent[k]) { temp += "*\t"; } else { temp += "\t"; } if (isChimericANOVA[k]) { temp += "*\t"; } else { temp += "\t"; } out << k+1 << '\t' << temp << endl; if (temp == "*\t*\t*\t") { results = true; } } out << endl; if (results) { m->mothurOut(querySeq->getName() + " was found have at least one chimeric window.\n"); outAcc << querySeq->getName() << endl; } //free memory for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } return *querySeq; } catch(exception& e) { m->errorOut(e, "Ccode", "print"); exit(1); } } //*************************************************************************************************************** int Ccode::getChimeras(Sequence* query) { try { closest.clear(); refCombo = 0; sumRef.clear(); varRef.clear(); varQuery.clear(); sdRef.clear(); sdQuery.clear(); sumQuery.clear(); sumSquaredRef.clear(); sumSquaredQuery.clear(); averageRef.clear(); averageQuery.clear(); anova.clear(); isChimericConfidence.clear(); isChimericTStudent.clear(); isChimericANOVA.clear(); trim.clear(); spotMap.clear(); windowSizes = window; windows.clear(); querySeq = query; //find closest matches to query closest = findClosest(query, numWanted); if (m->getControl_pressed()) { return 0; } //initialize spotMap for (int i = 0; i < query->getAligned().length(); i++) { spotMap[i] = i; } //mask sequences if the user wants to if (seqMask != "") { decalc->setMask(seqMask); decalc->runMask(query); //mask closest for (int i = 0; i < closest.size(); i++) { decalc->runMask(closest[i].seq); } spotMap = decalc->getMaskMap(); } if (filter) { vector temp; for (int i = 0; i < closest.size(); i++) { temp.push_back(closest[i].seq); } temp.push_back(query); createFilter(temp, 0.5); for (int i = 0; i < temp.size(); i++) { if (m->getControl_pressed()) { return 0; } runFilter(temp[i]); } //update spotMap map newMap; int spot = 0; for (int i = 0; i < filterString.length(); i++) { if (filterString[i] == '1') { //add to newMap newMap[spot] = spotMap[i]; spot++; } } spotMap = newMap; } //trim sequences - this follows ccodes remove_extra_gaps trimSequences(query); if (m->getControl_pressed()) { return 0; } //windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length(). //Our default will be 10% and we will warn if user tries to use a window above or below these recommendations windows = findWindows(); if (m->getControl_pressed()) { return 0; } //remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later removeBadReferenceSeqs(closest); if (m->getControl_pressed()) { return 0; } //find the averages for each querys references getAverageRef(closest); //fills sumRef, averageRef, sumSquaredRef and refCombo. getAverageQuery(closest, query); //fills sumQuery, averageQuery, sumSquaredQuery. if (m->getControl_pressed()) { return 0; } //find the averages for each querys references findVarianceRef(); //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0. 
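        /* Clarifying note (added): findVarianceRef() and findVarianceQuery() derive the per-window
           variance from the running sums collected above using the one-pass sample-variance form
           var = (sumSquared - (sum*sum)/n) / (n - 1), with n = refCombo for the reference-to-reference
           distances and n = closest.size() for the query-to-reference distances; the standard deviation
           is sqrt(var), and all of these statistics are floored at 0.001 to avoid dividing by zero. */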
if (m->getControl_pressed()) { return 0; } //find the averages for the query findVarianceQuery(); //fills varQuery and sdQuery also sets minimum error rate to 0.001 to avoid divide by 0. if (m->getControl_pressed()) { return 0; } determineChimeras(); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. if (m->getControl_pressed()) { return 0; } return 0; } catch(exception& e) { m->errorOut(e, "Ccode", "getChimeras"); exit(1); } } /***************************************************************************************************************/ //ccode algo says it does this to "Removes the initial and final gaps to avoid biases due to incomplete sequences." void Ccode::trimSequences(Sequence* query) { try { int frontPos = 0; //should contain first position in all seqs that is not a gap character int rearPos = query->getAligned().length(); //********find first position in closest seqs that is a non gap character***********// //find first position all query seqs that is a non gap character for (int i = 0; i < closest.size(); i++) { string aligned = closest[i].seq->getAligned(); int pos = 0; //find first spot in this seq for (int j = 0; j < aligned.length(); j++) { if (isalpha(aligned[j])) { pos = j; break; } } //save this spot if it is the farthest if (pos > frontPos) { frontPos = pos; } } //find first position all querySeq[query] that is a non gap character string aligned = query->getAligned(); int pos = 0; //find first spot in this seq for (int j = 0; j < aligned.length(); j++) { if (isalpha(aligned[j])) { pos = j; break; } } //save this spot if it is the farthest if (pos > frontPos) { frontPos = pos; } //********find last position in closest seqs that is a non gap character***********// for (int i = 0; i < closest.size(); i++) { string aligned = closest[i].seq->getAligned(); int pos = aligned.length(); //find first spot in this seq for (int j = aligned.length()-1; j >= 0; j--) { if (isalpha(aligned[j])) { pos = j; break; } } //save this spot if it is the farthest if (pos < rearPos) { rearPos = pos; } } //find last position all querySeqs[query] that is a non gap character aligned = query->getAligned(); pos = aligned.length(); //find first spot in this seq for (int j = aligned.length()-1; j >= 0; j--) { if (isalpha(aligned[j])) { pos = j; break; } } //save this spot if it is the farthest if (pos < rearPos) { rearPos = pos; } //check to make sure that is not whole seq if ((rearPos - frontPos - 1) <= 0) { m->mothurOut("Error, when I trim your sequences, the entire sequence is trimmed.\n"); exit(1); } map tempTrim; tempTrim[frontPos] = rearPos; //save trimmed locations trim = tempTrim; //update spotMask map newMap; int spot = 0; for (int i = frontPos; i < rearPos; i++) { //add to newMap newMap[spot] = spotMap[i]; spot++; } spotMap = newMap; } catch(exception& e) { m->errorOut(e, "Ccode", "trimSequences"); exit(1); } } /***************************************************************************************************************/ vector Ccode::findWindows() { try { vector win; it = trim.begin(); int length = it->second - it->first; //default is wanted = 10% of total length if (windowSizes > length) { m->mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. 
I will use the default 10% of sequence length."); windowSizes = length / 10; }else if (windowSizes == 0) { windowSizes = length / 10; } else if (windowSizes > (length * 0.20)) { m->mothurOut("You have selected a window that is larger than 20% of your sequence length. This is not recommended, but I will continue anyway.\n"); }else if (windowSizes < (length * 0.05)) { m->mothurOut("You have selected a window that is smaller than 5% of your sequence length. This is not recommended, but I will continue anyway.\n"); } //save starting points of each window for (int m = it->first; m < (it->second-windowSizes); m+=windowSizes) { win.push_back(m); } //save last window if (win[win.size()-1] < (it->first+length)) { win.push_back(win[win.size()-1]+windowSizes); // ex. string length is 115, window is 25, without this you would get 0, 25, 50, 75 } //with this you would get 1,25,50,75,100 return win; } catch(exception& e) { m->errorOut(e, "Ccode", "findWindows"); exit(1); } } //*************************************************************************************************************** int Ccode::getDiff(string seqA, string seqB) { try { int numDiff = 0; for (int i = 0; i < seqA.length(); i++) { //if you are both not gaps //if (isalpha(seqA[i]) && isalpha(seqA[i])) { //are you different if (seqA[i] != seqB[i]) { int ok; /* ok=1 means equivalent base. Checks for degenerate bases */ /* the char in base_a and base_b have been checked and they are different */ if ((seqA[i] == 'N') && (seqB[i] != '-')) ok = 1; else if ((seqB[i] == 'N') && (seqA[i] != '-')) ok = 1; else if ((seqA[i] == 'Y') && ((seqB[i] == 'C') || (seqB[i] == 'T'))) ok = 1; else if ((seqB[i] == 'Y') && ((seqA[i] == 'C') || (seqA[i] == 'T'))) ok = 1; else if ((seqA[i] == 'R') && ((seqB[i] == 'G') || (seqB[i] == 'A'))) ok = 1; else if ((seqB[i] == 'R') && ((seqA[i] == 'G') || (seqA[i] == 'A'))) ok = 1; else if ((seqA[i] == 'S') && ((seqB[i] == 'C') || (seqB[i] == 'G'))) ok = 1; else if ((seqB[i] == 'S') && ((seqA[i] == 'C') || (seqA[i] == 'G'))) ok = 1; else if ((seqA[i] == 'W') && ((seqB[i] == 'T') || (seqB[i] == 'A'))) ok = 1; else if ((seqB[i] == 'W') && ((seqA[i] == 'T') || (seqA[i] == 'A'))) ok = 1; else if ((seqA[i] == 'M') && ((seqB[i] == 'A') || (seqB[i] == 'C'))) ok = 1; else if ((seqB[i] == 'M') && ((seqA[i] == 'A') || (seqA[i] == 'C'))) ok = 1; else if ((seqA[i] == 'K') && ((seqB[i] == 'T') || (seqB[i] == 'G'))) ok = 1; else if ((seqB[i] == 'K') && ((seqA[i] == 'T') || (seqA[i] == 'G'))) ok = 1; else if ((seqA[i] == 'V') && ((seqB[i] == 'C') || (seqB[i] == 'A') || (seqB[i] == 'G'))) ok = 1; else if ((seqB[i] == 'V') && ((seqA[i] == 'C') || (seqA[i] == 'A') || (seqA[i] == 'G'))) ok = 1; else if ((seqA[i] == 'H') && ((seqB[i] == 'T') || (seqB[i] == 'A') || (seqB[i] == 'C'))) ok = 1; else if ((seqB[i] == 'H') && ((seqA[i] == 'T') || (seqA[i] == 'A') || (seqA[i] == 'C'))) ok = 1; else if ((seqA[i] == 'D') && ((seqB[i] == 'T') || (seqB[i] == 'A') || (seqB[i] == 'G'))) ok = 1; else if ((seqB[i] == 'D') && ((seqA[i] == 'T') || (seqA[i] == 'A') || (seqA[i] == 'G'))) ok = 1; else if ((seqA[i] == 'B') && ((seqB[i] == 'C') || (seqB[i] == 'T') || (seqB[i] == 'G'))) ok = 1; else if ((seqB[i] == 'B') && ((seqA[i] == 'C') || (seqA[i] == 'T') || (seqA[i] == 'G'))) ok = 1; else ok = 0; /* the bases are different and not equivalent */ //check if they are both blanks if ((seqA[i] == '.') && (seqB[i] == '-')) ok = 1; else if ((seqB[i] == '.') && (seqA[i] == '-')) ok = 1; if (ok == 0) { numDiff++; } } //} } return numDiff; } catch(exception& e) { 
m->errorOut(e, "Ccode", "getDiff"); exit(1); } } //*************************************************************************************************************** //tried to make this look most like ccode original implementation void Ccode::removeBadReferenceSeqs(vector& seqs) { try { vector< vector > numDiffBases; numDiffBases.resize(seqs.size()); //initialize to 0 for (int i = 0; i < numDiffBases.size(); i++) { numDiffBases[i].resize(seqs.size(),0); } it = trim.begin(); int length = it->second - it->first; //calc differences from each sequence to everyother seq in the set for (int i = 0; i < seqs.size(); i++) { string seqA = seqs[i].seq->getAligned().substr(it->first, length); //so you don't calc i to j and j to i since they are the same for (int j = 0; j < i; j++) { string seqB = seqs[j].seq->getAligned().substr(it->first, length); //compare strings int numDiff = getDiff(seqA, seqB); numDiffBases[i][j] = numDiff; numDiffBases[j][i] = numDiff; } } //initailize remove to 0 vector remove; remove.resize(seqs.size(), 0); float top = ((20*length) / (float) 100); float bottom = ((0.5*length) / (float) 100); //check each numDiffBases and if any are higher than threshold set remove to 1 so you can remove those seqs from the closest set for (int i = 0; i < numDiffBases.size(); i++) { for (int j = 0; j < i; j++) { //are you more than 20% different if (numDiffBases[i][j] > top) { remove[j] = 1; } //are you less than 0.5% different if (numDiffBases[i][j] < bottom) { remove[j] = 1; } } } int numSeqsLeft = 0; //count seqs that are not going to be removed for (int i = 0; i < remove.size(); i++) { if (remove[i] == 0) { numSeqsLeft++; } } //if you have enough then remove bad ones if (numSeqsLeft >= 3) { vector goodSeqs; //remove bad seqs for (int i = 0; i < remove.size(); i++) { if (remove[i] == 0) { goodSeqs.push_back(seqs[i]); } } seqs = goodSeqs; }else { //warn, but dont remove any m->mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity. I will continue, but please check.\n"); } } catch(exception& e) { m->errorOut(e, "Ccode", "removeBadReferenceSeqs"); exit(1); } } //*************************************************************************************************************** //makes copy of templateseq for filter vector Ccode::findClosest(Sequence* q, int numWanted) { try{ vector topMatches; Sequence query = *(q); //calc distance to each sequence in template seqs for (int i = 0; i < templateSeqs.size(); i++) { Sequence ref = *(templateSeqs[i]); //find overall dist double dist = distCalc->calcDist(query, ref); //save distance SeqDist temp; temp.seq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); temp.dist = dist; topMatches.push_back(temp); } sort(topMatches.begin(), topMatches.end(), compareSeqDist); for (int i = numWanted; i < topMatches.size(); i++) { delete topMatches[i].seq; } topMatches.resize(numWanted); return topMatches; } catch(exception& e) { m->errorOut(e, "Ccode", "findClosestSides"); exit(1); } } /**************************************************************************************************/ //find the distances from each reference sequence to every other reference sequence for each window for this query void Ccode::getAverageRef(vector ref) { try { vector< vector< vector > > diffs; //diffs[0][1][2] is the number of differences between ref seq 0 and ref seq 1 at window 2. 
//initialize diffs vector diffs.resize(ref.size()); for (int i = 0; i < diffs.size(); i++) { diffs[i].resize(ref.size()); for (int j = 0; j < diffs[i].size(); j++) { diffs[i][j].resize(windows.size(), 0); } } it = trim.begin(); //find the distances from each reference sequence to every other reference sequence for each window for this query for (int i = 0; i < ref.size(); i++) { string refI = ref[i].seq->getAligned(); //jgetAligned(); for (int k = 0; k < windows.size(); k++) { string refIWindowk, refJWindowk; if (k < windows.size()-1) { //get window strings refIWindowk = refI.substr(windows[k], windowSizes); refJWindowk = refJ.substr(windows[k], windowSizes); }else { //last window may be smaller than rest - see findwindows //get window strings refIWindowk = refI.substr(windows[k], (it->second-windows[k])); refJWindowk = refJ.substr(windows[k], (it->second-windows[k])); } //find differences int diff = getDiff(refIWindowk, refJWindowk); //save differences in [i][j][k] and [j][i][k] since they are the same diffs[i][j][k] = diff; diffs[j][i][k] = diff; }//k }//j }//i //initialize sumRef for this query sumRef.resize(windows.size(), 0); sumSquaredRef.resize(windows.size(), 0); averageRef.resize(windows.size(), 0); //find the sum of the differences for hte reference sequences for (int i = 0; i < diffs.size(); i++) { for (int j = 0; j < i; j++) { //increment this querys reference sequences combos refCombo++; for (int k = 0; k < diffs[i][j].size(); k++) { sumRef[k] += diffs[i][j][k]; sumSquaredRef[k] += (diffs[i][j][k]*diffs[i][j][k]); }//k }//j }//i //find the average of the differences for the references for each window for (int i = 0; i < windows.size(); i++) { averageRef[i] = sumRef[i] / (float) refCombo; } } catch(exception& e) { m->errorOut(e, "Ccode", "getAverageRef"); exit(1); } } /**************************************************************************************************/ void Ccode::getAverageQuery (vector ref, Sequence* query) { try { vector< vector > diffs; //diffs[1][2] is the number of differences between querySeqs[query] and ref seq 1 at window 2. 
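        /* Clarifying note (added): for reference j and window k, diffs[j][k] holds the raw mismatch count
           between the query and that reference over the window; summing over j fills sumQuery[k] and
           sumSquaredQuery[k], and averageQuery[k] = sumQuery[k] / (number of references), mirroring the
           reference-versus-reference statistics built in getAverageRef(). */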
//initialize diffs vector diffs.resize(ref.size()); for (int j = 0; j < diffs.size(); j++) { diffs[j].resize(windows.size(), 0); } it = trim.begin(); string refQuery = query->getAligned(); //jgetAligned(); for (int k = 0; k < windows.size(); k++) { string QueryWindowk, refJWindowk; if (k < windows.size()-1) { //get window strings QueryWindowk = refQuery.substr(windows[k], windowSizes); refJWindowk = refJ.substr(windows[k], windowSizes); }else { //last window may be smaller than rest - see findwindows //get window strings QueryWindowk = refQuery.substr(windows[k], (it->second-windows[k])); refJWindowk = refJ.substr(windows[k], (it->second-windows[k])); } //find differences int diff = getDiff(QueryWindowk, refJWindowk); //save differences diffs[j][k] = diff; }//k }//j //initialize sumRef for this query sumQuery.resize(windows.size(), 0); sumSquaredQuery.resize(windows.size(), 0); averageQuery.resize(windows.size(), 0); //find the sum of the differences for (int j = 0; j < diffs.size(); j++) { for (int k = 0; k < diffs[j].size(); k++) { sumQuery[k] += diffs[j][k]; sumSquaredQuery[k] += (diffs[j][k]*diffs[j][k]); }//k }//j //find the average of the differences for the references for each window for (int i = 0; i < windows.size(); i++) { averageQuery[i] = sumQuery[i] / (float) ref.size(); } } catch(exception& e) { m->errorOut(e, "Ccode", "getAverageQuery"); exit(1); } } /**************************************************************************************************/ void Ccode::findVarianceRef() { try { varRef.resize(windows.size(), 0); sdRef.resize(windows.size(), 0); //for each window for (int i = 0; i < windows.size(); i++) { varRef[i] = (sumSquaredRef[i] - ((sumRef[i]*sumRef[i])/(float)refCombo)) / (float)(refCombo-1); sdRef[i] = sqrt(varRef[i]); //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation if (averageRef[i] < 0.001) { averageRef[i] = 0.001; } if (sumRef[i] < 0.001) { sumRef[i] = 0.001; } if (varRef[i] < 0.001) { varRef[i] = 0.001; } if (sumSquaredRef[i] < 0.001) { sumSquaredRef[i] = 0.001; } if (sdRef[i] < 0.001) { sdRef[i] = 0.001; } } } catch(exception& e) { m->errorOut(e, "Ccode", "findVarianceRef"); exit(1); } } /**************************************************************************************************/ void Ccode::findVarianceQuery() { try { varQuery.resize(windows.size(), 0); sdQuery.resize(windows.size(), 0); //for each window for (int i = 0; i < windows.size(); i++) { varQuery[i] = (sumSquaredQuery[i] - ((sumQuery[i]*sumQuery[i])/(float) closest.size())) / (float) (closest.size()-1); sdQuery[i] = sqrt(varQuery[i]); //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation if (averageQuery[i] < 0.001) { averageQuery[i] = 0.001; } if (sumQuery[i] < 0.001) { sumQuery[i] = 0.001; } if (varQuery[i] < 0.001) { varQuery[i] = 0.001; } if (sumSquaredQuery[i] < 0.001) { sumSquaredQuery[i] = 0.001; } if (sdQuery[i] < 0.001) { sdQuery[i] = 0.001; } } } catch(exception& e) { m->errorOut(e, "Ccode", "findVarianceQuery"); exit(1); } } /**************************************************************************************************/ void Ccode::determineChimeras() { try { isChimericConfidence.resize(windows.size(), false); isChimericTStudent.resize(windows.size(), false); isChimericANOVA.resize(windows.size(), false); anova.resize(windows.size()); //for each window for (int i = 0; i < windows.size(); i++) { //get 
confidence limits float t = getT(closest.size()-1); //how many seqs you are comparing to this querySeq float dsUpper = (averageQuery[i] + (t * sdQuery[i])) / averageRef[i]; float dsLower = (averageQuery[i] - (t * sdQuery[i])) / averageRef[i]; if ((dsUpper > 1.0) && (dsLower > 1.0) && (averageQuery[i] > averageRef[i])) { /* range does not include 1 */ isChimericConfidence[i] = true; /* significantly higher at P<0.05 */ } //student t test int degreeOfFreedom = refCombo + closest.size() - 2; float denomForT = (((refCombo-1) * varQuery[i] + (closest.size() - 1) * varRef[i]) / (float) degreeOfFreedom) * ((refCombo + closest.size()) / (float) (refCombo * closest.size())); /* denominator, without sqrt(), for ts calculations */ float ts = fabs((averageQuery[i] - averageRef[i]) / (sqrt(denomForT))); /* value of ts for t-student test */ t = getT(degreeOfFreedom); if ((ts >= t) && (averageQuery[i] > averageRef[i])) { isChimericTStudent[i] = true; /* significantly higher at P<0.05 */ } //anova test float value1 = sumQuery[i] + sumRef[i]; float value2 = sumSquaredQuery[i] + sumSquaredRef[i]; float value3 = ((sumQuery[i]*sumQuery[i]) / (float) (closest.size())) + ((sumRef[i] * sumRef[i]) / (float) refCombo); float value4 = (value1 * value1) / ( (float) (closest.size() + refCombo) ); float value5 = value2 - value4; float value6 = value3 - value4; float value7 = value5 - value6; float value8 = value7 / ((float) degreeOfFreedom); float anovaValue = value6 / value8; float f = getF(degreeOfFreedom); if ((anovaValue >= f) && (averageQuery[i] > averageRef[i])) { isChimericANOVA[i] = true; /* significant P<0.05 */ } if (isnan(anovaValue) || isinf(anovaValue)) { anovaValue = 0.0; } anova[i] = anovaValue; } } catch(exception& e) { m->errorOut(e, "Ccode", "determineChimeras"); exit(1); } } /**************************************************************************************************/ float Ccode::getT(int numseq) { try { float tvalue = 0; /* t-student critical values for different degrees of freedom and alpha 0.1 in one-tail tests (equivalent to 0.05) */ if (numseq > 120) tvalue = 1.645; else if (numseq > 60) tvalue = 1.658; else if (numseq > 40) tvalue = 1.671; else if (numseq > 30) tvalue = 1.684; else if (numseq > 29) tvalue = 1.697; else if (numseq > 28) tvalue = 1.699; else if (numseq > 27) tvalue = 1.701; else if (numseq > 26) tvalue = 1.703; else if (numseq > 25) tvalue = 1.706; else if (numseq > 24) tvalue = 1.708; else if (numseq > 23) tvalue = 1.711; else if (numseq > 22) tvalue = 1.714; else if (numseq > 21) tvalue = 1.717; else if (numseq > 20) tvalue = 1.721; else if (numseq > 19) tvalue = 1.725; else if (numseq > 18) tvalue = 1.729; else if (numseq > 17) tvalue = 1.734; else if (numseq > 16) tvalue = 1.740; else if (numseq > 15) tvalue = 1.746; else if (numseq > 14) tvalue = 1.753; else if (numseq > 13) tvalue = 1.761; else if (numseq > 12) tvalue = 1.771; else if (numseq > 11) tvalue = 1.782; else if (numseq > 10) tvalue = 1.796; else if (numseq > 9) tvalue = 1.812; else if (numseq > 8) tvalue = 1.833; else if (numseq > 7) tvalue = 1.860; else if (numseq > 6) tvalue = 1.895; else if (numseq > 5) tvalue = 1.943; else if (numseq > 4) tvalue = 2.015; else if (numseq > 3) tvalue = 2.132; else if (numseq > 2) tvalue = 2.353; else if (numseq > 1) tvalue = 2.920; else if (numseq <= 1) { m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n\n"); } return tvalue; } catch(exception& e) { m->errorOut(e, "Ccode", "getT"); exit(1); } } 
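/* Clarifying note (added): determineChimeras() combines three per-window tests that rely on these
   lookup tables: (1) a confidence-limit test using t = getT(closest.size()-1), which flags the window
   when both (averageQuery + t*sdQuery)/averageRef and (averageQuery - t*sdQuery)/averageRef exceed 1;
   (2) a two-sample t test, ts = |averageQuery - averageRef| / sqrt(denomForT), compared against
   getT(refCombo + closest.size() - 2); and (3) a one-way ANOVA whose F statistic is compared against
   getF() for the same degrees of freedom. print() reports a sequence as having a chimeric window only
   when all three tests are significant and averageQuery exceeds averageRef for that window. */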
/**************************************************************************************************/ float Ccode::getF(int numseq) { try { float fvalue = 0; /* F-Snedecor critical values for v1=1 and different degrees of freedom v2 and alpha 0.05 */ if (numseq > 120) fvalue = 3.84; else if (numseq > 60) fvalue = 3.92; else if (numseq > 40) fvalue = 4.00; else if (numseq > 30) fvalue = 4.08; else if (numseq > 29) fvalue = 4.17; else if (numseq > 28) fvalue = 4.18; else if (numseq > 27) fvalue = 4.20; else if (numseq > 26) fvalue = 4.21; else if (numseq > 25) fvalue = 4.23; else if (numseq > 24) fvalue = 4.24; else if (numseq > 23) fvalue = 4.26; else if (numseq > 22) fvalue = 4.28; else if (numseq > 21) fvalue = 4.30; else if (numseq > 20) fvalue = 4.32; else if (numseq > 19) fvalue = 4.35; else if (numseq > 18) fvalue = 4.38; else if (numseq > 17) fvalue = 4.41; else if (numseq > 16) fvalue = 4.45; else if (numseq > 15) fvalue = 4.49; else if (numseq > 14) fvalue = 4.54; else if (numseq > 13) fvalue = 4.60; else if (numseq > 12) fvalue = 4.67; else if (numseq > 11) fvalue = 4.75; else if (numseq > 10) fvalue = 4.84; else if (numseq > 9) fvalue = 4.96; else if (numseq > 8) fvalue = 5.12; else if (numseq > 7) fvalue = 5.32; else if (numseq > 6) fvalue = 5.59; else if (numseq > 5) fvalue = 5.99; else if (numseq > 4) fvalue = 6.61; else if (numseq > 3) fvalue = 7.71; else if (numseq > 2) fvalue = 10.1; else if (numseq > 1) fvalue = 18.5; else if (numseq > 0) fvalue = 161; else if (numseq <= 0) { m->mothurOut("Two or more reference sequences are required, your data will be flawed.\n\n"); } return fvalue; } catch(exception& e) { m->errorOut(e, "Ccode", "getF"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/chimera/ccode.h000077500000000000000000000077571424121717000173020ustar00rootroot00000000000000#ifndef CCODE_H #define CCODE_H /* * ccode.h * Mothur * * Created by westcott on 8/24/09. * Copyright 2009 Schloss LAB. All rights reserved. * */ #include "mothurchimera.h" #include "calculator.h" #include "decalc.h" /***********************************************************/ //This class was created using the algorithms described in the // "Evaluating putative chimeric sequences from PCR-amplified products" paper //by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez. 
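/***********************************************************/
// Illustrative note (not part of this class): the sum and sum-of-squares members declared below feed the usual
// one-pass variance formula applied per window by findVarianceRef()/findVarianceQuery() in ccode.cpp,
//     variance = (sumSquared - (sum*sum)/n) / (n-1)
// For example, window distances {2, 4, 9} give sum=15, sumSquared=101, n=3, so variance = (101 - 75)/2 = 13 and
// the standard deviation is sqrt(13). A hypothetical standalone helper in that form:
//     static float onePassVariance(float sum, float sumSquared, int n) {
//         return (sumSquared - ((sum * sum) / (float) n)) / (float) (n - 1);
//     }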
/***********************************************************/ class Ccode : public MothurChimera { public: Ccode(string, string, bool, string, int, int, string); //fasta, template, filter, mask, window, numWanted, outputDir ~Ccode(); int getChimeras(Sequence* query); Sequence print(ostream&, ostream&); private: DistCalc* distCalc; DeCalculator* decalc; int iters, window, numWanted; string fastafile, mapInfo; Sequence* querySeq; map spotMap; map::iterator it; vector windows; //windows is the vector of window breaks for query int windowSizes; //windowSizes is the size of the windows for query map trim; //trim is the map containing the starting and ending positions for query vector closest; //closest is a vector of sequence at are closest to query vector averageRef; //averageRef is the average distance at each window for the references for query vector averageQuery; //averageQuery is the average distance at each winow for the query for query vector sumRef; //sumRef is the sum of distances at each window for the references for query vector sumSquaredRef; //sumSquaredRef is the sum of squared distances at each window for the references for query vector sumQuery; //sumQuery is the sum of distances at each window for the comparison of query to references for query vector sumSquaredQuery; //sumSquaredQuery is the sum of squared distances at each window for the comparison of query to references for query vector varRef; //varRef is the variance among references seqs at each window for query vector varQuery; //varQuery is the variance among references and query at each window vector sdRef; //sdRef is the standard deviation of references seqs at each window for query vector sdQuery; //sdQuery is the standard deviation of references and query at each window vector anova; //anova is the vector of anova scores for each window for query int refCombo; //refCombo is the number of reference sequences combinations for query vector isChimericConfidence; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits vector isChimericTStudent; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits vector isChimericANOVA; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits vector findClosest(Sequence*, int); void removeBadReferenceSeqs(vector&); //removes sequences from closest that are to different of too similar to eachother. void trimSequences(Sequence*); vector findWindows(); void getAverageRef(vector); //fills sumRef, averageRef, sumSquaredRef and refCombo. void getAverageQuery (vector, Sequence*); //fills sumQuery, averageQuery, sumSquaredQuery. void findVarianceRef (); //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0. void findVarianceQuery (); //fills varQuery and sdQuery void determineChimeras (); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. int getDiff(string, string); //return number of mismatched bases, a gap to base is not counted as a mismatch float getT(int); float getF(int); }; /***********************************************************/ #endif mothur-1.48.0/source/chimera/chimeracheckrdp.cpp000077500000000000000000000256021424121717000216610ustar00rootroot00000000000000/* * chimeracheckrdp.cpp * Mothur * * Created by westcott on 9/8/09. * Copyright 2009 Schloss Lab. All rights reserved. 
* */ #include "chimeracheckrdp.h" //*************************************************************************************************************** ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string n, bool s, int inc, int k, string o) : MothurChimera() { try { fastafile = filename; templateFileName = temp; name = n; svg = s; increment = inc; kmerSize = k; outputDir = o; templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0, util.getRandomNumber(), true); m->mothurOutEndLine(); kmer = new Kmer(kmerSize); if (name != "") { readName(name); //fills name map with names of seqs the user wants to have .svg for. } } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "ChimeraCheckRDP"); exit(1); } } //*************************************************************************************************************** ChimeraCheckRDP::~ChimeraCheckRDP() { try { delete templateDB; delete kmer; } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "~ChimeraCheckRDP"); exit(1); } } //*************************************************************************************************************** Sequence ChimeraCheckRDP::print(ostream& out, ostream& outAcc) { try { m->mothurOut("Processing: " + querySeq->getName()+"\n"); out << querySeq->getName() << endl; out << "IS scores: " << '\t'; for (int k = 0; k < IS.size(); k++) { out << IS[k].score << '\t'; } out << endl; if (svg) { if (name != "") { //if user has specific names map::iterator it = names.find(querySeq->getName()); if (it != names.end()) { //user wants pic of this makeSVGpic(IS); //zeros out negative results } }else{//output them all makeSVGpic(IS); //zeros out negative results } } return *querySeq; } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "print"); exit(1); } } //*************************************************************************************************************** int ChimeraCheckRDP::getChimeras(Sequence* query) { try { IS.clear(); querySeq = query; float searchScore; closest = templateDB->findClosestSequence(query, searchScore); IS = findIS(); //determine chimera report cutoff - window score above 95% //getCutoff(); - not very acurate predictor return 0; } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "getChimeras"); exit(1); } } //*************************************************************************************************************** vector ChimeraCheckRDP::findIS() { try { vector< map > queryKmerInfo; //vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq //example: seqKmerInfo[50] = map containing the kmers found in the first 50 + kmersize characters of ecoli. //i chose to store the kmers numbers in a map so you wouldn't have to check for dupilcate entries and could easily find the //kmers 2 seqs had in common. There may be a better way to do this thats why I am leaving so many comments... 
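		//illustrative example of this cumulative layout (numbers are made up): with kmerSize = 7,
		//queryKmerInfo[50] holds the counts of every kmer seen in the first 50 + 7 bases of the query, and
		//queryKmerInfo.back() holds the counts for the whole sequence. So the number of times a kmer occurs in
		//ONLY the fragment to the right of a breakpoint f is total minus left, i.e.
		//    queryKmerInfo.back()[kmer] - queryKmerInfo[f - kmerSize][kmer]
		//The loop further down uses that difference to drop kmers that occur only on the left when building
		//the "rightside" map.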
vector< map > subjectKmerInfo; vector isValues; string queryName = querySeq->getName(); string seq = querySeq->getUnaligned(); queryKmerInfo = kmer->getKmerCounts(seq); subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned()); //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]); //you don't want the starting point to be virtually at the end so move it in 10% int start = seq.length() / 10; //for each window for (int f = start; f < (seq.length() - start); f+=increment) { if ((f - kmerSize) < 0) { m->mothurOut("[ERROR]: Sequence " + querySeq->getName() + " is too short for your kmerSize, quitting.\n"); m->setControl_pressed(true); } if (m->getControl_pressed()) { return isValues; } sim temp; string fragLeft = seq.substr(0, f); //left side of breakpoint string fragRight = seq.substr(f); //right side of breakpoint //make a sequence of the left side and right side Sequence* left = new Sequence(queryName, fragLeft); Sequence* right = new Sequence(queryName, fragRight); //find seqs closest to each fragment float searchScore; Sequence closestLeft = templateDB->findClosestSequence(left, searchScore); Sequence closestRight = templateDB->findClosestSequence(right, searchScore); //get kmerinfo for the closest left vector< map > closeLeftKmerInfo = kmer->getKmerCounts(closestLeft.getUnaligned()); //get kmerinfo for the closest right vector< map > closeRightKmerInfo = kmer->getKmerCounts(closestRight.getUnaligned()); //right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side //iterate through left sides map to subtract the number of times you saw things before you got the the right side map rightside = queryKmerInfo[queryKmerInfo.size()-1]; for (map::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) { int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first]; //times that kmer was seen in total //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side int howmanyright = howManyTotal - itleft->second; //if any were seen just on the left erase if (howmanyright == 0) { rightside.erase(itleft->first); } } map closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1]; for (map::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) { int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first]; //times that kmer was seen in total //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side int howmanyright = howManyTotal - itright->second; //if any were seen just on the left erase if (howmanyright == 0) { closerightside.erase(itright->first); } } int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]); int nRight = calcKmers(closerightside, rightside); int is = nLeft + nRight - nTotal; //save IS, leftparent, rightparent, breakpoint temp.leftParent = closestLeft.getName(); temp.rightParent = closestRight.getName(); temp.score = is; temp.midpoint = f; isValues.push_back(temp); delete left; delete right; } return isValues; } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "findIS"); exit(1); } } 
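//***************************************************************************************************************
// Illustrative helper (not part of mothur): the IS value assembled at the end of findIS() above. nLeft and
// nRight are the kmers the left and right fragments share with their own closest references, and nTotal is the
// kmers the whole query shares with its single closest reference. A chimera tends to produce a peak in IS near
// the true breakpoint, because the two fragments match their separate parents better than the intact query
// matches any one reference.
static int interveningSequenceScore(int nLeft, int nRight, int nTotal) {
	return nLeft + nRight - nTotal;   //e.g. made-up counts nLeft=90, nRight=75, nTotal=120 give IS = 45
}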
//*************************************************************************************************************** void ChimeraCheckRDP::readName(string namefile) { try{ string name; ifstream in; Utils util; util.openInputFile(namefile, in); while (!in.eof()) { in >> name; gobble(in); names[name] = name; } in.close(); } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "readName"); exit(1); } } //*************************************************************************************************************** //find the smaller map and iterate through it and count kmers in common int ChimeraCheckRDP::calcKmers(map query, map subject) { try{ int common = 0; map::iterator smallone; map::iterator largeone; if (query.size() < subject.size()) { for (smallone = query.begin(); smallone != query.end(); smallone++) { largeone = subject.find(smallone->first); //if you found it they have that kmer in common if (largeone != subject.end()) { common++; } } }else { for (smallone = subject.begin(); smallone != subject.end(); smallone++) { largeone = query.find(smallone->first); //if you found it they have that kmer in common if (largeone != query.end()) { common++; } } } return common; } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "calcKmers"); exit(1); } } //*************************************************************************************************************** void ChimeraCheckRDP::makeSVGpic(vector info) { try{ string file = outputDir + querySeq->getName() + ".chimeracheck.svg"; ofstream outsvg; Utils util; util.openOutputFile(file, outsvg); int width = (info.size()*5) + 150; outsvg << "\n"; outsvg << "\n"; outsvg << "Plotted IS values for " + querySeq->getName() + "\n"; outsvg << "\n"; outsvg << "\n"; outsvg << "" + toString(info[0].midpoint) + "\n"; outsvg << "" + toString(info[info.size()-1].midpoint) + "\n"; outsvg << "Base Positions\n"; outsvg << "0\n"; outsvg << "IS\n"; //find max is score float biggest = 0.0; for (int i = 0; i < info.size(); i++) { if (info[i].score > biggest) { biggest = info[i].score; } } outsvg << "" + toString(biggest) + "\n"; int scaler2 = 500 / biggest; outsvg << " "; for (int i = 0; i < info.size(); i++) { if(info[i].score < 0) { info[i].score = 0; } outsvg << ((i*5) + 75) << "," << (600 - (info[i].score * scaler2)) << " "; } outsvg << "\"/> "; outsvg << "\n\n"; outsvg.close(); } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic"); exit(1); } } //***************************************************************************************************************/ mothur-1.48.0/source/chimera/chimeracheckrdp.h000077500000000000000000000024601424121717000213230ustar00rootroot00000000000000#ifndef CHIMERACHECK_H #define CHIMERACHECK_H /* * chimeracheckrdp.h * Mothur * * Created by westcott on 9/8/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "mothurchimera.h" #include "kmer.hpp" #include "kmerdb.hpp" #include "alignmentdb.h" /***********************************************************/ //This class was created using the algorithms described in //CHIMERA_CHECK version 2.7 written by Niels Larsen. 
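/***********************************************************/
// Illustrative sketch (not part of this header): counting the kmers two sequences share, in the same spirit as
// ChimeraCheckRDP::calcKmers() in chimeracheckrdp.cpp - iterate the smaller map and probe the larger one, so the
// cost is roughly O(min(n,m) * log(max(n,m))). The helper name and const-reference signature are hypothetical.
//     static int sharedKmers(const map<int, int>& a, const map<int, int>& b) {
//         const map<int, int>& smaller = (a.size() < b.size()) ? a : b;
//         const map<int, int>& larger  = (a.size() < b.size()) ? b : a;
//         int common = 0;
//         for (map<int, int>::const_iterator it = smaller.begin(); it != smaller.end(); it++) {
//             if (larger.count(it->first) != 0) { common++; }   //kmer present in both sequences
//         }
//         return common;
//     }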
/***********************************************************/ class ChimeraCheckRDP : public MothurChimera { public: ChimeraCheckRDP(string, string, string, bool, int, int, string); //fasta, template, name, svg, increment, ksize, outputDir ~ChimeraCheckRDP(); int getChimeras(Sequence*); Sequence print(ostream&, ostream&); private: Sequence* querySeq; AlignmentDB* templateDB; Kmer* kmer; Sequence closest; //closest is the closest overall seq to query vector IS; //IS is the vector of IS values for each window for query string fastafile; map names; string name; bool svg; int kmerSize, increment; vector findIS(); int calcKmers(map, map); void makeSVGpic(vector); void readName(string); }; /***********************************************************/ #endif mothur-1.48.0/source/chimera/chimerarealigner.cpp000077500000000000000000000161431424121717000220460ustar00rootroot00000000000000/* * chimerarealigner.cpp * Mothur * * Created by westcott on 2/12/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "chimerarealigner.h" #include "needlemanoverlap.hpp" #include "nast.hpp" //*************************************************************************************************************** ChimeraReAligner::ChimeraReAligner() { m = MothurOut::getInstance(); } //*************************************************************************************************************** ChimeraReAligner::~ChimeraReAligner() = default; //*************************************************************************************************************** void ChimeraReAligner::reAlign(Sequence* query, vector parents) { try { if(parents.size() != 0){ alignmentLength = query->getAlignLength(); //x int queryUnalignedLength = query->getNumBases(); //y buildTemplateProfile(parents); createAlignMatrix(queryUnalignedLength, alignmentLength); fillAlignMatrix(query->getUnaligned()); query->setAligned(getNewAlignment(query->getUnaligned())); } } catch(exception& e) { m->errorOut(e, "ChimeraReAligner", "reAlign"); exit(1); } } /***************************************************************************************************************/ void ChimeraReAligner::buildTemplateProfile(vector parents) { try{ int numParents = parents.size(); profile.resize(alignmentLength); for(int i=0;ierrorOut(e, "ChimeraReAligner", "buildTemplateProfile"); exit(1); } } /***************************************************************************************************************/ void ChimeraReAligner::createAlignMatrix(int queryUnalignedLength, int alignmentLength){ try{ alignMatrix.resize(alignmentLength+1); for(int i=0;i<=alignmentLength;i++){ alignMatrix[i].resize(queryUnalignedLength+1); } for(int i=1;i<=alignmentLength;i++) { alignMatrix[i][0].direction = 'l'; } for(int j=1;j<=queryUnalignedLength;j++){ alignMatrix[0][j].direction = 'u'; } } catch(exception& e) { m->errorOut(e, "ChimeraReAligner", "createAlignMatrix"); exit(1); } } /***************************************************************************************************************/ void ChimeraReAligner::fillAlignMatrix(string query){ try{ int GAP = -4; int nrows = alignMatrix.size()-1; int ncols = alignMatrix[0].size()-1; for(int i=1;i<=nrows;i++){ bases p = profile[i-1]; int numChars = p.Chars; for(int j=1;j<=ncols;j++){ char q = query[j-1]; // score it for if there was a match int maxScore = calcMatchScore(p, q) + alignMatrix[i-1][j-1].score; int maxDirection = 'd'; // score it for if there was a gap in the query int score = alignMatrix[i-1][j].score + (numChars * 
GAP); if (score > maxScore) { maxScore = score; maxDirection = 'l'; } alignMatrix[i][j].score = maxScore; alignMatrix[i][j].direction = maxDirection; } } } catch(exception& e) { m->errorOut(e, "ChimeraReAligner", "fillAlignMatrix"); exit(1); } } /***************************************************************************************************************/ int ChimeraReAligner::calcMatchScore(bases p, char q){ try{ int MATCH = 5; int MISMATCH = -4; int score = 0; if(q == 'G') { score = (MATCH * p.G + MISMATCH * (p.A + p.T + p.C + p.Gap)); } else if(q == 'A') { score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap)); } else if(q == 'T') { score = (MATCH * p.T + MISMATCH * (p.G + p.A + p.C + p.Gap)); } else if(q == 'C') { score = (MATCH * p.C + MISMATCH * (p.G + p.A + p.T + p.Gap)); } else { score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap)); } return score; } catch(exception& e) { m->errorOut(e, "ChimeraReAligner", "calcMatchScore"); exit(1); } } /***************************************************************************************************************/ string ChimeraReAligner::getNewAlignment(string query){ try{ string queryAlignment(alignmentLength, '.'); string referenceAlignment(alignmentLength, '.'); int maxScore = -99999999; int nrows = alignMatrix.size()-1; int ncols = alignMatrix[0].size()-1; int bestCol = -1; int bestRow = -1; for(int i=1;i<=nrows;i++){ int score = alignMatrix[i][ncols].score; if (score > maxScore) { maxScore = score; bestRow = i; bestCol = ncols; } } for(int j=1;j<=ncols;j++){ int score = alignMatrix[nrows][j].score; if (score > maxScore) { maxScore = score; bestRow = nrows; bestCol = j; } } int currentRow = bestRow; int currentCol = bestCol; int alignmentPosition = 0; if(currentRow < alignmentLength){ for(int i=alignmentLength;i>currentRow;i--){ alignmentPosition++; } } AlignCell c = alignMatrix[currentRow][currentCol]; while(c.direction != 'x'){ char q; if(c.direction == 'd'){ q = query[currentCol-1]; currentCol--; currentRow--; } else if (c.direction == 'u') { break; } else if(c.direction == 'l'){ char gapChar; if(currentCol == 0) { gapChar = '.'; } else { gapChar = '-'; } q = gapChar; currentRow--; } else{ m->mothurOut("[ERROR]: Unexpected case in ChimeraReAligner::getNewAlignment, aborting.\n"); m->setControl_pressed(true); } queryAlignment[alignmentPosition] = q; alignmentPosition++; c = alignMatrix[currentRow][currentCol]; } // need to reverse the string string flipSeq = ""; for(int i=alignmentLength-1;i>=0;i--){ flipSeq += queryAlignment[i]; } return flipSeq; } catch(exception& e) { m->errorOut(e, "ChimeraReAligner", "getNewAlignment"); exit(1); } } /***************************************************************************************************************/ // Sequence* ChimeraReAligner::getSequence(string name) { // try{ // Sequence* temp; // // //look through templateSeqs til you find it // int spot = -1; // for (int i = 0; i < templateSeqs.size(); i++) { // if (name == templateSeqs[i]->getName()) { // spot = i; // break; // } // } // // if(spot == -1) { m->mothurOut("Error: Could not find sequence.\n"); return nullptr; } // // temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned()); // // return temp; // } // catch(exception& e) { // m->errorOut(e, "ChimeraReAligner", "getSequence"); // exit(1); // } //} //***************************************************************************************************************/ 
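//***************************************************************************************************************
// Illustrative sketch (not part of mothur): the column-profile scoring used by fillAlignMatrix()/calcMatchScore()
// above. With MATCH = 5 and MISMATCH = -4, a query base scores against a template column as
//     score = MATCH * (parents with that base in the column) + MISMATCH * (all other characters in the column)
// so a column built from 10 parents with A=7, G=2, Gap=1 scores an 'A' as 5*7 - 4*3 = 23 and a 'G' as
// 5*2 - 4*8 = -22. Each matrix cell then keeps the better of "diagonal + score" (emit the query base at this
// column) and "previous template column + numChars * GAP" (leave a gap in the query), recording the direction
// used by the getNewAlignment() traceback. A hypothetical standalone version of the column score:
struct ColumnCounts { int A, T, G, C, Gap; };
static int profileMatchScore(const ColumnCounts& p, char q, int match = 5, int mismatch = -4) {
	int total = p.A + p.T + p.G + p.C + p.Gap;
	int hits = (q == 'A') ? p.A : (q == 'T') ? p.T : (q == 'G') ? p.G : (q == 'C') ? p.C : p.A; //ambiguous bases fall back to A, as in calcMatchScore()
	return match * hits + mismatch * (total - hits);
}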
mothur-1.48.0/source/chimera/chimerarealigner.h000077500000000000000000000021521424121717000215060ustar00rootroot00000000000000#ifndef CHIMERAREALIGNER_H #define CHIMERAREALIGNER_H /* * chimerarealigner.h * Mothur * * Created by westcott on 2/12/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "mothurchimera.h" #include "alignment.hpp" /***********************************************************/ struct AlignCell { int score; char direction; AlignCell() : score(0), direction('x') {}; }; /***********************************************************/ struct bases { int A, T, G, C, Gap, Chars; bases() : A(0), T(0), G(0), C(0), Gap(0), Chars(0){}; }; /***********************************************************/ class ChimeraReAligner { public: ChimeraReAligner(); ~ChimeraReAligner(); void reAlign(Sequence*, vector); private: void buildTemplateProfile(vector); void createAlignMatrix(int, int); void fillAlignMatrix(string); int calcMatchScore(bases, char); string getNewAlignment(string); int alignmentLength; vector profile; vector > alignMatrix; MothurOut* m; }; /***********************************************************/ #endif mothur-1.48.0/source/chimera/chimeraslayer.cpp000077500000000000000000000742751424121717000214070ustar00rootroot00000000000000/* * chimeraslayer.cpp * Mothur * * Created by westcott on 9/25/09. * Copyright 2009 Pschloss Lab. All rights reserved. * */ #include "chimeraslayer.h" #include "chimerarealigner.h" #include "kmerdb.hpp" //*************************************************************************************************************** ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, int k, int ms, int mms, int win, float div, int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, int tid) : MothurChimera() { try { fastafile = file; templateFileName = temp; templateSeqs = readSeqs(temp); kmerSize = k; match = ms; misMatch = mms; window = win; divR = div; minSim = minsim; minCov = mincov; minBS = minbs; minSNP = minsnp; parents = par; iters = it; increment = inc; numWanted = numw; realign = r; trimChimera = trim; numNoParents = 0; threadID = tid; doPrep(); } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); exit(1); } } //*************************************************************************************************************** //template=self ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, int k, int ms, int mms, int win, float div, int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, int tid, bool bg) : MothurChimera() { try { byGroup = bg; fastafile = file; templateSeqs = readSeqs(fastafile); templateFileName = temp; kmerSize = k; match = ms; misMatch = mms; window = win; divR = div; minSim = minsim; minCov = mincov; minBS = minbs; minSNP = minsnp; parents = par; iters = it; increment = inc; numWanted = numw; realign = r; trimChimera = trim; priority = prior; numNoParents = 0; threadID = tid; createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); exit(1); } } //*************************************************************************************************************** //template=self ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, int k, int ms, int mms, int win, float div, int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, int 
tid) : MothurChimera() { try { fastafile = file; templateSeqs = readSeqs(fastafile); templateFileName = temp; kmerSize = k; match = ms; misMatch = mms; window = win; divR = div; minSim = minsim; minCov = mincov; minBS = minbs; minSNP = minsnp; parents = par; iters = it; increment = inc; numWanted = numw; realign = r; trimChimera = trim; priority = prior; numNoParents = 0; threadID = tid; createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); exit(1); } } //*************************************************************************************************************** int ChimeraSlayer::doPrep() { try { string kmerDBNameLeft; string kmerDBNameRight; Utils util; string templatePath = util.hasPath(templateFileName); string rightTemplateFileName = templatePath + "right." + util.getRootName(util.getSimpleName(templateFileName)); databaseRight = new KmerDB(rightTemplateFileName, kmerSize); string leftTemplateFileName = templatePath + "left." + util.getRootName(util.getSimpleName(templateFileName)); databaseLeft = new KmerDB(leftTemplateFileName, kmerSize); //leftside kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; ifstream kmerFileTestLeft(kmerDBNameLeft.c_str()); bool needToGenerateLeft = true; if(kmerFileTestLeft){ string line = util.getline(kmerFileTestLeft); bool GoodFile = util.checkReleaseVersion(line, current->getVersion()); if (GoodFile) { needToGenerateLeft = false; } } if(needToGenerateLeft){ for (int i = 0; i < templateSeqs.size(); i++) { if (m->getControl_pressed()) { return 0; } string leftFrag = templateSeqs[i]->getUnaligned(); leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); Sequence leftTemp(templateSeqs[i]->getName(), leftFrag); databaseLeft->addSequence(leftTemp); } databaseLeft->generateDB(); }else { databaseLeft->readDB(kmerFileTestLeft); } kmerFileTestLeft.close(); databaseLeft->setNumSeqs(templateSeqs.size()); //rightside kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; ifstream kmerFileTestRight(kmerDBNameRight.c_str()); bool needToGenerateRight = true; if(kmerFileTestRight){ string line = util.getline(kmerFileTestRight); bool GoodFile = util.checkReleaseVersion(line, current->getVersion()); if (GoodFile) { needToGenerateRight = false; } } if(needToGenerateRight){ for (int i = 0; i < templateSeqs.size(); i++) { if (m->getControl_pressed()) { return 0; } string rightFrag = templateSeqs[i]->getUnaligned(); rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); Sequence rightTemp(templateSeqs[i]->getName(), rightFrag); databaseRight->addSequence(rightTemp); } databaseRight->generateDB(); }else { databaseRight->readDB(kmerFileTestRight); } kmerFileTestRight.close(); databaseRight->setNumSeqs(templateSeqs.size()); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "doprep"); exit(1); } } //*************************************************************************************************************** vector ChimeraSlayer::getTemplate(Sequence q, vector& userTemplateFiltered) { try { //when template=self, the query file is sorted from most abundance to least abundant //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant vector userTemplate; int myAbund = priority[q.getName()]; for (int i = 0; i < templateSeqs.size(); i++) { if 
(m->getControl_pressed()) { return userTemplate; } //have I reached a sequence with the same abundance as myself? if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; } //if its am not chimeric add it if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { userTemplate.push_back(templateSeqs[i]); } } Utils util; string kmerDBNameLeft, kmerDBNameRight; string templatePath = util.hasPath(templateFileName); string rightTemplateFileName = templatePath + "right." + util.getRootName(util.getSimpleName(templateFileName)); databaseRight = new KmerDB(rightTemplateFileName, kmerSize); string leftTemplateFileName = templatePath + "left." + util.getRootName(util.getSimpleName(templateFileName)); databaseLeft = new KmerDB(leftTemplateFileName, kmerSize); for (int i = 0; i < userTemplate.size(); i++) { if (m->getControl_pressed()) { return userTemplate; } string leftFrag = userTemplate[i]->getUnaligned(); leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33)); Sequence leftTemp(userTemplate[i]->getName(), leftFrag); databaseLeft->addSequence(leftTemp); } databaseLeft->generateDB(); databaseLeft->setNumSeqs(userTemplate.size()); for (int i = 0; i < userTemplate.size(); i++) { if (m->getControl_pressed()) { return userTemplate; } string rightFrag = userTemplate[i]->getUnaligned(); rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66)); Sequence rightTemp(userTemplate[i]->getName(), rightFrag); databaseRight->addSequence(rightTemp); } databaseRight->generateDB(); databaseRight->setNumSeqs(userTemplate.size()); return userTemplate; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "getTemplate"); exit(1); } } //*************************************************************************************************************** ChimeraSlayer::~ChimeraSlayer() { if (templateFileName != "self") { delete databaseRight; delete databaseLeft; } } //*************************************************************************************************************** void ChimeraSlayer::printHeader(ostream& out) { m->mothurOut("\nOnly reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.\n"); out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n"; } //*************************************************************************************************************** Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) { try { Sequence trim; if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); } if (chimeraFlags == "yes") { string chimeraFlag = "no"; if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR) || (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } if (chimeraFlag == "yes") { if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) { m->mothurOutJustToScreen(querySeq.getName() + "\tyes\n"); outAcc << querySeq.getName() << endl; if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } if (trimChimera) { int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart; int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart; string newAligned = trim.getAligned(); if (lengthLeft > lengthRight) { //trim right for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //trim left for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = 
'.'; } } trim.setAligned(newAligned); } } } printBlock(chimeraResults[0], chimeraFlag, out); }else { out << querySeq.getName() << "\tno" << endl; } return trim; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "print"); exit(1); } } //*************************************************************************************************************** Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) { try { Sequence trim; if (trimChimera) { string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned(); trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); } if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) { string chimeraFlag = "no"; if (leftPiece.flag == "yes") { if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR) || (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } } if (rightPiece.flag == "yes") { if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR) || (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } } bool rightChimeric = false; bool leftChimeric = false; if (chimeraFlag == "yes") { //which peice is chimeric or are both if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } } if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } if (rightChimeric || leftChimeric) { m->mothurOutJustToScreen(querySeq.getName() + "\tyes\n"); outAcc << querySeq.getName() << endl; if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); } if (trimChimera) { string newAligned = trim.getAligned(); //right side is fine so keep that if ((leftChimeric) && (!rightChimeric)) { for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //both sides are chimeric, keep longest piece int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart; int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart; int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4 int length = lengthLeftLeft; if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; } int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart; int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart; if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; } if (lengthRightRight > length) { longest = 4; } if (longest == 1) { //leftleft for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else if (longest == 2) { //leftright //get rid of leftleft for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } //get rid of right for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else if (longest == 3) { //rightleft //get rid of left for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } //get rid of rightright for (int i = 
(rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } }else { //rightright //get rid of left for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } //get rid of rightleft for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; } } } trim.setAligned(newAligned); } } } printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out); }else { out << querySeq.getName() << "\tno" << endl; } return trim; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "print"); exit(1); } } //*************************************************************************************************************** int ChimeraSlayer::getChimeras(Sequence* query) { try { trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned()); printResults.trimQuery = trimQuery; chimeraFlags = "no"; printResults.flag = "no"; querySeq = *query; //you must create a template vector thisTemplate; vector thisFilteredTemplate; if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; } else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases if (m->getControl_pressed()) { return 0; } if (thisTemplate.size() == 0) { return 0; } //not chimeric //moved this out of maligner - 4/29/11 vector refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate); Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS); if (templateFileName == "self") { delete databaseRight; delete databaseLeft; } if (m->getControl_pressed()) { return 0; } string chimeraFlag = maligner.getResults(*query, decalc); if (m->getControl_pressed()) { return 0; } vector Results = maligner.getOutput(); //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } if (chimeraFlag == "yes") { if (realign) { vector parents; for (int i = 0; i < Results.size(); i++) { parents.push_back(Results[i].parentAligned); } ChimeraReAligner realigner; realigner.reAlign(query, parents); } //get sequence that were given from maligner results vector seqs; map removeDups; map::iterator itDup; map parentNameSeq; map::iterator itSeq; for (int j = 0; j < Results.size(); j++) { float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal; //only add if you are not a duplicate if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity itDup = removeDups.find(Results[j].parent); if (itDup == removeDups.end()) { //this is not duplicate removeDups[Results[j].parent] = dist; parentNameSeq[Results[j].parent] = Results[j].parentAligned; }else if (dist > itDup->second) { //is this a stronger number for this parent removeDups[Results[j].parent] = dist; parentNameSeq[Results[j].parent] = Results[j].parentAligned; } } } for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) { itSeq = parentNameSeq.find(itDup->first); Sequence seq(itDup->first, itSeq->second); SeqCompare member; member.seq = seq; member.dist = itDup->second; seqs.push_back(member); } //limit number of parents to explore - default 3 if (Results.size() > parents) { //sort by distance sort(seqs.begin(), seqs.end(), compareSeqCompare); //prioritize larger more similiar sequence fragments reverse(seqs.begin(), seqs.end()); } //put seqs into vector to send to slayer vector seqsForSlayer; for (int k = 0; k < seqs.size(); k++) { 
seqsForSlayer.push_back(seqs[k].seq); } if (m->getControl_pressed()) { return 0; } //send to slayer chimeraFlags = slayer.getResults(*query, seqsForSlayer); if (m->getControl_pressed()) { return 0; } chimeraResults = slayer.getOutput(); printResults.flag = chimeraFlags; printResults.results = chimeraResults; //free memory //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } } return 0; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "getChimeras"); exit(1); } } //*************************************************************************************************************** void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){ try { out << querySeq.getName(); out << '\t' << data.parentA.getName() << "\t" << data.parentB.getName(); out << '\t' << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa; out << '\t' << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb ; out << '\t' << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\n'; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "printBlock"); exit(1); } } //*************************************************************************************************************** void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){ try { if ((leftChimeric) && (!rightChimeric)) { //print left out << querySeq.getName(); out << '\t' << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName(); out << '\t' << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa; out << '\t' << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb; out << '\t' << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << endl; }else if ((!leftChimeric) && (rightChimeric)) { //print right out << querySeq.getName(); out << '\t' << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName(); out << '\t' << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa; out << '\t' << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb; out << '\t' << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << endl; }else { //print both results if (leftdata.flag == "yes") { out << querySeq.getName() + "_LEFT"; out << '\t' << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName(); out << '\t' << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa; out << '\t' << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb; out << '\t' << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << endl; } if (rightdata.flag == "yes") { out << querySeq.getName() + "_RIGHT"; out << '\t' << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName(); out << '\t' << rightdata.results[0].divr_qla_qrb << '\t' << 
rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa; out << '\t' << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb; out << '\t' << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\n'; } } } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "printBlock"); exit(1); } } //*************************************************************************************************************** string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){ try { string out = ""; if ((leftChimeric) && (!rightChimeric)) { //get left out += querySeq.getName(); out += "\t" + leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName(); out += "\t" + toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa); out += "\t" + toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb); out += "\t" + flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\n"; }else if ((!leftChimeric) && (rightChimeric)) { //print right out += querySeq.getName(); out += "\t" + rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName(); out += "\t" + toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa); out += "\t" + toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb); out += "\t" + flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\n"; }else { //print both results if (leftdata.flag == "yes") { out += querySeq.getName() + "_LEFT"; out += "\t" + leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName(); out += "\t" + toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa); out += "\t" + toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb); out += "\t" + flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\n"; } if (rightdata.flag == "yes") { out += querySeq.getName() + "_RIGHT"; out += "\t" + rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName(); out += "\t" + toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa); out += "\t" + toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb); out += "\t" + flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\n"; } } 
return out; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "getBlock"); exit(1); } } //*************************************************************************************************************** string ChimeraSlayer::getBlock(data_struct data, string flag){ try { string outputString = ""; outputString += querySeq.getName(); outputString += "\t" + data.parentA.getName() + "\t" + data.parentB.getName(); outputString += "\t" + toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa); outputString += "\t" + toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb); outputString += "\t" + flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\n"; return outputString; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "getBlock"); exit(1); } } //*************************************************************************************************************** vector ChimeraSlayer::getRefSeqs(Sequence q, vector& thisTemplate, vector& thisFilteredTemplate){ try { vector refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes return refSeqs; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "getRefSeqs"); exit(1); } } //*************************************************************************************************************** vector ChimeraSlayer::getKmerSeqs(Sequence q, vector& db, int num) { try { vector refResults; //get parts of query string queryUnAligned = q.getUnaligned(); string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence Sequence* queryLeft = new Sequence(q.getName(), leftQuery); Sequence* queryRight = new Sequence(q.getName(), rightQuery); vector Scores; vector tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num, Scores); vector tempIndexesRight = databaseRight->findClosestSequences(queryRight, num, Scores); //merge results map seen; map::iterator it; vector mergedResults; int index = 0; // for (int i = 0; i < smaller.size(); i++) { while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){ if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; } //add left if you havent already it = seen.find(tempIndexesLeft[index]); if (it == seen.end()) { mergedResults.push_back(tempIndexesLeft[index]); seen[tempIndexesLeft[index]] = tempIndexesLeft[index]; } //add right if you havent already it = seen.find(tempIndexesRight[index]); if (it == seen.end()) { mergedResults.push_back(tempIndexesRight[index]); seen[tempIndexesRight[index]] = tempIndexesRight[index]; } index++; } for (int i = index; i < tempIndexesLeft.size(); i++) { if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; } //add right if you havent already it = seen.find(tempIndexesLeft[i]); if (it == seen.end()) { mergedResults.push_back(tempIndexesLeft[i]); seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; } } for (int i = index; i < tempIndexesRight.size(); i++) { if (m->getControl_pressed()) { delete queryRight; delete queryLeft; return refResults; } //add right if you havent already it = seen.find(tempIndexesRight[i]); if (it == seen.end()) { mergedResults.push_back(tempIndexesRight[i]); seen[tempIndexesRight[i]] = tempIndexesRight[i]; } } for (int i = 0; i < mergedResults.size(); i++) { if 
(db[mergedResults[i]]->getName() != q.getName()) { Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); refResults.push_back(temp); } } delete queryRight; delete queryLeft; return refResults; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "getKmerSeqs"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/chimera/chimeraslayer.h000077500000000000000000000046161424121717000210440ustar00rootroot00000000000000#ifndef CHIMERASLAYER_H #define CHIMERASLAYER_H /* * chimeraslayer.h * Mothur * * Created by westcott on 9/25/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "mothurchimera.h" #include "maligner.h" #include "slayer.h" //***********************************************************************/ //This class was modeled after the chimeraSlayer written by the Broad Institute /***********************************************************************/ class ChimeraSlayer : public MothurChimera { public: ChimeraSlayer(string, string, bool, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, int); ChimeraSlayer(string, string, bool, map&, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, int); ChimeraSlayer(string, string, bool, map&, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, int, bool); ~ChimeraSlayer(); int getChimeras(Sequence*); Sequence print(ostream&, ostream&); Sequence print(ostream&, ostream&, data_results, data_results); void printHeader(ostream&); int doPrep(); int getNumNoParents() { return numNoParents; } data_results getResults() { return printResults; } private: Sequence querySeq; Sequence trimQuery; DeCalculator decalc; SearchDatabase* databaseRight; SearchDatabase* databaseLeft; map priority; //for template=self, seqname, seqAligned, abundance set chimericSeqs; //for template=self, so we don't add chimeric sequences to the userTemplate set int numNoParents, threadID; vector chimeraResults; data_results printResults; string chimeraFlags, fastafile; bool realign, trimChimera; int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment; float divR; void printBlock(data_struct, string, ostream&); void printBlock(data_results, data_results, bool, bool, string, ostream&); string getBlock(data_struct, string); string getBlock(data_results, data_results, bool, bool, string); //int readNameFile(string); vector getTemplate(Sequence, vector&); vector getRefSeqs(Sequence, vector&, vector&); vector getKmerSeqs(Sequence, vector&, int); }; /************************************************************************/ #endif mothur-1.48.0/source/chimera/decalc.cpp000077500000000000000000000547461424121717000177730ustar00rootroot00000000000000/* * decalc.cpp * Mothur * * Created by Sarah Westcott on 7/22/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "decalc.h" #include "mothurchimera.h" #include "calculator.h" #include "eachgapdist.h" #include "ignoregaps.h" #include "eachgapdist.h" //*************************************************************************************************************** void DeCalculator::setMask(string ms) { try { seqMask = ms; int count = 0; maskMap.clear(); if (seqMask.length() != 0) { //whereever there is a base in the mask, save that value is query and subject for (int i = 0; i < seqMask.length(); i++) { if (isalpha(seqMask[i])) { h.insert(i); maskMap[count] = i; count++; } } }else { for (int i = 0; i < alignLength; i++) { h.insert(i); maskMap[count] = i; count++; } } } catch(exception& e) { m->errorOut(e, "DeCalculator", "setMask"); exit(1); } } //*************************************************************************************************************** void DeCalculator::runMask(Sequence* seq) { try{ string q = seq->getAligned(); string tempQuery = ""; //whereever there is a base in the mask, save that value is query and subject set::iterator setit; for ( setit=h.begin() ; setit != h.end(); setit++ ) { tempQuery += q[*setit]; } //save masked values seq->setAligned(tempQuery); seq->setUnaligned(tempQuery); } catch(exception& e) { m->errorOut(e, "DeCalculator", "runMask"); exit(1); } } //*************************************************************************************************************** //num is query's spot in querySeqs void DeCalculator::trimSeqs(Sequence* query, Sequence* subject, map& trim) { try { string q = query->getAligned(); string s = subject->getAligned(); int front = 0; for (int i = 0; i < q.length(); i++) { if (isalpha(q[i]) && isalpha(s[i])) { front = i; break; } } int back = 0; for (int i = q.length(); i >= 0; i--) { if (isalpha(q[i]) && isalpha(s[i])) { back = i; break; } } trim[front] = back; } catch(exception& e) { m->errorOut(e, "DeCalculator", "trimSeqs"); exit(1); } } //*************************************************************************************************************** vector DeCalculator::findWindows(Sequence* query, int front, int back, int& size, int increment) { try { vector win; int cutoff = back - front; //back - front //if window is set to default if (size == 0) { if (cutoff > 1200) { size = 300; } else{ size = (cutoff / 4); } } else if (size > (cutoff / 4)) { m->mothurOut("[WARNING]: You have selected too large a window size for sequence " + query->getName() + ". I will choose an appropriate window size.\n"); size = (cutoff / 4); } //this follows wigeon, but we may want to consider that it chops off the end values if the sequence cannot be evenly divided into steps for (int i = front; i < (back - size) ; i+=increment) { win.push_back(i); } return win; } catch(exception& e) { m->errorOut(e, "DeCalculator", "findWindows"); exit(1); } } //*************************************************************************************************************** vector DeCalculator::calcObserved(Sequence* query, Sequence* subject, vector window, int size) { try { if (query->getAligned().length() != subject->getAligned().length()) { m->mothurOut("[ERROR]: seqLengths must match. 
queryLength = " + toString(query->getAligned().length()) + ", subjectLength = " + toString(subject->getAligned().length()) +"\n"); m->setControl_pressed(true); } vector temp; //int gaps = 0; for (int i = 0; i < window.size(); i++) { if (m->getControl_pressed()) { break; } string seqFrag = query->getAligned().substr(window[i], size); string seqFragsub = subject->getAligned().substr(window[i], size); if (m->getDebug()) { m->mothurOut("[DEBUG]: seqLengths = " + toString(seqFrag.length()) + "," + toString(seqFragsub.length()) + "\t" + toString(window[i]) + "\t" + toString(size) +"\n"); } int diff = 0; for (int b = 0; b < seqFrag.length(); b++) { //if at least one is a base and they are not equal if( (isalpha(seqFrag[b]) || isalpha(seqFragsub[b])) && (seqFrag[b] != seqFragsub[b]) ) { diff++; } } //percentage of mismatched bases float dist = diff / (float) (seqFrag.length()) * 100; if (m->getDebug()) { m->mothurOut("[DEBUG]: diffs = " + toString(diff) + ", dist = " + toString(dist) + "\n"); } temp.push_back(dist); } return temp; } catch(exception& e) { m->errorOut(e, "DeCalculator", "calcObserved"); exit(1); } } //*************************************************************************************************************** float DeCalculator::calcDist(Sequence* query, Sequence* subject, int front, int back) { try { //so you only look at the trimmed part of the sequence int cutoff = back - front; int gaps = 0; //from first startpoint with length back-front string seqFrag = query->getAligned().substr(front, cutoff); string seqFragsub = subject->getAligned().substr(front, cutoff); int diff = 0; for (int b = 0; b < seqFrag.length(); b++) { //ignore gaps if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; } if (seqFrag[b] != seqFragsub[b]) { diff++; } } //if the whole fragment is 0 distance = 0 if ((seqFrag.length()-gaps) == 0) { return 0.0; } //percentage of mismatched bases float dist = diff / (float) (seqFrag.length()-gaps) * 100; return dist; } catch(exception& e) { m->errorOut(e, "DeCalculator", "calcDist"); exit(1); } } //*************************************************************************************************************** vector DeCalculator::calcExpected(vector qav, float coef) { try { //for each window vector queryExpected; for (int j = 0; j < qav.size(); j++) { float expected = qav[j] * coef; queryExpected.push_back(expected); } return queryExpected; } catch(exception& e) { m->errorOut(e, "DeCalculator", "calcExpected"); exit(1); } } //*************************************************************************************************************** float DeCalculator::calcDE(vector obs, vector exp) { try { //for each window float sum = 0.0; //sum = sum from 1 to i of (oi-ei)^2 int numZeros = 0; for (int j = 0; j < obs.size(); j++) { sum += ((obs[j] - exp[j]) * (obs[j] - exp[j])); } float de = sqrt((sum / (obs.size() - 1 - numZeros))); return de; } catch(exception& e) { m->errorOut(e, "DeCalculator", "calcDE"); exit(1); } } //*************************************************************************************************************** vector DeCalculator::calcFreq(vector seqs, string filename, string version) { try { vector prob; Utils util; string freqfile = util.getRootName(filename) + "freq"; ofstream outFreq; util.openOutputFile(freqfile, outFreq); outFreq << "#" << version << endl; string length = toString(seqs.size()); //if there are 5000 seqs in the template then set precision to 3 int precision = length.length() - 1; //format output outFreq.setf(ios::fixed, 
ios::floatfield); outFreq.setf(ios::showpoint); //at each position in the sequence for (int i = 0; i < seqs[0]->getAligned().length(); i++) { vector freq; freq.resize(4,0); int gaps = 0; //find the frequency of each nucleotide for (int j = 0; j < seqs.size(); j++) { char value = seqs[j]->getAligned()[i]; if(toupper(value) == 'A') { freq[0]++; } else if(toupper(value) == 'T' || toupper(value) == 'U') { freq[1]++; } else if(toupper(value) == 'G') { freq[2]++; } else if(toupper(value) == 'C') { freq[3]++; } else { gaps++; } } //find base with highest frequency int highest = 0; for (int j = 0; j < freq.size(); j++) { if (freq[j] > highest) { highest = freq[j]; } } float highFreq = highest / (float) (seqs.size()); float Pi; Pi = (highFreq - 0.25) / 0.75; //cannot have probability less than 0. if (Pi < 0) { Pi = 0.0; } //saves this for later outFreq << setprecision(precision) << i << '\t' << highFreq << endl; if (h.count(i) > 0) { prob.push_back(Pi); } } outFreq.close(); return prob; } catch(exception& e) { m->errorOut(e, "DeCalculator", "calcFreq"); exit(1); } } //*************************************************************************************************************** vector DeCalculator::findQav(vector window, int size, vector probabilityProfile) { try { vector averages; //for each window find average for (int i = 0; i < window.size(); i++) { float average = 0.0; //while you are in the window for this sequence int count = 0; for (int j = window[i]; j < (window[i]+size); j++) { average += probabilityProfile[j]; count++; } average = average / count; //save this windows average averages.push_back(average); } return averages; } catch(exception& e) { m->errorOut(e, "DeCalculator", "findQav"); exit(1); } } //*************************************************************************************************************** //seqs have already been masked vector< vector > DeCalculator::getQuantiles(vector seqs, vector windowSizesTemplate, int window, vector probProfile, int increment, int start, int end) { try { vector< vector > quan; //percentage of mismatched pairs 1 to 100 quan.resize(100); //for each sequence for(int i = start; i < end; i++){ m->mothurOut("Processing sequence " + toString(i)+ "\n"); Sequence* query = new Sequence(seqs[i]->getName(), seqs[i]->getAligned()); //compare to every other sequence in template for(int j = 0; j < i; j++){ Sequence* subject = new Sequence(seqs[j]->getName(), seqs[j]->getAligned()); if (m->getControl_pressed()) { delete query; delete subject; return quan; } map trim; map::iterator it; trimSeqs(query, subject, trim); it = trim.begin(); int front = it->first; int back = it->second; //reset window for each new comparison windowSizesTemplate[i] = window; vector win = findWindows(query, front, back, windowSizesTemplate[i], increment); vector obsi = calcObserved(query, subject, win, windowSizesTemplate[i]); vector q = findQav(win, windowSizesTemplate[i], probProfile); float alpha = getCoef(obsi, q); vector exp = calcExpected(q, alpha); float de = calcDE(obsi, exp); float dist = calcDist(query, subject, front, back); dist = ceil(dist); quan[dist].push_back(de); delete subject; } delete query; } return quan; } catch(exception& e) { m->errorOut(e, "DeCalculator", "getQuantiles"); exit(1); } } //******************************************************************************************************************** //sorts lowest to highest inline bool compareQuanMembers(quanMember left, quanMember right){ return (left.score < right.score); } 
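//***************************************************************************************************************
//A minimal illustrative sketch (not taken from the original implementation) of how the deviation statistic
//used by getQuantiles is assembled from an observed and an expected distance profile, assuming plain
//vector<float> inputs with at least two windows; it simply chains the same steps performed by getCoef,
//calcExpected and calcDE above. The name exampleDeviation is hypothetical and used only for illustration.
inline float exampleDeviation(vector<float> obs, vector<float> qav) {
	float obsAverage = 0.0;  float probAverage = 0.0;
	for (int i = 0; i < obs.size(); i++) {  obsAverage += obs[i];  }	obsAverage /= (float) obs.size();
	for (int i = 0; i < qav.size(); i++) {  probAverage += qav[i];  }	probAverage /= (float) qav.size();

	float coef = obsAverage / probAverage;		//same ratio computed by getCoef

	float sum = 0.0;
	for (int j = 0; j < obs.size(); j++) {		//calcExpected followed by calcDE
		float expected = qav[j] * coef;
		sum += ((obs[j] - expected) * (obs[j] - expected));
	}

	return sqrt((sum / (obs.size() - 1)));		//root-mean-square deviation between observed and expected profiles
}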
//*************************************************************************************************************** //this was going to be used by pintail to increase the sensitivity of the chimera detection, but it wasn't quite right. may want to revisit in the future... void DeCalculator::removeObviousOutliers(vector< vector >& quantiles, int num) { try { for (int i = 0; i < quantiles.size(); i++) { //find mean of this quantile score sort(quantiles[i].begin(), quantiles[i].end()); vector temp; if (quantiles[i].size() != 0) { float high = quantiles[i][int(quantiles[i].size() * 0.99)]; float low = quantiles[i][int(quantiles[i].size() * 0.01)]; //look at each value in quantiles to see if it is an outlier for (int j = 0; j < quantiles[i].size(); j++) { //is this score between 1 and 99% if ((quantiles[i][j] > low) && (quantiles[i][j] < high)) { temp.push_back(quantiles[i][j]); } } } quantiles[i] = temp; } } catch(exception& e) { m->errorOut(e, "DeCalculator", "removeObviousOutliers"); exit(1); } } //*************************************************************************************************************** float DeCalculator::findAverage(vector myVector) { try{ float total = 0.0; for (int i = 0; i < myVector.size(); i++) { total += myVector[i]; } float average = total / (float) myVector.size(); return average; } catch(exception& e) { m->errorOut(e, "DeCalculator", "findAverage"); exit(1); } } //*************************************************************************************************************** float DeCalculator::getCoef(vector obs, vector qav) { try { //find average prob for this seqs windows float probAverage = findAverage(qav); //find observed average float obsAverage = findAverage(obs); float coef = obsAverage / probAverage; return coef; } catch(exception& e) { m->errorOut(e, "DeCalculator", "getCoef"); exit(1); } } //*************************************************************************************************************** //gets closest matches to each end, since chimeras will most likely have different parents on each end vector DeCalculator::findClosest(Sequence querySeq, vector& thisTemplate, vector& thisFilteredTemplate, int numWanted, int minSim) { try { //indexes.clear(); vector seqsMatches; vector distsLeft; vector distsRight; DistCalc* distcalculator = new eachGapDist(1.0); string queryUnAligned = querySeq.getUnaligned(); int numBases = int(queryUnAligned.length() * 0.33); string leftQuery = ""; //first 1/3 of the sequence string rightQuery = ""; //last 1/3 of the sequence string queryAligned = querySeq.getAligned(); //left side bool foundFirstBase = false; int baseCount = 0; int leftSpot = 0; int firstBaseSpot = 0; for (int i = 0; i < queryAligned.length(); i++) { //if you are a base if (isalpha(queryAligned[i])) { baseCount++; if (!foundFirstBase) { foundFirstBase = true; firstBaseSpot = i; } } //eliminate opening .'s if (foundFirstBase) { leftQuery += queryAligned[i]; } //if you have 1/3 if (baseCount >= numBases) { leftSpot = i; break; } //first 1/3 } //right side - count through another 1/3, so you are at last third baseCount = 0; int rightSpot = 0; for (int i = leftSpot; i < queryAligned.length(); i++) { //if you are a base if (isalpha(queryAligned[i])) { baseCount++; } //if you have 1/3 if (baseCount > numBases + 1) { rightSpot = i; break; } //last 1/3 } //trim end //find last position in query that is a non gap character int lastBaseSpot = queryAligned.length()-1; for (int j = queryAligned.length()-1; j >= 0; j--) { if (isalpha(queryAligned[j])) { 
lastBaseSpot = j; break; } } rightQuery = queryAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //sequence from pos spot to end Sequence queryLeft(querySeq.getName(), leftQuery); Sequence queryRight(querySeq.getName(), rightQuery); for(int j = 0; j < thisFilteredTemplate.size(); j++){ string dbAligned = thisFilteredTemplate[j]->getAligned(); string leftDB = dbAligned.substr(firstBaseSpot, (leftSpot-firstBaseSpot+1)); //first 1/3 of the sequence string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //last 1/3 of the sequence Sequence dbLeft(thisFilteredTemplate[j]->getName(), leftDB); Sequence dbRight(thisFilteredTemplate[j]->getName(), rightDB); double distLeft = distcalculator->calcDist(queryLeft, dbLeft); double distRight = distcalculator->calcDist(queryRight, dbRight); SeqDist subjectLeft; subjectLeft.seq = nullptr; subjectLeft.dist = distLeft; subjectLeft.index = j; distsLeft.push_back(subjectLeft); SeqDist subjectRight; subjectRight.seq = nullptr; subjectRight.dist = distRight; subjectRight.index = j; distsRight.push_back(subjectRight); } delete distcalculator; //sort by smallest distance sort(distsRight.begin(), distsRight.end(), compareSeqDist); sort(distsLeft.begin(), distsLeft.end(), compareSeqDist); //merge results map seen; map::iterator it; vector dists; float lastRight = distsRight[0].dist; float lastLeft = distsLeft[0].dist; float maxDist = 1.0 - (minSim / 100.0); for (int i = 0; i < numWanted+1; i++) { if (m->getControl_pressed()) { return seqsMatches; } //add left if you havent already it = seen.find(thisTemplate[distsLeft[i].index]->getName()); if (it == seen.end() && distsLeft[i].dist <= maxDist) { dists.push_back(distsLeft[i]); seen[thisTemplate[distsLeft[i].index]->getName()] = thisTemplate[distsLeft[i].index]->getName(); lastLeft = distsLeft[i].dist; } //add right if you havent already it = seen.find(thisTemplate[distsRight[i].index]->getName()); if (it == seen.end() && distsRight[i].dist <= maxDist) { dists.push_back(distsRight[i]); seen[thisTemplate[distsRight[i].index]->getName()] = thisTemplate[distsRight[i].index]->getName(); lastRight = distsRight[i].dist; } if (i == numWanted) { break; } } //are we still above the minimum similarity cutoff if ((lastLeft >= minSim) || (lastRight >= minSim)) { //add in ties from left int i = numWanted; while (i < distsLeft.size()) { if (distsLeft[i].dist == lastLeft) { dists.push_back(distsLeft[i]); } else { break; } i++; } //add in ties from right i = numWanted; while (i < distsRight.size()) { if (distsRight[i].dist == lastRight) { dists.push_back(distsRight[i]); } else { break; } i++; } } for (int i = 0; i < dists.size(); i++) { if ((thisTemplate[dists[i].index]->getName() != querySeq.getName()) && (((1.0-dists[i].dist)*100) >= minSim)) { Sequence temp(thisTemplate[dists[i].index]->getName(), thisTemplate[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. 
			seqsMatches.push_back(temp);
			}
		}

		return seqsMatches;
	}
	catch(exception& e) {
		m->errorOut(e, "DeCalculator", "findClosest");
		exit(1);
	}
}
//***************************************************************************************************************
Sequence* DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db) {
	try {
		Sequence* seqsMatch;

		DistCalc* distcalculator = new eachGapDist(1.0);
		int index = 0;
		double smallest = 1000000;

		for(int j = 0; j < db.size(); j++){
			double dist = distcalculator->calcDist(*querySeq, *db[j]);

			if (dist < smallest) { smallest = dist; index = j; }
		}
		delete distcalculator;

		seqsMatch = new Sequence(db[index]->getName(), db[index]->getAligned()); //have to make a copy so you can trim and filter without stepping on each other.

		return seqsMatch;
	}
	catch(exception& e) {
		m->errorOut(e, "DeCalculator", "findClosest");
		exit(1);
	}
}
/***************************************************************************************************************/
map<int, int> DeCalculator::trimSeqs(Sequence& query, vector<Sequence>& topMatches) {
	try {
		int frontPos = 0;  //should contain first position in all seqs that is not a gap character
		int rearPos = query.getAligned().length();

		//********find first position in topMatches that is a non gap character***********//
		//find first position in each of the top matches that is a non gap character
		for (int i = 0; i < topMatches.size(); i++) {
			string aligned = topMatches[i].getAligned();
			int pos = 0;

			//find first spot in this seq
			for (int j = 0; j < aligned.length(); j++) {  if (isalpha(aligned[j])) { pos = j; break; }  }

			//save this spot if it is the farthest
			if (pos > frontPos) { frontPos = pos; }
		}

		string aligned = query.getAligned();
		int pos = 0;

		//find first position in query that is a non gap character
		for (int j = 0; j < aligned.length(); j++) {  if (isalpha(aligned[j])) { pos = j; break; }  }

		//save this spot if it is the farthest
		if (pos > frontPos) { frontPos = pos; }

		//********find last position in topMatches that is a non gap character***********//
		for (int i = 0; i < topMatches.size(); i++) {
			string aligned = topMatches[i].getAligned();
			int pos = aligned.length();

			//find last spot in this seq
			for (int j = aligned.length()-1; j >= 0; j--) {  if (isalpha(aligned[j])) { pos = j; break; }  }

			//save this spot if it is the farthest
			if (pos < rearPos) { rearPos = pos; }
		}

		aligned = query.getAligned();
		pos = aligned.length();

		//find last position in query that is a non gap character
		for (int j = aligned.length()-1; j >= 0; j--) {  if (isalpha(aligned[j])) { pos = j; break; }  }

		//save this spot if it is the farthest
		if (pos < rearPos) { rearPos = pos; }

		map<int, int> trimmedPos;
		//check to make sure that is not whole seq
		if ((rearPos - frontPos - 1) <= 0) {
			query.setAligned("");

			//trim topMatches
			for (int i = 0; i < topMatches.size(); i++) {  topMatches[i].setAligned("");  }
		}else {
			//trim query
			string newAligned = query.getAligned();
			newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
			query.setAligned(newAligned);

			//trim topMatches
			for (int i = 0; i < topMatches.size(); i++) {
				newAligned = topMatches[i].getAligned();
				newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
				topMatches[i].setAligned(newAligned);
			}

			for (int i = 0; i < newAligned.length(); i++) {  trimmedPos[i] = i+frontPos;  }
		}

		return trimmedPos;
	}
	catch(exception& e) {
		m->errorOut(e, "DeCalculator", "trimSequences");
		exit(1);
	}
}
//***************************************************************************************************************
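//***************************************************************************************************************
//A small usage sketch (illustrative only, not part of the original implementation) of how a caller such as the
//slayer code might ask DeCalculator for candidate chimera parents. findClosest above scores the reference set
//separately against the first and last thirds of the query, because a chimeric read usually takes each end from
//a different parent. The function name exampleFindCandidateParents and its argument names are hypothetical.
vector<Sequence> exampleFindCandidateParents(Sequence query, vector<Sequence*>& refs, vector<Sequence*>& filteredRefs, int numWanted, int minSim) {
	DeCalculator decalc;
	//returns copies of the closest references to each end of the query, dropping duplicates and anything
	//below minSim percent similarity, ready to be trimmed and filtered by the caller
	return decalc.findClosest(query, refs, filteredRefs, numWanted, minSim);
}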
mothur-1.48.0/source/chimera/decalc.h000077500000000000000000000055261424121717000174300ustar00rootroot00000000000000#ifndef DECALC_H #define DECALC_H /* * decalc.h * Mothur * * Created by Sarah Westcott on 7/22/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothur.h" #include "sequence.hpp" /***********************************************************************/ //This class was created using the algorithms described in the // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1. /***********************************************************************/ //this structure is necessary to determine the sequence that contributed to the outliers when we remove them //this way we can remove all scores that are contributed by outlier sequences. struct quanMember { float score; int member1; int member2; quanMember (float s, int m, int n) : score(s), member1(m), member2(n) {} quanMember() = default; }; //******************************************************************************************************************** class DeCalculator { public: DeCalculator() { m = MothurOut::getInstance(); } ~DeCalculator() = default;; vector findClosest(Sequence, vector&, vector&, int, int); //takes querySeq, a reference db, filteredRefDB, numWanted, minSim Sequence* findClosest(Sequence*, vector); set getPos() { return h; } void setMask(string); void setAlignmentLength(int l) { alignLength = l; } void runMask(Sequence*); void trimSeqs(Sequence*, Sequence*, map&); map trimSeqs(Sequence&, vector&); void removeObviousOutliers(vector< vector >&, int); vector calcFreq(vector, string, string); vector findWindows(Sequence*, int, int, int&, int); vector calcObserved(Sequence*, Sequence*, vector, int); vector calcExpected(vector, float); vector findQav(vector, int, vector); float calcDE(vector, vector); float calcDist(Sequence*, Sequence*, int, int); float getCoef(vector, vector); vector< vector > getQuantiles(vector, vector, int, vector, int, int, int); vector returnObviousOutliers(vector< vector >, int); map getMaskMap() { return maskMap; } private: //vector sortContrib(map); //used by mallard float findAverage(vector); //int findLargestContrib(vector); //void removeContrib(int, vector&); string seqMask; set h; int alignLength; map maskMap; MothurOut* m; }; /***********************************************************************/ #endif mothur-1.48.0/source/chimera/maligner.cpp000077500000000000000000000363311424121717000203440ustar00rootroot00000000000000/* * maligner.cpp * Mothur * * Created by westcott on 9/23/09. * Copyright 2009 Schloss Lab. All rights reserved. 
* */ #include "maligner.h" /***********************************************************************/ //int num, int match, int misMatch, , string mode, Database* dataLeft, Database* dataRight Maligner::Maligner(vector temp, int match, int misMatch, float div, int ms, int minCov) : db(temp), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov) { //numWanted(num), , searchMethod(mode), databaseLeft(dataLeft), databaseRight(dataRight) m = MothurOut::getInstance(); } /***********************************************************************/ string Maligner::getResults(Sequence q, DeCalculator decalc) { try { outputResults.clear(); //make copy so trimming doesn't destroy query from calling class - remember to deallocate query.setName(q.getName()); query.setAligned(q.getAligned()); string chimera; //copy refSeqs so that filter does not effect original for(int i = 0; i < db.size(); i++) { Sequence newSeq(db[i].getName(), db[i].getAligned()); refSeqs.push_back(newSeq); } refSeqs = minCoverageFilter(refSeqs); if (refSeqs.size() < 2) { //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } percentIdenticalQueryChimera = 0.0; return "unknown"; } int chimeraPenalty = computeChimeraPenalty(); //fills outputResults chimera = chimeraMaligner(chimeraPenalty, decalc); if (m->getControl_pressed()) { return chimera; } //free memory //delete query; //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } return chimera; } catch(exception& e) { m->errorOut(e, "Maligner", "getResults"); exit(1); } } /***********************************************************************/ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator decalc) { try { string chimera; //trims seqs to first non gap char in all seqs and last non gap char in all seqs spotMap = decalc.trimSeqs(query, refSeqs); //you trimmed the whole sequence, skip if (query.getAligned() == "") { return "no"; } vector temp = refSeqs; temp.push_back(query); temp = verticalFilter(temp); query = temp[temp.size()-1]; for (int i = 0; i < temp.size()-1; i++) { refSeqs[i] = temp[i]; } vector< vector > matrix = buildScoreMatrix(query.getAligned().length(), refSeqs.size()); //builds and initializes if (m->getControl_pressed()) { return chimera; } fillScoreMatrix(matrix, refSeqs, chimeraPenalty); vector path = extractHighestPath(matrix); if (m->getControl_pressed()) { return chimera; } vector trace = mapTraceRegionsToAlignment(path); if (trace.size() > 1) { chimera = "yes"; } else { chimera = "no"; return chimera; } int traceStart = path[0].col; int traceEnd = path[path.size()-1].col; string queryInRange = query.getAligned(); queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); string chimeraSeq = constructChimericSeq(trace, refSeqs); percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq); if (m->getControl_pressed()) { return chimera; } //save output results for (int i = 0; i < trace.size(); i++) { int regionStart = trace[i].col; int regionEnd = trace[i].oldCol; int seqIndex = trace[i].row; results temp; temp.parent = refSeqs[seqIndex].getName(); temp.parentAligned = db[seqIndex].getAligned(); temp.nastRegionStart = spotMap[regionStart]; temp.nastRegionEnd = spotMap[regionEnd]; temp.regionStart = unalignedMap[regionStart]; temp.regionEnd = unalignedMap[regionEnd]; string parentInRange = refSeqs[seqIndex].getAligned(); parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1)); temp.queryToParent = computePercentID(queryInRange, 
parentInRange); temp.divR = (percentIdenticalQueryChimera / temp.queryToParent); string queryInRegion = query.getAligned(); queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1)); string parentInRegion = refSeqs[seqIndex].getAligned(); parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1)); temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion); outputResults.push_back(temp); } return chimera; } catch(exception& e) { m->errorOut(e, "Maligner", "chimeraMaligner"); exit(1); } } /***********************************************************************/ //removes top matches that do not have minimum coverage with query. vector Maligner::minCoverageFilter(vector ref){ try { vector newRefs; string queryAligned = query.getAligned(); for (int i = 0; i < ref.size(); i++) { string refAligned = ref[i].getAligned(); int numBases = 0; int numCovered = 0; //calculate coverage for (int j = 0; j < queryAligned.length(); j++) { if (isalpha(queryAligned[j])) { numBases++; if (isalpha(refAligned[j])) { numCovered++; } } } int coverage = ((numCovered/(float)numBases)*100); //if coverage above minimum if (coverage > minCoverage) { newRefs.push_back(ref[i]); } } return newRefs; } catch(exception& e) { m->errorOut(e, "Maligner", "minCoverageFilter"); exit(1); } } /***********************************************************************/ // a breakpoint should yield fewer mismatches than this number with respect to the best parent sequence. int Maligner::computeChimeraPenalty() { try { int numAllowable = ((1.0 - (1.0/minDivR)) * query.getNumBases()); int penalty = int(numAllowable + 1) * misMatchPenalty; return penalty; } catch(exception& e) { m->errorOut(e, "Maligner", "computeChimeraPenalty"); exit(1); } } /***********************************************************************/ //this is a vertical filter vector Maligner::verticalFilter(vector seqs) { try { vector gaps; gaps.resize(query.getAligned().length(), 0); string filterString = (string(query.getAligned().length(), '1')); //for each sequence for (int i = 0; i < seqs.size(); i++) { string seqAligned = seqs[i].getAligned(); for (int j = 0; j < seqAligned.length(); j++) { //if this spot is a gap if ((seqAligned[j] == '-') || (seqAligned[j] == '.')) { gaps[j]++; } } } //zero out spot where all sequences have blanks int numColRemoved = 0; for(int i = 0; i < seqs[0].getAligned().length(); i++){ if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; } } map newMap; //for each sequence for (int i = 0; i < seqs.size(); i++) { string seqAligned = seqs[i].getAligned(); string newAligned = ""; int count = 0; for (int j = 0; j < seqAligned.length(); j++) { //if this spot is not a gap if (filterString[j] == '1') { newAligned += seqAligned[j]; newMap[count] = spotMap[j]; count++; } } seqs[i].setAligned(newAligned); } string query = seqs[seqs.size()-1].getAligned(); int queryLength = query.length(); unalignedMap.resize(queryLength, 0); for(int i=1;ierrorOut(e, "Maligner", "verticalFilter"); exit(1); } } //*************************************************************************************************************** vector< vector > Maligner::buildScoreMatrix(int cols, int rows) { try{ vector< vector > m(rows); for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { //initialize each cell score_struct temp; temp.prev = -1; temp.score = -9999999; temp.col = j; temp.row = i; m[i].push_back(temp); } } return m; } catch(exception& e) { m->errorOut(e, "Maligner", "buildScoreMatrix"); 
exit(1); } } //*************************************************************************************************************** void Maligner::fillScoreMatrix(vector >& ms, vector seqs, int penalty) { try{ //get matrix dimensions int numCols = query.getAligned().length(); int numRows = seqs.size(); //initialize first col string queryAligned = query.getAligned(); for (int i = 0; i < numRows; i++) { string subjectAligned = seqs[i].getAligned(); //are you both gaps? if ((!isalpha(queryAligned[0])) && (!isalpha(subjectAligned[0]))) { ms[i][0].score = 0; // ms[i][0].mismatches = 0; }else if (queryAligned[0] == subjectAligned[0]) { //|| subjectAligned[0] == 'N') ms[i][0].score = matchScore; // ms[i][0].mismatches = 0; }else{ ms[i][0].score = 0; // ms[i][0].mismatches = 1; } } //fill rest of matrix for (int j = 1; j < numCols; j++) { //iterate through matrix columns // for (int i = 0; i < 1; i++) { //iterate through matrix rows for (int i = 0; i < numRows; i++) { //iterate through matrix rows string subjectAligned = seqs[i].getAligned(); int matchMisMatchScore = 0; //are you both gaps? if ((!isalpha(queryAligned[j])) && (!isalpha(subjectAligned[j]))) { //leave the same }else if ((toupper(queryAligned[j]) == 'N') || (toupper(subjectAligned[j]) == 'N')) { //leave the same }else if (queryAligned[j] == subjectAligned[j]) { matchMisMatchScore = matchScore; }else if (queryAligned[j] != subjectAligned[j]) { matchMisMatchScore = misMatchPenalty; } //compute score based on previous columns scores for (int prevIndex = 0; prevIndex < numRows; prevIndex++) { //iterate through rows int sumScore = matchMisMatchScore + ms[prevIndex][j-1].score; //you are not at yourself if (prevIndex != i) { sumScore += penalty; } if (sumScore < 0) { sumScore = 0; } if (sumScore > ms[i][j].score) { ms[i][j].score = sumScore; ms[i][j].prev = prevIndex; } } } } } catch(exception& e) { m->errorOut(e, "Maligner", "fillScoreMatrix"); exit(1); } } //*************************************************************************************************************** vector Maligner::extractHighestPath(vector > ms) { try { //get matrix dimensions int numCols = query.getAligned().length(); int numRows = ms.size(); //find highest score scoring matrix score_struct highestStruct; int highestScore = 0; for (int i = 0; i < numRows; i++) { for (int j = 0; j < numCols; j++) { if (ms[i][j].score > highestScore) { highestScore = ms[i][j].score; highestStruct = ms[i][j]; } } } vector path; int rowIndex = highestStruct.row; int pos = highestStruct.col; int score = highestStruct.score; while (pos >= 0 && score > 0) { score_struct temp = ms[rowIndex][pos]; score = temp.score; if (score > 0) { path.push_back(temp); } rowIndex = temp.prev; pos--; } reverse(path.begin(), path.end()); return path; } catch(exception& e) { m->errorOut(e, "Maligner", "extractHighestPath"); exit(1); } } //*************************************************************************************************************** vector Maligner::mapTraceRegionsToAlignment(vector path) { try { vector trace; int region_index = path[0].row; int region_start = path[0].col; for (int i = 1; i < path.size(); i++) { int next_region_index = path[i].row; if (next_region_index != region_index) { // add trace region int col_index = path[i].col; trace_struct temp; temp.col = region_start; temp.oldCol = col_index-1; temp.row = region_index; trace.push_back(temp); region_index = path[i].row; region_start = col_index; } } // get last one trace_struct temp; temp.col = region_start; temp.oldCol = 
path[path.size()-1].col; temp.row = region_index; trace.push_back(temp); return trace; } catch(exception& e) { m->errorOut(e, "Maligner", "mapTraceRegionsToAlignment"); exit(1); } } //*************************************************************************************************************** string Maligner::constructChimericSeq(vector trace, vector seqs) { try { string chimera = ""; for (int i = 0; i < trace.size(); i++) { string seqAlign = seqs[trace[i].row].getAligned(); seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1)); chimera += seqAlign; } return chimera; } catch(exception& e) { m->errorOut(e, "Maligner", "constructChimericSeq"); exit(1); } } //*************************************************************************************************************** string Maligner::constructAntiChimericSeq(vector trace, vector seqs) { try { string antiChimera = ""; for (int i = 0; i < trace.size(); i++) { int oppositeIndex = trace.size() - i - 1; string seqAlign = seqs[trace[oppositeIndex].row].getAligned(); seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1)); antiChimera += seqAlign; } return antiChimera; } catch(exception& e) { m->errorOut(e, "Maligner", "constructChimericSeq"); exit(1); } } //*************************************************************************************************************** float Maligner::computePercentID(string queryAlign, string chimera) { try { if (queryAlign.length() != chimera.length()) { m->mothurOut("Error, alignment strings are of different lengths: \n"); m->mothurOut(toString(queryAlign.length())+ "\n"); m->mothurOut(toString(chimera.length())+ "\n"); return -1.0; } int numIdentical = 0; int countA = 0; int countB = 0; for (int i = 0; i < queryAlign.length(); i++) { if (((queryAlign[i] != 'G') && (queryAlign[i] != 'T') && (queryAlign[i] != 'A') && (queryAlign[i] != 'C')&& (queryAlign[i] != '.') && (queryAlign[i] != '-')) || ((chimera[i] != 'G') && (chimera[i] != 'T') && (chimera[i] != 'A') && (chimera[i] != 'C')&& (chimera[i] != '.') && (chimera[i] != '-'))) {} else { bool charA = false; bool charB = false; if ((queryAlign[i] == 'G') || (queryAlign[i] == 'T') || (queryAlign[i] == 'A') || (queryAlign[i] == 'C')) { charA = true; } if ((chimera[i] == 'G') || (chimera[i] == 'T') || (chimera[i] == 'A') || (chimera[i] == 'C')) { charB = true; } if (charA || charB) { if (charA) { countA++; } if (charB) { countB++; } if (queryAlign[i] == chimera[i]) { numIdentical++; } } } } float numBases = (countA + countB) /(float) 2; if (numBases == 0) { return 0; } float percentIdentical = (numIdentical/(float)numBases) * 100; return percentIdentical; } catch(exception& e) { m->errorOut(e, "Maligner", "computePercentID"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/chimera/maligner.h000077500000000000000000000036101424121717000200030ustar00rootroot00000000000000#ifndef MALIGNER_H #define MALIGNER_H /* * maligner.h * Mothur * * Created by westcott on 9/23/09. * Copyright 2009 Schloss Lab. All rights reserved. 
* */ #include "decalc.h" #include "mothurchimera.h" #include "searchdatabase.hpp" /***********************************************************************/ //This class was modeled after the chimeraMaligner written by the Broad Institute /**********************************************************************/ class Maligner { public: Maligner(vector, int, int, float, int, int); //int, int, int, , string, Database*, Database* ~Maligner() = default;; string getResults(Sequence, DeCalculator); float getPercentID() { return percentIdenticalQueryChimera; } vector getOutput() { return outputResults; } private: Sequence query; vector refSeqs; vector db; int minCoverage, minSimilarity, matchScore, misMatchPenalty; float minDivR, percentIdenticalQueryChimera; vector outputResults; map spotMap; vector unalignedMap; vector minCoverageFilter(vector); //removes top matches that do not have minimum coverage with query. int computeChimeraPenalty(); vector verticalFilter(vector); vector< vector > buildScoreMatrix(int, int); void fillScoreMatrix(vector >&, vector, int); vector extractHighestPath(vector >); vector mapTraceRegionsToAlignment(vector); string constructChimericSeq(vector, vector); string constructAntiChimericSeq(vector, vector); float computePercentID(string, string); string chimeraMaligner(int, DeCalculator); MothurOut* m; }; /***********************************************************************/ #endif mothur-1.48.0/source/chimera/mothurchimera.cpp000077500000000000000000000322061424121717000214120ustar00rootroot00000000000000/* * chimera.cpp * Mothur * * Created by Sarah Westcott on 8/11/09. * Copyright 2009 Schloss Lab Umass Amherst. All rights reserved. * */ #include "mothurchimera.h" //*************************************************************************************************************** //this is a vertical soft filter string MothurChimera::createFilter(vector seqs, float t) { try { filterString = ""; int threshold = int (t * seqs.size()); vector gaps; gaps.resize(seqs[0]->getAligned().length(), 0); vector a; a.resize(seqs[0]->getAligned().length(), 0); vector t; t.resize(seqs[0]->getAligned().length(), 0); vector g; g.resize(seqs[0]->getAligned().length(), 0); vector c; c.resize(seqs[0]->getAligned().length(), 0); filterString = (string(seqs[0]->getAligned().length(), '1')); //for each sequence for (int i = 0; i < seqs.size(); i++) { if (m->getControl_pressed()) { return filterString; } string seqAligned = seqs[i]->getAligned(); if (seqAligned.length() != filterString.length()) { m->mothurOut(seqs[i]->getName() + " is not the same length as the template sequences. 
Aborting!\n"); exit(1); } for (int j = 0; j < seqAligned.length(); j++) { //if this spot is a gap if ((seqAligned[j] == '-') || (seqAligned[j] == '.')) { gaps[j]++; } else if (toupper(seqAligned[j]) == 'A') { a[j]++; } else if (toupper(seqAligned[j]) == 'T') { t[j]++; } else if (toupper(seqAligned[j]) == 'G') { g[j]++; } else if (toupper(seqAligned[j]) == 'C') { c[j]++; } } } //zero out spot where all sequences have blanks int numColRemoved = 0; for(int i = 0;i < seqs[0]->getAligned().length(); i++){ if (m->getControl_pressed()) { return filterString; } if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; } else if (((a[i] < threshold) && (t[i] < threshold) && (g[i] < threshold) && (c[i] < threshold))) { filterString[i] = '0'; numColRemoved++; } } if (threshold != 0) { m->mothurOut("Filter removed " + toString(numColRemoved) + " columns.\n"); } return filterString; } catch(exception& e) { m->errorOut(e, "MothurChimera", "createFilter"); exit(1); } } //*************************************************************************************************************** map MothurChimera::runFilter(Sequence* seq) { try { map maskMap; string seqAligned = seq->getAligned(); string newAligned = ""; int count = 0; for (int j = 0; j < seqAligned.length(); j++) { //if this spot is a gap if (filterString[j] == '1') { newAligned += seqAligned[j]; maskMap[count] = j; count++; } } seq->setAligned(newAligned); return maskMap; } catch(exception& e) { m->errorOut(e, "MothurChimera", "runFilter"); exit(1); } } //*************************************************************************************************************** vector MothurChimera::readSeqs(string file) { try { vector container; int count = 0; length = 0; unaligned = false; m->mothurOut("Reading sequences from " + file + "..."); cout.flush(); ifstream in; Utils util; util.openInputFile(file, in); //read in seqs and store in vector while(!in.eof()){ if (m->getControl_pressed()) { return container; } Sequence* current = new Sequence(in); gobble(in); if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length else if (length != current->getAligned().length()) { unaligned = true; } if (current->getName() != "") { container.push_back(current); } } in.close(); m->mothurOut("Done.\n"); filterString = (string(container[0]->getAligned().length(), '1')); return container; } catch(exception& e) { m->errorOut(e, "MothurChimera", "readSeqs"); exit(1); } } //*************************************************************************************************************** void MothurChimera::setMask(string filename) { try { if (filename == "default") { //default is from wigeon 236627 EU009184.1 Shigella dysenteriae str. 
FBD013 seqMask = ".....................................................................................................AAATTGAAGAGTTT-GA--T-CA-T-G-GCTC-AG-AT-TGAA-C-GC--TGG-C--G-GC-A-GG--C----C-T--AACACA-T-GC-A-AGT-CGA-A-CG----------G-TAA-CA-G----------------------------GAAG-A-AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CT-TCTTT----------------G-CT--G--AC--G--AG-T-GG-C-GG-A--C-------------GGG-TGAGT-A--AT-GT-C-T-G-GG---A-A--A-CT-G--C-C-TGA--TG-G------------------------------------------------------------------A-GG----GGG-AT-AA-CTA-------------------------C-T-G-----------------------GAA-A---CGG-TAG-CTAA-TA---CC-G--C-AT-A----------A--------------------C-------------------------------------GT-C-----------------------------------------------------------------------------------------------------------------------G-CA-A--------------------------------------------------------------------------------------------------------------------------------------G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CAAA--G-A-G-GG-----G--GA-C-CT--------------------------------------------------------------------------------------------------------------------TCG-G----------------------------------------------------------------------------------------------------------------------G----CC-TC--T---T-G--------------C----C-A---T-CG-G---AT---G-T-----G-CCC-AGA--T-GGG--A------TT--A--G-CT-A----G---TAGG-T-G-GG-G-T----AAC-GG-C-T-C-ACCT--A-GG-C-G--A-CG-A------------TCC-C-T------AG-CT-G-G-TCT-G-AG----A--GG-AT--G-AC-C-AG-CCAC-A-CTGGA--A-C-TG-A-GA-C-AC-G-G-TCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATA-TTGCA-C-AA-T-GG--GC-GC-A----A-G-CC-T-GA-TG-CA-GCCA-TGCC-G-CG-T---G-T-A--T--GA-A-G--A--A-G-G-CC-----TT-CG---------G-G-T-T-G-T--A---AA-G-TAC--------TT-TC-A-G--C-GGG----GA-G--G---AA-GGGA---GTAA-AG----T--T--AA-T---A----C-----CT-T-TGC-TCA-TT-GA-CG-TT-A-C-CC-G-CA-G---------AA-----------GAAGC-ACC-GG-C-TAA---C--T-CCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GG-AG-GGT-GCA-A-G-CG-TTAA-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GCGC-AC--G-CA-G-G-C-G------------G--T-TT-G-T-T-AA----G-T-C-A---G-ATG-TG-A-AA-TC--CC-CGG-G--------------------------------------------------------------------CT-C-AA-------------------------------------------------------------------------CC-T-G-GG-AA-C----T-G-C-A-T-C--------T--GA-T-A-C-T-G-GCA--A-G-C-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-T-G-A-G-T-C-----T-CG--TA-G-A------------G-GG-G-GG-T----AG--AATT-CCA-G-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TC-T-GGA--GG-A-AT-A-CC-GG--T--G--GC-GAA-G--G-C---G----G--C-C-CCCTG------G-AC-GA--------------------------------------------------------------AG-A-C-T--GA--CG-----CT-CA-GG--T-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-TC--GA-CT---------T-GG--A--G-G-TT-G-TG-C--C--------------------------------------------------------------------------------------CTT-GA--------------------------------------------------------------------------------------------------------------------------------------------------G-G-C-GT--G-G-C-T-TC-C------GG--A----GC-TAA--CG-C-G-T--T--AA-GT--C----G-ACC-GCC-T-G-GG-GAG-TA---CGG-----C-C--G-C-A-A-GGT-T--AAA-ACTC-AAA---------TGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-ATG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-TGGTC-TT-G-AC-A-T-C--------------CAC-G-G-------------A-AG-T-T-T--TC--A-GA-G-A-T--G-A-G--A-A-T-G--T-G-----CC-------------------------------------T--TC-G------------------------------------------GG----A----A---CC-GTG---A--GA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TT-G--TGA-AA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TA--TC--C-TTTG--T-T-G-C-C---AG-C-G-----G-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TCC------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GG---C----C-G------------G----G---A-A--CT---------------C-A-A-A-G-GA-G--AC-T-G-CCA--G-T------------------------------------G-A---TAA----------------------------------A-C-T-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---ATC-A-T-G-G-C-C-CTT----AC-G--AC-C-A-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-CGCA-T-A--C-AAA-GA-GA--------------------------------------------------------------------------------------------------A-G-C-G-A--C-CTCG-C--G---------------------------------------A-GA-G-C-----------A--A-G-CG---G----------A--CCT-C------A-T-AAAGT-GC-G-T-C-G-TAG-TCC--------GGA-T-TGGAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTCC-A-T-G-AA-G-TC-GGAAT-CG-C-TA--G-TA-AT-C-G-T----GGA-TC-A-G--A------AT--GCC-AC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---TGG-G-TT-GC-AAA--A-GAA------G--T-AGG-TA-G-C-T-T-AA-C-C--------------------------------------------------------------TT----C-------------------------------------------------------------------------------------------------G--GG-A--GG-G--C---GC-TTA--CC--ACT-T----T-GTG-AT-TCA------------------------TG--ACT-GGGG-TG-AAG-TCGTAACAA-GGTAA-CCGT-AGGGGAA-CCTG-CGGT-TGGATCACCTCCTTA..................................................................................................................................................................................................................................................
.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................."; }else if (filename == "") { //do nothing seqMask = ""; }else{ ifstream infile; Utils util; util.openInputFile(filename, infile); if (!infile.eof()) { Sequence temp(infile); seqMask = temp.getAligned(); }else { m->mothurOut("Problem with mask.\n"); seqMask = ""; } infile.close(); } } catch(exception& e) { m->errorOut(e, "MothurChimera", "setMask"); exit(1); } } //*************************************************************************************************************** Sequence* MothurChimera::getSequence(string name) { try{ Sequence* temp; //look through templateSeqs til you find it int spot = -1; for (int i = 0; i < templateSeqs.size(); i++) { if (name == templateSeqs[i]->getName()) { spot = i; break; } } if(spot == -1) { m->mothurOut("Error: Could not find sequence.\n"); return nullptr; } temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned()); return temp; } catch(exception& e) { m->errorOut(e, "MothurChimera", "getSequence"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/chimera/mothurchimera.h000077500000000000000000000125031424121717000210550ustar00rootroot00000000000000#ifndef CHIMERA_H #define CHIMERA_H /* * mothurchimera.h * Mothur * * Created by Sarah Westcott on 7/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
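//***************************************************************************************************************
//An illustrative sketch (not from the original file) of how the filter methods above are meant to chain: build
//the column filter across the whole reference template, then collapse an individual sequence down to the kept
//columns. The 0.5-style threshold and the name exampleApplyVerticalFilter are assumptions for illustration.
inline map<int, int> exampleApplyVerticalFilter(MothurChimera& detector, vector<Sequence*>& templateSeqs, Sequence* query, float threshold) {
	//createFilter marks a column '0' when every template sequence has a gap there, or when no base reaches
	//threshold * (number of template sequences); all other columns stay '1'
	detector.createFilter(templateSeqs, threshold);

	//runFilter keeps only the '1' columns of the query and returns a map tying each filtered position back
	//to its original alignment column, so coordinates can still be reported against the unfiltered alignment
	return detector.runFilter(query);
}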
* */ #include "mothur.h" #include "sequence.hpp" #include "currentfile.h" #include "utils.hpp" /***********************************************************************/ struct data_struct { float divr_qla_qrb; float divr_qlb_qra; float qla_qrb; float qlb_qra; float qla; float qrb; float ab; float qa; float qb; float lab; float rab; float qra; float qlb; int winLStart; int winLEnd; int winRStart; int winREnd; Sequence querySeq; Sequence parentA; Sequence parentB; float bsa; float bsb; float bsMax; float chimeraMax; }; /***********************************************************************/ struct data_results { vector results; string flag; Sequence trimQuery; //results malignerResults; data_results(vector d, string f, map s, Sequence t) : results(d), flag(f), trimQuery(t) {} data_results() = default; }; /***********************************************************************/ //sorts lowest to highest first by bsMax, then if tie by chimeraMax inline bool compareDataStruct(data_struct left, data_struct right){ if (left.bsMax < right.bsMax) { return true; } else if (left.bsMax == right.bsMax) { return (left.chimeraMax < right.chimeraMax); }else { return false; } } /***********************************************************************/ struct Preference { string name; string leftParent; //keep the name of closest left string rightParent; //keep the name of closest float score; //preference score float closestLeft; //keep the closest left float closestRight; //keep the closest right int midpoint; Preference() { name = ""; leftParent = ""; rightParent = ""; score = 0.0; closestLeft = 10000.0; closestRight = 10000.0; midpoint = 0; } ~Preference() = default; }; /***********************************************************************/ struct score_struct { int prev; int score; int row; int col; // int mismatches; }; /***********************************************************************/ struct trace_struct { int col; int oldCol; int row; }; /***********************************************************************/ struct results { int regionStart; int regionEnd; int nastRegionStart; int nastRegionEnd; string parent; string parentAligned; float queryToParent; float queryToParentLocal; float divR; }; /***********************************************************************/ struct SeqDist { Sequence* seq; float dist; int index; }; /***********************************************************************/ struct SeqCompare { Sequence seq; float dist; int index; }; //******************************************************************************************************************** //sorts lowest to highest inline bool compareRegionStart(results left, results right){ return (left.nastRegionStart < right.nastRegionStart); } //******************************************************************************************************************** //sorts lowest to highest inline bool compareSeqDist(SeqDist left, SeqDist right){ return (left.dist < right.dist); } //******************************************************************************************************************** //sorts lowest to highest inline bool compareSeqCompare(SeqCompare left, SeqCompare right){ return (left.dist < right.dist); } //******************************************************************************************************************** struct sim { string leftParent; string rightParent; float score; int midpoint; }; /***********************************************************************/ class MothurChimera { public: 
MothurChimera(){ m = MothurOut::getInstance(); current = CurrentFile::getInstance(); length = 0; unaligned = false; byGroup = false; } virtual ~MothurChimera(){ for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } for (int i = 0; i < filteredTemplateSeqs.size(); i++) { delete filteredTemplateSeqs[i]; } }; virtual bool getUnaligned() { return unaligned; } virtual int getLength() { return length; } virtual vector readSeqs(string); virtual void setMask(string); virtual map runFilter(Sequence*); virtual string createFilter(vector, float); virtual void printHeader(ostream&){}; virtual int getChimeras(Sequence*){ return 0; } virtual int getChimeras(){ return 0; } virtual Sequence print(ostream&, ostream&){ Sequence temp; return temp; } virtual Sequence print(ostream&, ostream&, data_results, data_results) { Sequence temp; return temp; } virtual int print(ostream&, ostream&, string){ return 0; } virtual int getNumNoParents(){ return 0; } virtual data_results getResults() { data_results results; return results; } protected: vector templateSeqs; vector filteredTemplateSeqs; bool filter, unaligned, byGroup; int length; string seqMask, filterString, outputDir, templateFileName; Sequence* getSequence(string); //find sequence from name MothurOut* m; CurrentFile* current; Utils util; }; /***********************************************************************/ #endif mothur-1.48.0/source/chimera/myPerseus.cpp000077500000000000000000000630631424121717000205440ustar00rootroot00000000000000/* * myPerseus.cpp * * * Created by Pat Schloss on 9/5/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. * */ #include "myPerseus.h" /**************************************************************************************************/ int PERSEUSMAXINT = numeric_limits::max(); /**************************************************************************************************/ vector > Perseus::binomial(int maxOrder){ try { vector > binomial(maxOrder+1); for(int i=0;i<=maxOrder;i++){ binomial[i].resize(maxOrder+1); binomial[i][0]=1; binomial[0][i]=0; } binomial[0][0]=1; binomial[1][0]=1; binomial[1][1]=1; for(int i=2;i<=maxOrder;i++){ binomial[1][i]=0; } for(int i=2;i<=maxOrder;i++){ for(int j=1;j<=maxOrder;j++){ if(i==j){ binomial[i][j]=1; } if(j>i) { binomial[i][j]=0; } else { binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j]; } } } return binomial; } catch(exception& e) { m->errorOut(e, "Perseus", "binomial"); exit(1); } } /**************************************************************************************************/ double Perseus::basicPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, pwModel model){ try { double GAP = model.GAP_OPEN; double MATCH = model.MATCH; double MISMATCH = model.MISMATCH; int queryLength = query.size(); int refLength = reference.size(); vector > alignMatrix(queryLength + 1); vector > alignMoves(queryLength + 1); for(int i=0;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[i].resize(refLength + 1, 0); alignMoves[i].resize(refLength + 1, 'x'); } for(int i=0;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[i][0] = GAP * i; alignMoves[i][0] = 'u'; } for(int i=0;i<=refLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[0][i] = GAP * i; alignMoves[0][i] = 'l'; } for(int i=1;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } for(int j=1;j<=refLength;j++){ double nogapScore; if(query[i-1] == reference[j-1]){ nogapScore = alignMatrix[i-1][j-1] + MATCH; 
} else { nogapScore = alignMatrix[i-1][j-1] + MISMATCH; } double leftScore; if(i == queryLength) { leftScore = alignMatrix[i][j-1]; } else { leftScore = alignMatrix[i][j-1] + GAP; } double upScore; if(j == refLength) { upScore = alignMatrix[i-1][j]; } else { upScore = alignMatrix[i-1][j] + GAP; } if(nogapScore > leftScore){ if(nogapScore > upScore){ alignMoves[i][j] = 'd'; alignMatrix[i][j] = nogapScore; } else{ alignMoves[i][j] = 'u'; alignMatrix[i][j] = upScore; } } else{ if(leftScore > upScore){ alignMoves[i][j] = 'l'; alignMatrix[i][j] = leftScore; } else{ alignMoves[i][j] = 'u'; alignMatrix[i][j] = upScore; } } } } int i = queryLength; int j = refLength; qAlign = ""; rAlign = ""; int diffs = 0; int length = 0; while(i > 0 && j > 0){ if (m->getControl_pressed()) { return 0; } if(alignMoves[i][j] == 'd'){ qAlign = query[i-1] + qAlign; rAlign = reference[j-1] + rAlign; if(query[i-1] != reference[j-1]){ diffs++; } length++; i--; j--; } else if(alignMoves[i][j] == 'u'){ qAlign = query[i-1] + qAlign; if(j != refLength) { rAlign = '-' + rAlign; diffs++; length++; } else { rAlign = '.' + rAlign; } i--; } else if(alignMoves[i][j] == 'l'){ rAlign = reference[j-1] + rAlign; if(i != queryLength){ qAlign = '-' + qAlign; diffs++; length++; } else { qAlign = '.' + qAlign; } j--; } } while(i>0){ if (m->getControl_pressed()) { return 0; } rAlign = '.' + rAlign; qAlign = query[i-1] + qAlign; i--; } while(j>0){ if (m->getControl_pressed()) { return 0; } rAlign = reference[j-1] + rAlign; qAlign = '.' + qAlign; j--; } return double(diffs)/double(length); } catch(exception& e) { m->errorOut(e, "Perseus", "basicPairwiseAlignSeqs"); exit(1); } } /**************************************************************************************************/ int Perseus::getDiffs(string qAlign, string rAlign, vector& leftDiffs, vector& leftMap, vector& rightDiffs, vector& rightMap){ try { int alignLength = qAlign.length(); int lDiffs = 0; int lCount = 0; for(int l=0;lgetControl_pressed()) { return 0; } if(qAlign[l] == '-'){ lDiffs++; } else if(qAlign[l] != '.'){ if(rAlign[l] == '-'){ lDiffs++; } else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){ lDiffs++; } leftDiffs[lCount] = lDiffs; leftMap[lCount] = l; lCount++; } } int rDiffs = 0; int rCount = 0; for(int l=alignLength-1;l>=0;l--){ if (m->getControl_pressed()) { return 0; } if(qAlign[l] == '-'){ rDiffs++; } else if(qAlign[l] != '.'){ if(rAlign[l] == '-'){ rDiffs++; } else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){ rDiffs++; } rightDiffs[rCount] = rDiffs; rightMap[rCount] = l; rCount++; } } return 0; } catch(exception& e) { m->errorOut(e, "Perseus", "getDiffs"); exit(1); } } /**************************************************************************************************/ int Perseus::getLastMatch(char direction, vector >& alignMoves, int i, int j, string& seqA, string& seqB){ try { char nullReturn = -1; while(i>=1 && j>=1){ if (m->getControl_pressed()) { return 0; } if(direction == 'd'){ if(seqA[i-1] == seqB[j-1]) { return seqA[i-1]; } else { return nullReturn; } } else if(direction == 'l') { j--; } else { i--; } direction = alignMoves[i][j]; } return nullReturn; } catch(exception& e) { m->errorOut(e, "Perseus", "getLastMatch"); exit(1); } } /**************************************************************************************************/ int Perseus::toInt(char b){ try { if(b == 'A') { return 0; } else if(b == 'C') { return 1; } else if(b == 'T') { return 2; } else if(b == 'G') { return 3; } else { m->mothurOut("[ERROR]: " + toString(b) + " is not 
ATGC.\n"); return -1; } } catch(exception& e) { m->errorOut(e, "Perseus", "toInt"); exit(1); } } /**************************************************************************************************/ double Perseus::modeledPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, vector >& correctMatrix){ try { int queryLength = query.size(); int refLength = reference.size(); vector > alignMatrix(queryLength + 1); vector > alignMoves(queryLength + 1); for(int i=0;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[i].resize(refLength + 1, 0); alignMoves[i].resize(refLength + 1, 'x'); } for(int i=0;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[i][0] = 15.0 * i; alignMoves[i][0] = 'u'; } for(int i=0;i<=refLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[0][i] = 15.0 * i; alignMoves[0][i] = 'l'; } for(int i=1;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } for(int j=1;j<=refLength;j++){ double nogap; nogap = alignMatrix[i-1][j-1] + correctMatrix[toInt(query[i-1])][toInt(reference[j-1])]; double gap; double left; if(i == queryLength){ //terminal gap left = alignMatrix[i][j-1]; } else{ if(reference[j-1] == getLastMatch('l', alignMoves, i, j, query, reference)){ gap = 4.0; } else{ gap = 15.0; } left = alignMatrix[i][j-1] + gap; } double up; if(j == refLength){ //terminal gap up = alignMatrix[i-1][j]; } else{ if(query[i-1] == getLastMatch('u', alignMoves, i, j, query, reference)){ gap = 4.0; } else{ gap = 15.0; } up = alignMatrix[i-1][j] + gap; } if(nogap < left){ if(nogap < up){ alignMoves[i][j] = 'd'; alignMatrix[i][j] = nogap; } else{ alignMoves[i][j] = 'u'; alignMatrix[i][j] = up; } } else{ if(left < up){ alignMoves[i][j] = 'l'; alignMatrix[i][j] = left; } else{ alignMoves[i][j] = 'u'; alignMatrix[i][j] = up; } } } } int i = queryLength; int j = refLength; int alignLength = 0; while(i > 0 && j > 0){ if (m->getControl_pressed()) { return 0; } if(alignMoves[i][j] == 'd'){ qAlign = query[i-1] + qAlign; rAlign = reference[j-1] + rAlign; alignLength++; i--; j--; } else if(alignMoves[i][j] == 'u'){ if(j != refLength){ qAlign = query[i-1] + qAlign; rAlign = '-' + rAlign; alignLength++; } i--; } else if(alignMoves[i][j] == 'l'){ if(i != queryLength){ qAlign = '-' + qAlign; rAlign = reference[j-1] + rAlign; alignLength++; } j--; } } return alignMatrix[queryLength][refLength] / (double)alignLength; } catch(exception& e) { m->errorOut(e, "Perseus", "modeledPairwiseAlignSeqs"); exit(1); } } /**************************************************************************************************/ int Perseus::getAlignments(int curSequenceIndex, vector sequences, vector& alignments, vector >& leftDiffs, vector >& leftMaps, vector >& rightDiffs, vector >& rightMaps, int& bestRefSeq, int& bestRefDiff, vector& restricted){ try { int numSeqs = sequences.size(); //int bestSequenceMismatch = PERSEUSMAXINT; string curSequence = sequences[curSequenceIndex].sequence; int curFrequency = sequences[curSequenceIndex].frequency; bestRefSeq = -1; int bestIndex = -1; int bestDiffs = PERSEUSMAXINT; int comparisons = 0; pwModel model(0, -1, -1.5); for(int i=0;igetControl_pressed()) { return 0; } if(i != curSequenceIndex && restricted[i] != 1 && sequences[i].frequency >= 2 * curFrequency){ string refSequence = sequences[i].sequence; leftDiffs[i].assign(curSequence.length(), 0); leftMaps[i].assign(curSequence.length(), 0); rightDiffs[i].assign(curSequence.length(), 0); rightMaps[i].assign(curSequence.length(), 0); 
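/* This candidate passed the abundance check above (its frequency is at least
   twice the query's).  Align the query to it and record, for every query
   position, the cumulative mismatch count scanning from the left and from the
   right; getChimera() later sums a left count and a right count on either
   side of each possible breakpoint to pick the best two-parent model. */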
basicPairwiseAlignSeqs(curSequence, refSequence, alignments[i].query, alignments[i].reference, model); getDiffs(alignments[i].query, alignments[i].reference, leftDiffs[i], leftMaps[i], rightDiffs[i], rightMaps[i]); int diffs = rightDiffs[i][curSequence.length()-1]; if(diffs < bestDiffs){ bestDiffs = diffs; bestIndex = i; } comparisons++; restricted[i] = 0; } else{ restricted[i] = 1; } } bestRefSeq = bestIndex; bestRefDiff = bestDiffs; return comparisons; } catch(exception& e) { m->errorOut(e, "Perseus", "getAlignments"); exit(1); } } /**************************************************************************************************/ int Perseus::getChimera(vector sequences, vector >& leftDiffs, vector >& rightDiffs, int& leftParent, int& rightParent, int& breakPoint, vector& singleLeft, vector& bestLeft, vector& singleRight, vector& bestRight, vector restricted){ try { int numRefSeqs = restricted.size(); int seqLength = leftDiffs[0].size(); singleLeft.resize(seqLength, PERSEUSMAXINT); bestLeft.resize(seqLength, -1); for(int l=0;lgetControl_pressed()) { return 0; } for(int i=0;i sequences[bestLeft[l]].frequency))){ singleLeft[l] = leftDiffs[i][l]; bestLeft[l] = i; } } } } singleRight.resize(seqLength, PERSEUSMAXINT); bestRight.resize(seqLength, -1); for(int l=0;lgetControl_pressed()) { return 0; } for(int i=0;i sequences[bestRight[l]].frequency))){ singleRight[l] = rightDiffs[i][l]; bestRight[l] = i; } } } } int bestChimeraMismatches = PERSEUSMAXINT; leftParent = -1; rightParent = -1; breakPoint = -1; for(int l=0;lgetControl_pressed()) { return 0; } int chimera = singleLeft[l] + singleRight[seqLength - l - 2]; if(chimera < bestChimeraMismatches){ bestChimeraMismatches = chimera; breakPoint = l; leftParent = bestLeft[l]; rightParent = bestRight[seqLength - l - 2]; } } return bestChimeraMismatches; } catch(exception& e) { m->errorOut(e, "Perseus", "getChimera"); exit(1); } } /**************************************************************************************************/ string Perseus::stitchBimera(vector& alignments, int leftParent, int rightParent, int breakPoint, vector >& leftMaps, vector >& rightMaps){ try { int breakLeft = leftMaps[leftParent][breakPoint]; int breakRight = rightMaps[rightParent][rightMaps[rightParent].size() - breakPoint - 2]; string left = alignments[leftParent].reference; string right = alignments[rightParent].reference; string chimera = ""; for(int i=0;i<=breakLeft;i++){ if (m->getControl_pressed()) { return 0; } if(left[i] != '-' && left[i] != '.'){ chimera += left[i]; } } for(int i=breakRight;igetControl_pressed()) { return 0; } if(right[i] != '-' && right[i] != '.'){ chimera += right[i]; } } return chimera; } catch(exception& e) { m->errorOut(e, "Perseus", "stitchBimera"); exit(1); } } /**************************************************************************************************/ int Perseus::getTrimera(vector& sequences, vector >& leftDiffs, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector& singleLeft, vector& bestLeft, vector& singleRight, vector& bestRight, vector restricted){ try { int numRefSeqs = leftDiffs.size(); int alignLength = leftDiffs[0].size(); int bestTrimeraMismatches = PERSEUSMAXINT; leftParent = -1; middleParent = -1; rightParent = -1; breakPointA = -1; breakPointB = -1; vector > minDelta(alignLength); vector > minDeltaSeq(alignLength); for(int i=0;igetControl_pressed()) { return 0; } minDelta[i].assign(alignLength, PERSEUSMAXINT); minDeltaSeq[i].assign(alignLength, -1); } for(int 
x=0;xgetControl_pressed()) { return 0; } if(!restricted[i]){ int delta = leftDiffs[i][y] - leftDiffs[i][x]; if(delta < minDelta[x][y] || (delta == minDelta[x][y] && sequences[i].frequency > sequences[minDeltaSeq[x][y]].frequency)){ minDelta[x][y] = delta; minDeltaSeq[x][y] = i; } } } minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2]; if(minDelta[x][y] < bestTrimeraMismatches){ bestTrimeraMismatches = minDelta[x][y]; breakPointA = x; breakPointB = y; leftParent = bestLeft[x]; middleParent = minDeltaSeq[x][y]; rightParent = bestRight[alignLength - y - 2]; } } } return bestTrimeraMismatches; } catch(exception& e) { m->errorOut(e, "Perseus", "getTrimera"); exit(1); } } /**************************************************************************************************/ string Perseus::stitchTrimera(vector alignments, int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB, vector >& leftMaps, vector >& rightMaps){ try { int p1SplitPoint = leftMaps[leftParent][breakPointA]; int p2SplitPoint = leftMaps[middleParent][breakPointB]; int p3SplitPoint = rightMaps[rightParent][rightMaps[rightParent].size() - breakPointB - 2]; string chimeraRefSeq; for(int i=0;i<=p1SplitPoint;i++){ if (m->getControl_pressed()) { return chimeraRefSeq; } if(alignments[leftParent].reference[i] != '-' && alignments[leftParent].reference[i] != '.'){ chimeraRefSeq += alignments[leftParent].reference[i]; } } for(int i=p1SplitPoint+1;i<=p2SplitPoint;i++){ if (m->getControl_pressed()) { return chimeraRefSeq; } if(alignments[middleParent].reference[i] != '-' && alignments[middleParent].reference[i] != '.'){ chimeraRefSeq += alignments[middleParent].reference[i]; } } for(int i=p3SplitPoint;igetControl_pressed()) { return chimeraRefSeq; } if(alignments[rightParent].reference[i] != '-' && alignments[rightParent].reference[i] != '.'){ chimeraRefSeq += alignments[rightParent].reference[i]; } } return chimeraRefSeq; } catch(exception& e) { m->errorOut(e, "Perseus", "stitchTrimera"); exit(1); } } /**************************************************************************************************/ int Perseus::threeWayAlign(string query, string parent1, string parent2, string& qAlign, string& aAlign, string& bAlign){ try { pwModel model(1.0, -1.0, -5.0); string qL, rL; string qR, rR; basicPairwiseAlignSeqs(query, parent1, qL, rL, model); basicPairwiseAlignSeqs(query, parent2, qR, rR, model); int lLength = qL.length(); int rLength = qR.length(); string qLNew, rLNew; string qRNew, rRNew; int lIndex = 0; int rIndex = 0; while(lIndexgetControl_pressed()) { return 0; } if(qL[lIndex] == qR[rIndex]){ qLNew += qL[lIndex]; rLNew += rL[lIndex]; lIndex++; qRNew += qR[rIndex]; rRNew += rR[rIndex]; rIndex++; } else if(qL[lIndex] == '-' || qL[lIndex] == '.'){ //insert a gap into the right sequences qLNew += qL[lIndex]; rLNew += rL[lIndex]; lIndex++; if(rIndex != rLength){ qRNew += '-'; rRNew += '-'; } else{ qRNew += '.'; rRNew += '.'; } } else if(qR[rIndex] == '-' || qR[rIndex] == '.'){ //insert a gap into the left sequences qRNew += qR[rIndex]; rRNew += rR[rIndex]; rIndex++; if(lIndex != lLength){ qLNew += '-'; rLNew += '-'; } else{ qLNew += '.'; rLNew += '.'; } } } qAlign = qLNew; aAlign = rLNew; bAlign = rRNew; bool qStart = 0; bool aStart = 0; bool bStart = 0; for(int i=0;igetControl_pressed()) { return 0; } if(qStart == 0){ if(qAlign[i] == '-') { qAlign[i] = '.'; } else { qStart = 1; } } if(aStart == 0){ if(aAlign[i] == '-') { aAlign[i] = '.'; } else { aStart = 1; } } if(bStart == 0){ if(bAlign[i] == 
'-') { bAlign[i] = '.'; } else { bStart = 1; } } if(aStart == 1 && bStart == 1 && qStart == 1){ break; } } return 0; } catch(exception& e) { m->errorOut(e, "Perseus", "threeWayAlign"); exit(1); } } /**************************************************************************************************/ double Perseus::calcLoonIndex(string query, string parent1, string parent2, int breakPoint, vector >& binMatrix){ try { string queryAln, leftParentAln, rightParentAln; threeWayAlign(query, parent1, parent2, queryAln, leftParentAln, rightParentAln); int alignLength = queryAln.length(); int endPos = alignLength; for(int i=alignLength-1;i>=0; i--){ if(queryAln[i] != '.' && leftParentAln[i] != '.' && rightParentAln[i] != '.'){ endPos = i + 1; break; } } int diffToLeftCount = 0; vector diffToLeftMap(alignLength, 0); int diffToRightCount = 0; vector diffToRightMap(alignLength, 0); for(int i=0;igetControl_pressed()) { return 0; } if(queryAln[i] != leftParentAln[i]){ diffToLeftMap[diffToLeftCount] = i; diffToLeftCount++; } if(queryAln[i] != rightParentAln[i]){ diffToRightMap[diffToRightCount] = i; diffToRightCount++; } } diffToLeftMap[diffToLeftCount] = endPos; diffToRightMap[diffToRightCount] = endPos; int indexL = 0; int indexR = 0; int indexS = 0; vector diffs; vector splits; splits.push_back(-1); diffs.push_back(diffToRightCount); indexS++; while(indexL < diffToLeftCount || indexR < diffToRightCount){ if (m->getControl_pressed()) { return 0; } if(diffToLeftMap[indexL] <= diffToRightMap[indexR]){ diffs.push_back(diffs[indexS - 1] + 1); splits.push_back(diffToLeftMap[indexL]); indexL++; indexS++; } else if(diffToLeftMap[indexL] > diffToRightMap[indexR]) { diffs.push_back(diffs[indexS - 1] - 1); splits.push_back(diffToRightMap[indexR]); indexR++; indexS++; } } int minDiff = PERSEUSMAXINT; int minIndex = -1; for(int i=0;igetControl_pressed()) { return 0; } if(diffs[i] < minDiff){ minDiff = diffs[i]; minIndex = i; } } int splitPos = endPos; if(minIndex < indexS - 1){ splitPos = (splits[minIndex]+splits[minIndex+1]) / 2; } int diffToChimera = 0; int leftDiffToP1 = 0; int rightDiffToP1 = 0; int leftDiffToP2 = 0; int rightDiffToP2 = 0; for(int i=0;igetControl_pressed()) { return 0; } char bQuery = queryAln[i]; char bP1 = leftParentAln[i]; char bP2 = rightParentAln[i]; char bConsensus = bQuery; if(bP1 == bP2){ bConsensus = bP1; } if(bConsensus != bQuery){ diffToChimera++; } if(bConsensus != bP1){ if(i <= splitPos){ leftDiffToP1++; } else{ rightDiffToP1++; } } if(bConsensus != bP2){ if(i <= splitPos){ leftDiffToP2++; } else{ rightDiffToP2++; } } } int diffToClosestParent, diffToFurtherParent; int xA, xB, yA, yB; double aFraction, bFraction; if(diffToLeftCount <= diffToRightCount){ //if parent 1 is closer diffToClosestParent = leftDiffToP1 + rightDiffToP1; xA = leftDiffToP1; xB = rightDiffToP1; diffToFurtherParent = leftDiffToP2 + rightDiffToP2; yA = leftDiffToP2; yB = rightDiffToP2; aFraction = double(splitPos + 1)/(double) endPos; bFraction = 1 - aFraction; } else{ //if parent 2 is closer diffToClosestParent = leftDiffToP2 + rightDiffToP2; xA = rightDiffToP2; xB = leftDiffToP2; diffToFurtherParent = leftDiffToP1 + rightDiffToP1; yA = rightDiffToP1; yB = leftDiffToP1; bFraction = double(splitPos + 1)/(double) endPos; aFraction = 1 - bFraction; } double loonIndex = 0; int totalDifference = diffToClosestParent + diffToChimera; if(totalDifference > 0){ double prob = 0; for(int i=diffToClosestParent;i<=totalDifference;i++){ prob += binMatrix[totalDifference][i] * pow(0.50, i) * pow(0.50, totalDifference - i); } 
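/* prob is the cumulative binomial tail P(X >= diffToClosestParent) for
   n = totalDifference trials with success probability 0.5.  The two blocks
   below compute the analogous tails for the further and closer parent using
   the split fractions aFraction and bFraction, and each term contributes
   -log(prob) to the loon index. */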
loonIndex += -log(prob); } if(diffToFurtherParent > 0){ double prob = 0; for(int i=yA;i<=diffToFurtherParent;i++){ prob += binMatrix[diffToFurtherParent][i] * pow(aFraction, i) * pow(1-aFraction, diffToFurtherParent - i); } loonIndex += -log(prob); } if(diffToClosestParent > 0){ double prob = 0; for(int i=xB;i<=diffToClosestParent;i++){ prob += binMatrix[diffToClosestParent][i] * pow(bFraction, i) * pow(1-bFraction, diffToClosestParent - i); } loonIndex += -log(prob); } return loonIndex; } catch(exception& e) { m->errorOut(e, "Perseus", "calcLoonIndex"); exit(1); } } /**************************************************************************************************/ double Perseus::calcBestDistance(string query, string reference){ try { int alignLength = query.length(); int mismatch = 0; int counter = 0; for(int i=0;igetControl_pressed()) { return 0; } if((query[i] != '.' || reference[i] != '.') && (query[i] != '-' && reference[i] != '-')){ if(query[i] != reference[i]){ mismatch++; } counter++; } } return (double)mismatch / (double)counter; } catch(exception& e) { m->errorOut(e, "Perseus", "calcBestDistance"); exit(1); } } /**************************************************************************************************/ double Perseus::classifyChimera(double singleDist, double cIndex, double loonIndex, double alpha, double beta){ try { double difference = cIndex - singleDist; //y double probability; if(cIndex >= 0.15 || difference > 0.00){ probability = 0.0000; } else{ probability = 1.0 / (1.0 + exp(-(alpha + beta * loonIndex))); } return probability; } catch(exception& e) { m->errorOut(e, "Perseus", "classifyChimera"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/chimera/myPerseus.h000077500000000000000000000055421424121717000202070ustar00rootroot00000000000000#ifndef MOTHURPERSEUS #define MOTHURPERSEUS /* * myPerseus.h * * * Created by Pat Schloss on 9/5/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. 
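 *
 * Overview of the declarations below: Perseus detects chimeras by aligning a
 * query against more abundant candidate parents (getAlignments /
 * basicPairwiseAlignSeqs), tallying left- and right-anchored mismatch
 * profiles (getDiffs), locating the breakpoints that minimize combined
 * mismatches under two-parent (getChimera / stitchBimera) and three-parent
 * (getTrimera / stitchTrimera) models, scoring the best model with the
 * binomial-based loon index (calcLoonIndex), and converting that score to a
 * chimera probability with a logistic function (classifyChimera).
 *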
* */ #include "mothurout.h" /**************************************************************************************************/ struct seqData { seqData(string name, string seq, int freq) : seqName(name), sequence(seq), frequency(freq) { } bool operator<( seqData const& rhs ) const { bool verdict = 0; if(frequency < rhs.frequency){ verdict = 1; } else if(frequency == rhs.frequency){ verdict = (seqName > rhs.seqName); } return verdict; } string seqName; string sequence; int frequency; }; /**************************************************************************************************/ struct pwModel { pwModel(double m, double mm, double g): MATCH(m), MISMATCH(mm), GAP_OPEN(g) {;} double MATCH; double MISMATCH; double GAP_OPEN; }; /**************************************************************************************************/ struct pwAlign { pwAlign(): query(""), reference(""){} pwAlign(string q, string r): query(q), reference(r){} string query; string reference; }; /**************************************************************************************************/ class Perseus { public: Perseus() { m = MothurOut::getInstance(); } ~Perseus() = default; vector > binomial(int); double modeledPairwiseAlignSeqs(string, string, string&, string&, vector >&); int getAlignments(int, vector, vector&, vector >& , vector >&, vector >&, vector >&, int&, int&, vector&); int getChimera(vector,vector >&, vector >&,int&, int&, int&,vector&, vector&, vector&, vector&, vector); string stitchBimera(vector&, int, int, int, vector >&, vector >&); int getTrimera(vector&, vector >&, int&, int&, int&, int&, int&, vector&, vector&, vector&, vector&, vector); string stitchTrimera(vector, int, int, int, int, int, vector >&, vector >&); double calcLoonIndex(string, string, string, int, vector >&); double classifyChimera(double, double, double, double, double); private: MothurOut* m; int toInt(char); double basicPairwiseAlignSeqs(string, string, string&, string&, pwModel); int getDiffs(string, string, vector&, vector&, vector&, vector&); int getLastMatch(char, vector >&, int, int, string&, string&); int threeWayAlign(string, string, string, string&, string&, string&); double calcBestDistance(string, string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/chimera/pintail.cpp000077500000000000000000000324471424121717000202120ustar00rootroot00000000000000/* * pintail.cpp * Mothur * * Created by Sarah Westcott on 7/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
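 *
 * Pintail follows the approach of Ashelford et al.: a query is flagged when
 * its deviation statistic (DE), computed from observed versus expected
 * distances in sliding windows between the query and its closest template
 * match, exceeds the 95th-percentile DE recorded for the template sequences
 * at the same percent distance (see doPrep, getChimeras and print below).
 *
 * The .quan file written by doPrep starts with a '#'-prefixed version line
 * and then holds one line per percent distance, tab-separated:
 *   distance  10%  25%  50%  75%  95%  99%
 * readQuantiles reads the columns back in the same order.
 *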
* */ #include "pintail.h" #include "ignoregaps.h" #include "eachgapdist.h" //******************************************************************************************************************** //sorts lowest to highest inline bool compareQuanMembers(quanMember left, quanMember right){ return (left.score < right.score); } //*************************************************************************************************************** Pintail::Pintail(string filename, string temp, bool f, string mask, string cons, string q, int win, int inc, string o, string version) : MothurChimera() { try { fastafile = filename; templateFileName = temp; templateSeqs = readSeqs(temp); filter = f; setMask(mask); consfile = cons; quanfile = q; window = win; increment = inc; outputDir = o; distcalculator = new eachGapDist(1.0); decalc = new DeCalculator(); doPrep(version); } catch(exception& e) { m->errorOut(e, "Pintail", "Pintail"); exit(1); } } //*************************************************************************************************************** Pintail::~Pintail() { try { delete distcalculator; delete decalc; } catch(exception& e) { m->errorOut(e, "Pintail", "~Pintail"); exit(1); } } //*************************************************************************************************************** int Pintail::doPrep(string version) { try { mergedFilterString = ""; windowSizesTemplate.resize(templateSeqs.size(), window); quantiles.resize(100); //one for every percent mismatch quantilesMembers.resize(100); //one for every percent mismatch //if the user does not enter a mask then you want to keep all the spots in the alignment if (seqMask.length() == 0) { decalc->setAlignmentLength(templateSeqs[0]->getAligned().length()); } else { decalc->setAlignmentLength(seqMask.length()); } decalc->setMask(seqMask); m->mothurOut("Getting conservation... "); cout.flush(); if (consfile == "") { m->mothurOut("Calculating probability of conservation for your template sequences. This can take a while... I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command. Providing the .freq file will improve speed. 
"); cout.flush(); probabilityProfile = decalc->calcFreq(templateSeqs, templateFileName, version); if (m->getControl_pressed()) { return 0; } m->mothurOut("Done.\n"); }else { probabilityProfile = readFreq(); m->mothurOut("Done."); } m->mothurOutEndLine(); //make P into Q for (int i = 0; i < probabilityProfile.size(); i++) { probabilityProfile[i] = 1 - probabilityProfile[i]; } // bool reRead = false; //create filter if needed for later if (filter) { //read in all query seqs vector tempQuerySeqs = readSeqs(fastafile); vector temp; //merge query seqs and template seqs temp = templateSeqs; for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); } if (seqMask != "") { reRead = true; //mask templates for (int i = 0; i < temp.size(); i++) { if (m->getControl_pressed()) { for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; } return 0; } decalc->runMask(temp[i]); } } mergedFilterString = createFilter(temp, 0.5); if (m->getControl_pressed()) { for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; } return 0; } //reread template seqs for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; } } //quantiles are used to determine whether the de values found indicate a chimera //if you have to calculate them, its time intensive because you are finding the de and deviation values for each //combination of sequences in the template if (quanfile != "") { quantiles = readQuantiles(); }else { if ((!filter) && (seqMask != "")) { //if you didn't filter but you want to mask. if you filtered then you did mask first above. reRead = true; //mask templates for (int i = 0; i < templateSeqs.size(); i++) { if (m->getControl_pressed()) { return 0; } decalc->runMask(templateSeqs[i]); } } if (filter) { reRead = true; for (int i = 0; i < templateSeqs.size(); i++) { if (m->getControl_pressed()) { return 0; } runFilter(templateSeqs[i]); } } m->mothurOut("Calculating quantiles for your template. This can take a while... I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command. Providing the .quan file will dramatically improve speed. "); cout.flush(); quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size()); if (m->getControl_pressed()) { return 0; } string noOutliers, outliers; if ((!filter) && (seqMask == "")) { noOutliers = util.getRootName(util.getSimpleName(templateFileName)) + "pintail.quan"; }else if ((!filter) && (seqMask != "")) { noOutliers =util.getRootName(util.getSimpleName(templateFileName)) + "pintail.masked.quan"; }else if ((filter) && (seqMask != "")) { noOutliers = util.getRootName(util.getSimpleName(templateFileName)) + "pintail.filtered." + util.getSimpleName(util.getRootName(fastafile)) + "masked.quan"; }else if ((filter) && (seqMask == "")) { noOutliers = util.getRootName(util.getSimpleName(templateFileName)) + "pintail.filtered." 
+ util.getSimpleName(util.getRootName(fastafile)) + "quan"; } decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size()); if (m->getControl_pressed()) { return 0; } string outputString = "#" + current->getVersion() + "\n"; //adjust quantiles for (int i = 0; i < quantilesMembers.size(); i++) { vector temp; if (quantilesMembers[i].size() == 0) { //in case this is not a distance found in your template files for (int g = 0; g < 6; g++) { temp.push_back(0.0); } }else{ sort(quantilesMembers[i].begin(), quantilesMembers[i].end()); //save 10% temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)]); //save 25% temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)]); //save 50% temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)]); //save 75% temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)]); //save 95% temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)]); //save 99% temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)]); } //output quan value outputString += toString(i+1); for (int u = 0; u < temp.size(); u++) { outputString += "\t" + toString(temp[u]); } outputString += "\n"; quantiles[i] = temp; } printQuanFile(noOutliers, outputString); //free memory quantilesMembers.clear(); m->mothurOut("Done.\n"); } if (reRead) { for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } templateSeqs.clear(); templateSeqs = readSeqs(templateFileName); } return 0; } catch(exception& e) { m->errorOut(e, "Pintail", "doPrep"); exit(1); } } //*************************************************************************************************************** Sequence Pintail::print(ostream& out, ostream& outAcc) { try { int index = ceil(deviation); //is your DE value higher than the 95% string chimera; if (index != 0) { //if index is 0 then its an exact match to a template seq if (util.isEqual(quantiles[index][4], 0)) { chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index); }else { if (DE > quantiles[index][4]) { chimera = "Yes"; } else { chimera = "No"; } } }else{ chimera = "No"; } out << querySeq->getName() << '\t' << "div: " << deviation << "\tstDev: " << DE << "\tchimera flag: " << chimera << endl; if (chimera == "Yes") { m->mothurOut(querySeq->getName() + "\tdiv: " + toString(deviation) + "\tstDev: " + toString(DE) + "\tchimera flag: " + chimera+ "\n"); outAcc << querySeq->getName() << endl; } out << "Observed"; for (int j = 0; j < obsDistance.size(); j++) { out << '\t' << obsDistance[j]; } out << endl; out << "Expected"; for (int m = 0; m < expectedDistance.size(); m++) { out << '\t' << expectedDistance[m] ; } out << endl; return *querySeq; } catch(exception& e) { m->errorOut(e, "Pintail", "print"); exit(1); } } //*************************************************************************************************************** int Pintail::getChimeras(Sequence* query) { try { querySeq = query; trimmed.clear(); windowSizes = window; //find pairs has to be done before a mask bestfit = findPairs(query); if (m->getControl_pressed()) { return 0; } //if they mask if (seqMask != "") { decalc->runMask(query); decalc->runMask(bestfit); } if (filter) { //must be done after a mask runFilter(query); runFilter(bestfit); } //trim seq decalc->trimSeqs(query, bestfit, trimmed); //find windows it = trimmed.begin(); windowsForeachQuery = decalc->findWindows(query, it->first, it->second, windowSizes, increment); //find 
observed distance obsDistance = decalc->calcObserved(query, bestfit, windowsForeachQuery, windowSizes); if (m->getControl_pressed()) { return 0; } Qav = decalc->findQav(windowsForeachQuery, windowSizes, probabilityProfile); if (m->getControl_pressed()) { return 0; } //find alpha seqCoef = decalc->getCoef(obsDistance, Qav); //calculating expected distance expectedDistance = decalc->calcExpected(Qav, seqCoef); if (m->getControl_pressed()) { return 0; } //finding de DE = decalc->calcDE(obsDistance, expectedDistance); if (m->getControl_pressed()) { return 0; } //find distance between query and closest match it = trimmed.begin(); deviation = decalc->calcDist(query, bestfit, it->first, it->second); delete bestfit; return 0; } catch(exception& e) { m->errorOut(e, "Pintail", "getChimeras"); exit(1); } } //*************************************************************************************************************** vector Pintail::readFreq() { try { //read in probabilities and store in vector int pos; float num; vector prob; set h = decalc->getPos(); //positions of bases in masking sequence ifstream in; util.openInputFile(consfile, in); //read version string line = util.getline(in); gobble(in); while(!in.eof()){ in >> pos >> num; if (h.count(pos) > 0) { float Pi; Pi = (num - 0.25) / 0.75; //cannot have probability less than 0. if (Pi < 0) { Pi = 0.0; } //do you want this spot prob.push_back(Pi); } gobble(in); } in.close(); return prob; } catch(exception& e) { m->errorOut(e, "Pintail", "readFreq"); exit(1); } } //*************************************************************************************************************** //calculate the distances from each query sequence to all sequences in the template to find the closest sequence Sequence* Pintail::findPairs(Sequence* q) { try { Sequence* seqsMatches; seqsMatches = decalc->findClosest(q, templateSeqs); return seqsMatches; } catch(exception& e) { m->errorOut(e, "Pintail", "findPairs"); exit(1); } } //*************************************************************************************************************** vector< vector > Pintail::readQuantiles() { try { int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; vector< vector > quan; vector temp; temp.resize(6, 0); //to fill 0 quan.push_back(temp); ifstream in; util.openInputFile(quanfile, in); //read version string line = util.getline(in); gobble(in); while(!in.eof()){ in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; temp.clear(); temp.push_back(ten); temp.push_back(twentyfive); temp.push_back(fifty); temp.push_back(seventyfive); temp.push_back(ninetyfive); temp.push_back(ninetynine); quan.push_back(temp); gobble(in); } in.close(); return quan; } catch(exception& e) { m->errorOut(e, "Pintail", "readQuantiles"); exit(1); } } //***************************************************************************************************************/ void Pintail::printQuanFile(string file, string outputString) { try { ofstream outQuan; util.openOutputFile(file, outQuan); outQuan << outputString; outQuan.close(); } catch(exception& e) { m->errorOut(e, "Pintail", "printQuanFile"); exit(1); } } //***************************************************************************************************************/ mothur-1.48.0/source/chimera/pintail.h000077500000000000000000000057711424121717000176570ustar00rootroot00000000000000#ifndef PINTAIL_H #define PINTAIL_H /* * pintail.h * Mothur * * Created by Sarah Westcott on 7/9/09. 
* Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothurchimera.h" #include "calculator.h" #include "decalc.h" /***********************************************************/ //This class was created using the algorithms described in the // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1. /***********************************************************/ class Pintail : public MothurChimera { public: Pintail(string, string, bool, string, string, string, int, int, string, string); //fastafile, templatefile, filter, processors, mask, conservation, quantile, window, increment, outputDir, version) ~Pintail(); int getChimeras(Sequence*); Sequence print(ostream&, ostream&); void setCons(string c) { consfile = c; } void setQuantiles(string q) { quanfile = q; } private: DistCalc* distcalculator; DeCalculator* decalc; int iters, window, increment, processors; string fastafile, quanfile, consfile; Sequence* querySeq; Sequence* bestfit; //closest match to query in template vector obsDistance; //obsDistance is the vector of observed distances for query vector expectedDistance; //expectedDistance is the vector of expected distances for query float deviation; //deviation is the percentage of mismatched pairs over the whole seq between query and its best match. vector windowsForeachQuery; // windowsForeachQuery is a vector containing the starting spot in query aligned sequence for each window. //this is needed so you can move by bases and not just spots in the alignment int windowSizes; //windowSizes = window size of query vector windowSizesTemplate; //windowSizesTemplate[0] = window size of templateSeqs[0] map trimmed; //trimmed = start and stop of trimmed sequences for query map::iterator it; vector Qav; //Qav is the vector of average variablility for query float seqCoef; //seqCoef is the coeff for query float DE; //DE is the deviaation for query vector probabilityProfile; vector< vector > quantiles; //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2... vector< vector > quantilesMembers; //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2... set h; string mergedFilterString; vector< vector > readQuantiles(); vector readFreq(); Sequence* findPairs(Sequence*); int doPrep(string); void printQuanFile(string, string); }; /***********************************************************/ #endif mothur-1.48.0/source/chimera/slayer.cpp000077500000000000000000000362021424121717000200420ustar00rootroot00000000000000/* * slayer.cpp * Mothur * * Created by westcott on 9/25/09. * Copyright 2009 Schloss Lab. All rights reserved. 
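 *
 * For each pair of candidate parents, getResults vertically filters out
 * columns containing gaps or Ns (verticalFilter), scans breakpoints every
 * windowStep positions (runBellerophon) comparing the percent identity of the
 * query's left and right fragments to each parent (QLA, QRB, QLB, QRA), keeps
 * breakpoints whose divergence ratio meets divRThreshold and whose parent
 * fragments meet parentFragmentThreshold, requires at least four SNPs on each
 * side of the break, and bootstraps those SNPs (bootstrapSNPS) to obtain the
 * per-parent support values BS_A and BS_B.
 *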
* */ #include "slayer.h" /***********************************************************************/ Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i, int snp, int mi) : minBS(mi), windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i), percentSNPSample(snp){ m = MothurOut::getInstance(); } /***********************************************************************/ string Slayer::getResults(Sequence query, vector refSeqs) { try { vector all; all.clear(); myQuery = query; for (int i = 0; i < refSeqs.size(); i++) { for (int j = i+1; j < refSeqs.size(); j++) { if (m->getControl_pressed()) { return "no"; } //make copies of query and each parent because runBellerophon removes gaps and messes them up Sequence q(query.getName(), query.getAligned()); Sequence leftParent(refSeqs[i].getName(), refSeqs[i].getAligned()); Sequence rightParent(refSeqs[j].getName(), refSeqs[j].getAligned()); map spots; //map from spot in original sequence to spot in filtered sequence for query and both parents vector divs = runBellerophon(q, leftParent, rightParent, spots); if (m->getControl_pressed()) { return "no"; } vector selectedDivs; for (int k = 0; k < divs.size(); k++) { vector snpsLeft = getSNPS(divs[k].parentA.getAligned(), divs[k].querySeq.getAligned(), divs[k].parentB.getAligned(), divs[k].winLStart, divs[k].winLEnd); vector snpsRight = getSNPS(divs[k].parentA.getAligned(), divs[k].querySeq.getAligned(), divs[k].parentB.getAligned(), divs[k].winRStart, divs[k].winREnd); if (m->getControl_pressed()) { return "no"; } int numSNPSLeft = snpsLeft.size(); int numSNPSRight = snpsRight.size(); //require at least 4 SNPs on each side of the break if ((numSNPSLeft >= 4) && (numSNPSRight >= 4)) { float BS_A, BS_B; bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B, iters); if (m->getControl_pressed()) { return "no"; } divs[k].bsa = BS_A; divs[k].bsb = BS_B; divs[k].bsMax = max(BS_A, BS_B); divs[k].chimeraMax = max(divs[k].qla_qrb, divs[k].qlb_qra); //so results reflect orignal alignment divs[k].winLStart = spots[divs[k].winLStart]; divs[k].winLEnd = spots[divs[k].winLEnd]; divs[k].winRStart = spots[divs[k].winRStart]; divs[k].winREnd = spots[divs[k].winREnd]; selectedDivs.push_back(divs[k]); } } //save selected for (int mi = 0; mi < selectedDivs.size(); mi++) { all.push_back(selectedDivs[mi]); } } } // compute bootstrap support if (all.size() > 0) { //sort them sort(all.begin(), all.end(), compareDataStruct); reverse(all.begin(), all.end()); outputResults = all; return "yes"; }else { outputResults = all; return "no"; } } catch(exception& e) { m->errorOut(e, "Slayer", "getResults"); exit(1); } } /***********************************************************************/ vector Slayer::runBellerophon(Sequence q, Sequence pA, Sequence pB, map& spots) { try{ vector data; //maps spot in new alignment to spot in alignment before filter spots = verticalFilter(q, pA, pB); //fills baseSpots //get these to avoid numerous function calls string query = q.getAligned(); string parentA = pA.getAligned(); string parentB = pB.getAligned(); int length = query.length(); //check window size if (length < (2*windowSize+windowStep)) { // m->mothurOut("Your window size is too large for " + q->getName() + ". 
I will make the window size " + toString(length/4) + " which is 1/4 the filtered length.\n"); windowSize = length / 4; } for (int i = windowSize-1; i <= (length - windowSize); i += windowStep) { if (m->getControl_pressed()) { return data; } int breakpoint = i; int leftLength = breakpoint + 1; int rightLength = length - leftLength; float QLA = computePercentID(query, parentA, 0, breakpoint); float QRB = computePercentID(query, parentB, breakpoint+1, length-1); float QLB = computePercentID(query, parentB, 0, breakpoint); float QRA = computePercentID(query, parentA, breakpoint+1, length-1); float LAB = computePercentID(parentA, parentB, 0, breakpoint); float RAB = computePercentID(parentA, parentB, breakpoint+1, length-1); float AB = ((LAB*leftLength) + (RAB*rightLength)) / (float) length; float QA = ((QLA*leftLength) + (QRA*rightLength)) / (float) length; float QB = ((QLB*leftLength) + (QRB*rightLength)) / (float) length; float QLA_QRB = ((QLA*leftLength) + (QRB*rightLength)) / (float) length; float QLB_QRA = ((QLB*leftLength) + (QRA*rightLength)) / (float) length; //in original and not used //float avgQA_QB = ((QA*leftLength) + (QB*rightLength)) / (float) length; float divR_QLA_QRB = min((QLA_QRB/QA), (QLA_QRB/QB)); float divR_QLB_QRA = min((QLB_QRA/QA), (QLB_QRA/QB)); //is one of them above the if (divR_QLA_QRB >= divRThreshold || divR_QLB_QRA >= divRThreshold) { if (((QLA_QRB > QA) && (QLA_QRB > QB) && (QLA >= parentFragmentThreshold) && (QRB >= parentFragmentThreshold)) || ((QLB_QRA > QA) && (QLB_QRA > QB) && (QLB >=parentFragmentThreshold) && (QRA >= parentFragmentThreshold))) { data_struct member; member.divr_qla_qrb = divR_QLA_QRB; member.divr_qlb_qra = divR_QLB_QRA; member.qla_qrb = QLA_QRB; member.qlb_qra = QLB_QRA; member.qla = QLA; member.qrb = QRB; member.ab = AB; member.qa = QA; member.qb = QB; member.lab = LAB; member.rab = RAB; member.qra = QRA; member.qlb = QLB; member.winLStart = 0; member.winLEnd = breakpoint; member.winRStart = breakpoint+1; member.winREnd = length-1; member.querySeq = q; member.parentA = pA; member.parentB = pB; member.bsa = 0; member.bsb = 0; member.bsMax = 0; member.chimeraMax = 0; data.push_back(member); }//if }//if }//for return data; } catch(exception& e) { m->errorOut(e, "Slayer", "runBellerophon"); exit(1); } } /***********************************************************************/ vector Slayer::getSNPS(string parentA, string query, string parentB, int left, int right) { try { vector data; for (int i = left; i <= right; i++) { char A = parentA[i]; char Q = query[i]; char B = parentB[i]; if ((A != Q) || (B != Q)) { //ensure not neighboring a gap. change to 12/09 release of chimeraSlayer - not sure what this adds, but it eliminates alot of SNPS if ( //did query loose a base here during filter?? ( i == 0 || abs (baseSpots[0][i] - baseSpots[0][i-1]) == 1) && ( i == query.length()-1 || abs (baseSpots[0][i] - baseSpots[0][i+1]) == 1) && //did parentA loose a base here during filter?? ( i == 0 || abs (baseSpots[1][i] - baseSpots[1][i-1]) == 1) && ( i == parentA.length()-1 || abs (baseSpots[1][i] - baseSpots[1][i+1]) == 1) && //did parentB loose a base here during filter?? 
( i == 0 || abs (baseSpots[2][i] - baseSpots[2][i-1]) == 1) && ( i == parentB.length()-1 || abs (baseSpots[2][i] - baseSpots[2][i+1]) == 1) ) { snps member; member.queryChar = Q; member.parentAChar = A; member.parentBChar = B; data.push_back(member); } } } return data; } catch(exception& e) { m->errorOut(e, "Slayer", "getSNPS"); exit(1); } } /***********************************************************************/ int Slayer::bootstrapSNPS(vector left, vector right, float& BSA, float& BSB, int numIters) { try { m->setRandomSeed((unsigned)time( nullptr )); int count_A = 0; // sceneario QLA,QRB supported int count_B = 0; // sceneario QLB,QRA supported int numLeft = max(1, int(left.size() * percentSNPSample/(float)100 + 0.5)); int numRight = max(1, int(right.size() * percentSNPSample/(float)100 + 0.5)); Utils util; for (int i = 0; i < numIters; i++) { //random sampling with replacement. if (m->getControl_pressed()) { return 0; } vector selectedLeft; for (int j = 0; j < numLeft; j++) { int index = util.getRandomIndex((int)left.size()-1); selectedLeft.push_back(left[index]); } vector selectedRight; for (int j = 0; j < numRight; j++) { int index = util.getRandomIndex((int)right.size()-1); selectedRight.push_back(right[index]); } /* A ------------------------------------------ # QLA QRA # Q ------------------------------------------ # | # | # Q ------------------------------------------ # QLB QRB # B ------------------------------------------ */ float QLA = snpQA(selectedLeft); float QRA = snpQA(selectedRight); float QLB = snpQB(selectedLeft); float QRB = snpQB(selectedRight); //in original - not used - not sure why? //float ALB = snpAB(selectedLeft); //float ARB = snpAB(selectedRight); if ((QLA > QLB) && (QRB > QRA)) { count_A++; } if ((QLB > QLA) && (QRA > QRB)) { count_B++; } } BSA = (float) count_A / (float) numIters * 100; BSB = (float) count_B / (float) numIters * 100; return 0; } catch(exception& e) { m->errorOut(e, "Slayer", "bootstrapSNPS"); exit(1); } } /***********************************************************************/ float Slayer::snpQA(vector data) { try { int numIdentical = 0; for (int i = 0; i < data.size(); i++) { if (data[i].parentAChar == data[i].queryChar) { numIdentical++; } } float percentID = (numIdentical / (float) data.size()) * 100; return percentID; } catch(exception& e) { m->errorOut(e, "Slayer", "snpQA"); exit(1); } } /***********************************************************************/ float Slayer::snpQB(vector data) { try { int numIdentical = 0; for (int i = 0; i < data.size(); i++) { if (data[i].parentBChar == data[i].queryChar) { numIdentical++; } } float percentID = (numIdentical / (float) data.size()) * 100; return percentID; } catch(exception& e) { m->errorOut(e, "Slayer", "snpQB"); exit(1); } } /***********************************************************************/ float Slayer::snpAB(vector data) { try { int numIdentical = 0; for (int i = 0; i < data.size(); i++) { if (data[i].parentAChar == data[i].parentBChar) { numIdentical++; } } float percentID = (numIdentical / (float) data.size()) * 100; return percentID; } catch(exception& e) { m->errorOut(e, "Slayer", "snpAB"); exit(1); } } /***********************************************************************/ float Slayer::computePercentID(string queryAlign, string chimera, int left, int right) { try { int numIdentical = 0; int countA = 0; int countB = 0; for (int i = left; i <= right; i++) { if (((queryAlign[i] != 'G') && (queryAlign[i] != 'T') && (queryAlign[i] != 'A') && (queryAlign[i] != 'C')&& 
(queryAlign[i] != '.') && (queryAlign[i] != '-')) || ((chimera[i] != 'G') && (chimera[i] != 'T') && (chimera[i] != 'A') && (chimera[i] != 'C')&& (chimera[i] != '.') && (chimera[i] != '-'))) {} else { bool charA = false; bool charB = false; if ((queryAlign[i] == 'G') || (queryAlign[i] == 'T') || (queryAlign[i] == 'A') || (queryAlign[i] == 'C')) { charA = true; } if ((chimera[i] == 'G') || (chimera[i] == 'T') || (chimera[i] == 'A') || (chimera[i] == 'C')) { charB = true; } if (charA || charB) { if (charA) { countA++; } if (charB) { countB++; } if (queryAlign[i] == chimera[i]) { numIdentical++; } } } } float numBases = (countA + countB) /(float) 2; if (numBases == 0) { return 0; } float percentIdentical = (numIdentical/(float)numBases) * 100; return percentIdentical; } catch(exception& e) { m->errorOut(e, "Slayer", "computePercentID"); exit(1); } } /***********************************************************************/ //remove columns that contain any gaps map Slayer::verticalFilter(Sequence& q, Sequence& pA, Sequence& pB) { try { //find baseSpots baseSpots.clear(); baseSpots.resize(3); //query, parentA, parentB vector gaps; gaps.resize(q.getAligned().length(), 0); string filterString = (string(q.getAligned().length(), '1')); string seqAligned = q.getAligned(); for (int j = 0; j < seqAligned.length(); j++) { //if this spot is a gap if ((seqAligned[j] == '-') || (seqAligned[j] == '.') || (toupper(seqAligned[j]) == 'N')) { gaps[j]++; } } seqAligned = pA.getAligned(); for (int j = 0; j < seqAligned.length(); j++) { //if this spot is a gap if ((seqAligned[j] == '-') || (seqAligned[j] == '.') || (toupper(seqAligned[j]) == 'N')) { gaps[j]++; } } seqAligned = pB.getAligned(); for (int j = 0; j < seqAligned.length(); j++) { //if this spot is a gap if ((seqAligned[j] == '-') || (seqAligned[j] == '.') || (toupper(seqAligned[j]) == 'N')) { gaps[j]++; } } //zero out spot where any sequences have blanks int numColRemoved = 0; int count = 0; map maskMap; maskMap.clear(); for(int i = 0; i < q.getAligned().length(); i++){ if(gaps[i] != 0) { filterString[i] = '0'; numColRemoved++; } else { maskMap[count] = i; count++; } } seqAligned = q.getAligned(); string newAligned = ""; int baseCount = 0; count = 0; for (int j = 0; j < seqAligned.length(); j++) { //are you a base if ((seqAligned[j] != '-') && (seqAligned[j] != '.') && (toupper(seqAligned[j]) != 'N')) { baseCount++; } //if this spot is not a gap if (filterString[j] == '1') { newAligned += seqAligned[j]; baseSpots[0][count] = baseCount; count++; } } q.setAligned(newAligned); seqAligned = pA.getAligned(); newAligned = ""; baseCount = 0; count = 0; for (int j = 0; j < seqAligned.length(); j++) { //are you a base if ((seqAligned[j] != '-') && (seqAligned[j] != '.') && (toupper(seqAligned[j]) != 'N')) { baseCount++; } //if this spot is not a gap if (filterString[j] == '1') { newAligned += seqAligned[j]; baseSpots[1][count] = baseCount; count++; } } pA.setAligned(newAligned); seqAligned = pB.getAligned(); newAligned = ""; baseCount = 0; count = 0; for (int j = 0; j < seqAligned.length(); j++) { //are you a base if ((seqAligned[j] != '-') && (seqAligned[j] != '.') && (toupper(seqAligned[j]) != 'N')) { baseCount++; } //if this spot is not a gap if (filterString[j] == '1') { newAligned += seqAligned[j]; baseSpots[2][count] = baseCount; count++; } } pB.setAligned(newAligned); return maskMap; } catch(exception& e) { m->errorOut(e, "Slayer", "verticalFilter"); exit(1); } } /***********************************************************************/ 
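/*
 * Illustrative sketch only: one way a caller might drive Slayer.  The numeric
 * arguments (window, step, parent %id threshold, divergence ratio, bootstrap
 * iterations, %SNPs sampled, minimum bootstrap support) and the
 * pickCandidateParents() helper are assumptions made for this example; they
 * are not values or functions defined in this file.
 *
 *     Slayer slayer(50, 5, 90, 1.007, 1000, 10, 90);
 *     vector<Sequence> parents = pickCandidateParents(query); // hypothetical helper
 *     if (slayer.getResults(query, parents) == "yes") {       // returns "yes" or "no"
 *         vector<data_struct> hits = slayer.getOutput();      // supporting breakpoint records
 *     }
 */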
mothur-1.48.0/source/chimera/slayer.h000077500000000000000000000030611424121717000175040ustar00rootroot00000000000000#ifndef SLAYER_H #define SLAYER_H /* * slayer.h * Mothur * * Created by westcott on 9/25/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "sequence.hpp" #include "mothurchimera.h" /***********************************************************************/ //This class was modeled after the chimeraSlayer written by the Broad Institute /***********************************************************************/ struct snps { char queryChar; char parentAChar; char parentBChar; }; /***********************************************************************/ class Slayer { public: Slayer(int, int, int, float, int, int, int); ~Slayer() = default;; string getResults(Sequence, vector); vector getOutput() { return outputResults; } private: int windowSize, windowStep, parentFragmentThreshold, iters, percentSNPSample, minBS; float divRThreshold; vector outputResults; vector< map > baseSpots; Sequence myQuery; map verticalFilter(Sequence&, Sequence&, Sequence&); float computePercentID(string, string, int, int); vector runBellerophon(Sequence, Sequence, Sequence, map&); vector getSNPS(string, string, string, int, int); int bootstrapSNPS(vector, vector, float&, float&, int); float snpQA(vector); float snpQB(vector); float snpAB(vector); MothurOut* m; Utils util; }; /***********************************************************************/ #endif mothur-1.48.0/source/classifier/000077500000000000000000000000001424121717000165455ustar00rootroot00000000000000mothur-1.48.0/source/classifier/alignnode.cpp000077500000000000000000000213041424121717000212140ustar00rootroot00000000000000/* * alignNode.cpp * bayesian * * Created by Pat Schloss on 10/11/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. * */ #include "alignnode.h" #include "taxonomynode.h" #include "bayesian.h" /**************************************************************************************************/ AlignNode::AlignNode(string n, int l): TaxonomyNode(n, l){ alignLength = 0; } /**************************************************************************************************/ void AlignNode::printTheta(){ try { m->mothurOut("A:\t"); for(int i=0;imothurOut(toString(theta[i].A)+ '\t'); } m->mothurOutEndLine(); m->mothurOut("T:\t"); for(int i=0;imothurOut(toString(theta[i].T)+ '\t'); } m->mothurOutEndLine(); m->mothurOut("G:\t"); for(int i=0;imothurOut(toString(theta[i].G)+ '\t'); } m->mothurOutEndLine(); m->mothurOut("C:\t"); for(int i=0;imothurOut(toString(theta[i].C)+ '\t'); } m->mothurOutEndLine(); m->mothurOut("I:\t"); for(int i=0;imothurOut(toString(theta[i].gap)+ '\t'); } m->mothurOutEndLine(); } catch(exception& e) { m->errorOut(e, "AlignNode", "printTheta"); exit(1); } } /**************************************************************************************************/ int AlignNode::loadSequence(string& sequence){ try { alignLength = (int)sequence.length(); // this function runs through the alignment and increments the frequency // of each base for a particular taxon. 
we are building the thetas if(theta.size() == 0){ theta.resize(alignLength); columnCounts.resize(alignLength, 0); } for(int i=0;igetControl_pressed()) { return 0; } char base = sequence[i]; if(base == 'A') { theta[i].A++; columnCounts[i]++; } // our thetas will be alignLength x 5 else if(base == 'T'){ theta[i].T++; columnCounts[i]++; } // and we ignore any position that has else if(base == 'G'){ theta[i].G++; columnCounts[i]++; } // an ambiguous base call else if(base == 'C'){ theta[i].C++; columnCounts[i]++; } else if(base == '-'){ theta[i].gap++; columnCounts[i]++; } else if(base == 'U'){ theta[i].T++; columnCounts[i]++; } } numSeqs++; return 0; } catch(exception& e) { m->errorOut(e, "AlignNode", "loadSequence"); exit(1); } } /**************************************************************************************************/ int AlignNode::checkTheta(){ try { for(int i=0;igetControl_pressed()) { return 0; } if(theta[i].gap == columnCounts[i]){ columnCounts[i] = 0; } // else{ // int maxCount = theta[i].A; // // if(theta[i].T > maxCount) { maxCount = theta[i].T; } // if(theta[i].G > maxCount) { maxCount = theta[i].T; } // if(theta[i].C > maxCount) { maxCount = theta[i].T; } // if(theta[i].gap > maxCount) { maxCount = theta[i].T; } // // if(maxCount < columnCounts[i] * 0.25){// || maxCount == columnCounts[i]){ //remove any column where the maximum frequency is <50% // columnCounts[i] = 0; // } // } } return 0; } catch(exception& e) { m->errorOut(e, "AlignNode", "checkTheta"); exit(1); } } /**************************************************************************************************/ int AlignNode::addThetas(vector newTheta, int newNumSeqs){ try { if(alignLength == 0){ alignLength = (int)newTheta.size(); theta.resize(alignLength); columnCounts.resize(alignLength); } for(int i=0;igetControl_pressed()) { return 0; } theta[i].A += newTheta[i].A; columnCounts[i] += newTheta[i].A; theta[i].T += newTheta[i].T; columnCounts[i] += newTheta[i].T; theta[i].G += newTheta[i].G; columnCounts[i] += newTheta[i].G; theta[i].C += newTheta[i].C; columnCounts[i] += newTheta[i].C; theta[i].gap += newTheta[i].gap; columnCounts[i] += newTheta[i].gap; } numSeqs += newNumSeqs; return 0; } catch(exception& e) { m->errorOut(e, "AlignNode", "addThetas"); exit(1); } } /**************************************************************************************************/ double AlignNode::getSimToConsensus(string& query){ try { double similarity = 0; int length = 0; for(int i=0;igetControl_pressed()) { return similarity; } char base = query[i]; if(base != '.' 
&& base != 'N' && columnCounts[i] != 0){ double fraction = 0; if(base == 'A'){ fraction = (int) theta[i].A / (double) columnCounts[i]; similarity += fraction; length++; } else if(base == 'T'){ fraction = (int) theta[i].T / (double) columnCounts[i]; similarity += fraction; length++; } else if(base == 'G'){ fraction = (int) theta[i].G / (double) columnCounts[i]; similarity += fraction; length++; } else if(base == 'C'){ fraction = (int) theta[i].C / (double) columnCounts[i]; similarity += fraction; length++; } else if(base == '-'){ fraction = (int) theta[i].gap / (double) columnCounts[i]; similarity += fraction; length++; } } } if(length != 0){ similarity /= double(length); } else { similarity = 0; } return similarity; } catch(exception& e) { m->errorOut(e, "AlignNode", "getSimToConsensus"); exit(1); } } /**************************************************************************************************/ double AlignNode::getPxGivenkj_D_j(string& query){ //P(x | k_j, D, j) try { double PxGivenkj_D_j = 0; int count = 0; double alpha = 1 / (double)totalSeqs; //flat prior for(int s=0;sgetControl_pressed()) { return PxGivenkj_D_j; } char base = query[s]; thetaAlign thetaS = theta[s]; if(base != '.' && base != 'N' && columnCounts[s] != 0){ double Nkj_s = (double)columnCounts[s]; double nkj_si = 0; if(base == 'A') { nkj_si = (double)thetaS.A; } else if(base == 'T'){ nkj_si = (double)thetaS.T; } else if(base == 'G'){ nkj_si = (double)thetaS.G; } else if(base == 'C'){ nkj_si = (double)thetaS.C; } else if(base == '-'){ nkj_si = (double)thetaS.gap; } else if(base == 'U'){ nkj_si = (double)thetaS.T; } // double alpha = pow(0.2, double(Nkj_s)) + 0.0001; //need to make 1e-4 a variable in future; this is the non-flat prior // if(columnCounts[s] != nkj_si){ //deal only with segregating sites... double numerator = nkj_si + alpha; double denomenator = Nkj_s + 5.0 * alpha; PxGivenkj_D_j += log(numerator) - log(denomenator); count++; // } } if(base != '.' && columnCounts[s] == 0 && thetaS.gap == 0){ count = 0; break; } } if(count == 0){ PxGivenkj_D_j = -1e10; } return PxGivenkj_D_j; } catch(exception& e) { m->errorOut(e, "AlignNode", "getPxGivenkj_D_j"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/classifier/alignnode.h000077500000000000000000000021321424121717000206570ustar00rootroot00000000000000#ifndef ALIGNNODE #define ALIGNNODE /* * alignNode.h * bayesian * * Created by Pat Schloss on 10/11/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. 
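 *
 * Each AlignNode is one taxon in the classification tree.  For every alignment
 * column it stores counts of A/T/G/C/gap (thetaAlign) accumulated from the
 * aligned reference sequences assigned to that taxon.  A query is scored
 * against a node either by averaging, over the columns where it has a base,
 * the frequency of that base among the taxon's references (getSimToConsensus),
 * or by the log posterior that sums log((n + alpha) / (N + 5*alpha)) over
 * informative columns, where n is the count of the query's base, N is the
 * column total and alpha = 1/totalSeqs is a flat pseudocount prior
 * (getPxGivenkj_D_j).
 *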
* */ #include "taxonomynode.h" /**************************************************************************************************/ struct thetaAlign { thetaAlign() : A(0), T(0), G(0), C(0), gap(0){} unsigned int A; unsigned int T; unsigned int G; unsigned int C; unsigned int gap; }; /**************************************************************************************************/ class AlignNode : public TaxonomyNode { public: AlignNode(string, int); int loadSequence(string&); int checkTheta(); void printTheta(); double getPxGivenkj_D_j(string& query); //P(x | k_j, D, j) double getSimToConsensus(string& query); vector getTheta() { return theta; } int addThetas(vector, int); private: vector theta; vector columnCounts; int alignLength; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/classifier/aligntree.cpp000077500000000000000000000306221424121717000212310ustar00rootroot00000000000000// // alignTree.cpp // pdsBayesian // // Created by Patrick Schloss on 4/3/12. // Copyright (c) 2012 University of Michigan. All rights reserved. // #include "alignnode.h" #include "aligntree.h" /**************************************************************************************************/ AlignTree::AlignTree(string referenceFileName, string taxonomyFileName, int cutoff) : Classify(), confidenceThreshold(cutoff){ try { AlignNode* newNode = new AlignNode("Root", 0); tree.push_back(newNode); // the tree is stored as a vector of elements of type TaxonomyNode string refTaxonomy; readTaxonomy(taxonomyFileName); ifstream referenceFile; Utils util; util.openInputFile(referenceFileName, referenceFile); bool error = false; map lengths; while(!referenceFile.eof()){ if (m->getControl_pressed()) { break; } Sequence seq(referenceFile); gobble(referenceFile); if (seq.getName() != "") { map::iterator it = taxonomy.find(seq.getName()); if (it != taxonomy.end()) { refTaxonomy = it->second; // lookup the taxonomy string for the current reference sequence string aligned = seq.getAligned(); lengths[aligned.length()] = 1; if (lengths.size() > 1) { error = true; m->mothurOut("[ERROR]: reference sequences must be aligned to use the align method, quitting.\n"); break; } addTaxonomyToTree(seq.getName(), refTaxonomy, aligned); }else { m->mothurOut(seq.getName() + " is in your reference file, but not in your taxonomy file, please correct.\n"); error = true; } } } referenceFile.close(); length = (lengths.begin())->first; if (error) { m->setControl_pressed(true); } numTaxa = (int)tree.size(); numLevels = 0; for(int i=0;igetLevel(); if(level > numLevels){ numLevels = level; } } numLevels++; aggregateThetas(); int dbSize = tree[0]->getNumSeqs(); for(int i=0;icheckTheta(); tree[i]->setTotalSeqs(dbSize); } } catch(exception& e) { m->errorOut(e, "AlignTree", "AlignTree"); exit(1); } } /**************************************************************************************************/ AlignTree::~AlignTree(){ try { for(int i=0;ierrorOut(e, "AlignTree", "~AlignTree"); exit(1); } } /**************************************************************************************************/ int AlignTree::addTaxonomyToTree(string seqName, string& taxonomy, string& sequence){ try { AlignNode* newNode; string taxonName = ""; int treePosition = 0; // the root is element 0 int level = 1; for(int i=0;igetControl_pressed()) { break; } if(taxonomy[i] == ';'){ // looking for semicolons... if (taxonName == "") { m->mothurOut(seqName + " has an error in the taxonomy. 
This may be due to a ;;\n"); m->setControl_pressed(true); } int newIndex = tree[treePosition]->getChildIndex(taxonName); // look to see if your current node already // has a child with the new taxonName if(newIndex != -1) { treePosition = newIndex; } // if you've seen it before, jump to that else { // position in the tree int newChildIndex = (int)tree.size(); // otherwise, we'll have to create one... tree[treePosition]->makeChild(taxonName, newChildIndex); newNode = new AlignNode(taxonName, level); newNode->setParent(treePosition); tree.push_back(newNode); treePosition = newChildIndex; } // sequence data to that node to update that node's theta - seems slow... taxonName = ""; // clear out the taxon name that we will build as we look level++; } // for a semicolon else{ taxonName += taxonomy[i]; // keep adding letters until we reach a semicolon } } tree[treePosition]->loadSequence(sequence); // now that we've gotten to the correct node, add the return 0; } catch(exception& e) { m->errorOut(e, "AlignTree", "addTaxonomyToTree"); exit(1); } } /**************************************************************************************************/ int AlignTree::aggregateThetas(){ try { vector > levelMatrix(numLevels+1); for(int i=0;igetControl_pressed()) { return 0; } levelMatrix[tree[i]->getLevel()].push_back(i); } for(int i=numLevels-1;i>0;i--){ if (m->getControl_pressed()) { return 0; } for(int j=0;jgetParent()]->addThetas(holder->getTheta(), holder->getNumSeqs()); } } return 0; } catch(exception& e) { m->errorOut(e, "AlignTree", "aggregateThetas"); exit(1); } } /**************************************************************************************************/ double AlignTree::getOutlierLogProbability(string& sequence){ try { double count = 0; for(int i=0;ierrorOut(e, "AlignTree", "getOutlierLogProbability"); exit(1); } } /**************************************************************************************************/ int AlignTree::getMinRiskIndexAlign(string& sequence, vector& taxaIndices, vector& probabilities){ try { int numProbs = (int)probabilities.size(); vector G(numProbs, 0.2); //a random sequence will, on average, be 20% similar to any other sequence vector risk(numProbs, 0); for(int i=1;igetControl_pressed()) { return 0; } G[i] = tree[taxaIndices[i]]->getSimToConsensus(sequence); } double minRisk = MOTHURMAX; int minRiskIndex = 0; for(int i=0;igetControl_pressed()) { return 0; } for(int j=0;jerrorOut(e, "AlignTree", "getMinRiskIndexAlign"); exit(1); } } /**************************************************************************************************/ int AlignTree::sanityCheck(vector >& indices, vector& maxIndices){ try { int finalLevel = (int)indices.size()-1; for(int position=1;positiongetControl_pressed()) { return 0; } int predictedParent = tree[indices[position][maxIndices[position]]]->getParent(); int actualParent = indices[position-1][maxIndices[position-1]]; if(predictedParent != actualParent){ finalLevel = position - 1; return finalLevel; } } return finalLevel; } catch(exception& e) { m->errorOut(e, "AlignTree", "sanityCheck"); exit(1); } } /**************************************************************************************************/ string AlignTree::getTaxonomy(Sequence* seq, string& simpleTax, bool& flipped){ try { simpleTax = ""; string seqName = seq->getName(); string querySequence = seq->getAligned(); string taxonProbabilityString = ""; if (querySequence.length() != length) { m->mothurOut("[ERROR]: " + seq->getName() + " has length " + 
toString(querySequence.length()) + ", reference sequences length is " + toString(length) + ". Are your sequences aligned? Sequences must be aligned to use the align search method.\n"); m->setControl_pressed(true); return ""; } double logPOutlier = getOutlierLogProbability(querySequence); vector > pXgivenKj_D_j(numLevels); vector > indices(numLevels); for(int i=0;igetControl_pressed()) { return taxonProbabilityString; } pXgivenKj_D_j[i].push_back(logPOutlier); indices[i].push_back(-1); } for(int i=0;igetControl_pressed()) { return taxonProbabilityString; } pXgivenKj_D_j[tree[i]->getLevel()].push_back(tree[i]->getPxGivenkj_D_j(querySequence)); indices[tree[i]->getLevel()].push_back(i); } vector sumLikelihood(numLevels, 0); vector bestPosterior(numLevels, 0); vector maxIndex(numLevels, 0); int maxPosteriorIndex; //let's find the best level and taxa within that level for(int i=0;igetControl_pressed()) { return taxonProbabilityString; } int numTaxaInLevel = (int)indices[i].size(); vector posteriors(numTaxaInLevel, 0); sumLikelihood[i] = getLogExpSum(pXgivenKj_D_j[i], maxPosteriorIndex); maxPosteriorIndex = 0; for(int j=0;j posteriors[maxPosteriorIndex]){ maxPosteriorIndex = j; } } maxIndex[i] = getMinRiskIndexAlign(querySequence, indices[i], posteriors); maxIndex[i] = maxPosteriorIndex; bestPosterior[i] = posteriors[maxIndex[i]]; } int saneDepth = sanityCheck(indices, maxIndex); simpleTax = ""; int savedspot = 1; taxonProbabilityString = ""; for(int i=1;i<=saneDepth;i++){ if (m->getControl_pressed()) { return taxonProbabilityString; } int confidenceScore = (int) (bestPosterior[i] * 100); if (confidenceScore >= confidenceThreshold) { if(indices[i][maxIndex[i]] != -1){ taxonProbabilityString += tree[indices[i][maxIndex[i]]]->getName() + '(' + toString(confidenceScore) + ");"; simpleTax += tree[indices[i][maxIndex[i]]]->getName() + ";"; } else{ taxonProbabilityString += "unclassified(" + toString(confidenceScore) + ");"; simpleTax += "unclassified;"; } }else { break; } savedspot = i; } for(int i=savedspot+1;igetControl_pressed()) { return taxonProbabilityString; } taxonProbabilityString += "unclassified(0);"; simpleTax += "unclassified;"; } return taxonProbabilityString; } catch(exception& e) { m->errorOut(e, "AlignTree", "getTaxonomy"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/classifier/aligntree.h000066400000000000000000000013351424121717000206720ustar00rootroot00000000000000// // alignTree.h // pdsBayesian // // Created by Patrick Schloss on 4/3/12. // Copyright (c) 2012 University of Michigan. All rights reserved. // #ifndef pdsBayesian_alignTree_h #define pdsBayesian_alignTree_h #include "classify.h" class AlignNode; class AlignTree : public Classify { public: AlignTree(string, string, int); ~AlignTree(); string getTaxonomy(Sequence*, string&, bool&); private: int addTaxonomyToTree(string, string&, string&); double getOutlierLogProbability(string&); int getMinRiskIndexAlign(string&, vector&, vector&); int aggregateThetas(); int sanityCheck(vector >&, vector&); int numSeqs, confidenceThreshold, length; vector tree; }; #endif mothur-1.48.0/source/classifier/bayesian.cpp000066400000000000000000000373701424121717000210550ustar00rootroot00000000000000/* * bayesian.cpp * Mothur * * Created by westcott on 11/3/09. * Copyright 2009 Schloss Lab. All rights reserved.
* */ #include "bayesian.h" #include "kmer.hpp" #include "phylosummary.h" /**************************************************************************************************/ Bayesian::Bayesian(string txfile, string tempFile, string method, int ksize, int cutoff, int i, int tid, bool f, bool sh, string version) : Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { try { threadID = tid; flip = f; shortcuts = sh; string baseName = tempFile; string baseTName = txfile; Utils util; /************calculate the probablity that each word will be in a specific taxonomy*************/ string tfileroot = util.getFullPathName(baseTName.substr(0,baseTName.find_last_of(".")+1)); string tempfileroot = util.getRootName(util.getSimpleName(baseName)); string phyloTreeName = tfileroot + "tree.train"; string phyloTreeSumName = tfileroot + "tree.sum"; string probFileName = tfileroot + tempfileroot + char('0'+ kmerSize) + "mer.prob"; string probFileName2 = tfileroot + tempfileroot + char('0'+ kmerSize) + "mer.numNonZero"; ofstream out; ofstream out2; vector files; ifstream* phyloTreeTest = new ifstream(phyloTreeName.c_str()); files.push_back(phyloTreeTest); ifstream* probFileTest2 = new ifstream(probFileName2.c_str()); files.push_back(probFileTest2); ifstream* probFileTest = new ifstream(probFileName.c_str()); files.push_back(probFileTest); ifstream* probFileTest3 = new ifstream(phyloTreeSumName.c_str()); files.push_back(probFileTest3); long start = time(nullptr); //if they are there make sure they were created after this release date bool FilesGood = false; if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3){ FilesGood = checkReleaseDate(files, version); } if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){ m->mothurOut("Reading template taxonomy... "); cout.flush(); phyloTree = new PhyloTree(*phyloTreeTest, phyloTreeName); maxLevel = phyloTree->getMaxLevel(); m->mothurOut("DONE.\n"); genusNodes = phyloTree->getGenusNodes(); genusTotals = phyloTree->getGenusTotals(); m->mothurOut("Reading template probabilities... "); cout.flush(); readProbFile(*probFileTest, *probFileTest2, probFileName, probFileName2); }else{ //create search database and names vector generateDatabaseAndNames(txfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0, version); //prevents errors caused by creating shortcut files if you had an error in the sanity check. if (m->getControl_pressed()) { util.mothurRemove(phyloTreeName); util.mothurRemove(probFileName); util.mothurRemove(probFileName2); } else{ genusNodes = phyloTree->getGenusNodes(); genusTotals = phyloTree->getGenusTotals(); m->mothurOut("Calculating template taxonomy tree... "); cout.flush(); phyloTree->printTreeNodes(phyloTreeName); m->mothurOut("DONE.\n"); m->mothurOut("Calculating template probabilities... 
"); cout.flush(); numKmers = database->getMaxKmer() + 1; //initialze probabilities wordGenusProb.resize(numKmers); for (int j = 0; j < numKmers; j++) { diffPair tempDiffPair; WordPairDiffArr.push_back(tempDiffPair); } for (int j = 0; j < wordGenusProb.size(); j++) { wordGenusProb[j].resize(genusNodes.size(), 0.0); } ofstream out; ofstream out2; if (shortcuts) { util.openOutputFile(probFileName, out); //output mothur version out << "#" << version << endl; out << numKmers << endl; util.openOutputFile(probFileName2, out2); //output mothur version out2 << "#" << version << endl; } //for each word for (int i = 0; i < numKmers; i++) { //m->mothurOut("[DEBUG]: kmer = " + toString(i) + "\n"); if (m->getControl_pressed()) { break; } if (shortcuts) { out << i << '\t'; } vector seqsWithWordi = database->getSequencesWithKmer(i); //for each sequence with that word vector count; count.resize(genusNodes.size(), 0); for (int j = 0; j < seqsWithWordi.size(); j++) { int temp = phyloTree->getGenusIndex(names[seqsWithWordi[j]]); count[temp]++; //increment count of seq in this genus who have this word } //probabilityInTemplate = (# of seqs with that word in template + 0.50) / (total number of seqs in template + 1); float probabilityInTemplate = (seqsWithWordi.size() + 0.50) / (float) (names.size() + 1); diffPair tempProb(log(probabilityInTemplate), 0.0); WordPairDiffArr[i] = tempProb; int numNotZero = 0; for (int k = 0; k < genusNodes.size(); k++) { //probabilityInThisTaxonomy = (# of seqs with that word in this taxonomy + probabilityInTemplate) / (total number of seqs in this taxonomy + 1); wordGenusProb[i][k] = log((count[k] + probabilityInTemplate) / (float) (genusTotals[k] + 1)); if (count[k] != 0) { if (shortcuts) { out << k << '\t' << wordGenusProb[i][k] << '\t' ; } numNotZero++; } } if (shortcuts) { out << endl; out2 << probabilityInTemplate << '\t' << numNotZero << '\t' << log(probabilityInTemplate) << endl; } } if (shortcuts) { out.close(); out2.close(); } //read in new phylotree with less info. 
- its faster ifstream phyloTreeTest(phyloTreeName.c_str()); delete phyloTree; phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName); maxLevel = phyloTree->getMaxLevel(); } } if (m->getDebug()) { m->mothurOut("[DEBUG]: about to generateWordPairDiffArr\n"); } generateWordPairDiffArr(); if (m->getDebug()) { m->mothurOut("[DEBUG]: done generateWordPairDiffArr\n"); } for (int i = 0; i < files.size(); i++) { delete files[i]; } m->mothurOut("DONE.\n"); m->mothurOut("It took " + toString(time(nullptr) - start) + " seconds get probabilities.\n"); } catch(exception& e) { m->errorOut(e, "Bayesian", "Bayesian"); exit(1); } } /**************************************************************************************************/ Bayesian::~Bayesian() { try { if (phyloTree != nullptr) { delete phyloTree; } if (database != nullptr) { delete database; } } catch(exception& e) { m->errorOut(e, "Bayesian", "~Bayesian"); exit(1); } } /**************************************************************************************************/ string Bayesian::getTaxonomy(Sequence* seq, string& simpleTax, bool& flipped) { try { string tax = ""; simpleTax = ""; Kmer kmer(kmerSize); flipped = false; //get words contained in query //getKmerString returns a string where the index in the string is hte kmer number //and the character at that index can be converted to be the number of times that kmer was seen string queryKmerString = kmer.getKmerString(seq->getUnaligned()); vector queryKmers; for (int i = 0; i < queryKmerString.length()-1; i++) { // the -1 is to ignore any kmer with an N in it if (queryKmerString[i] != '!') { //this kmer is in the query queryKmers.push_back(i); } } //if user wants to test reverse compliment and its reversed use that instead if (flip) { if (isReversed(queryKmers)) { flipped = true; seq->reverseComplement(); queryKmerString = kmer.getKmerString(seq->getUnaligned()); queryKmers.clear(); for (int i = 0; i < queryKmerString.length()-1; i++) { // the -1 is to ignore any kmer with an N in it if (queryKmerString[i] != '!') { //this kmer is in the query queryKmers.push_back(i); } } } } if (queryKmers.size() == 0) { m->mothurOut(seq->getName() + " is bad. 
It has no kmers of length " + toString(kmerSize) + ".\n"); simpleTax = "unknown;"; return "unknown;"; } int index = getMostProbableTaxonomy(queryKmers); if (m->getControl_pressed()) { return tax; } //bootstrap - to set confidenceScore int numToSelect = queryKmers.size() / 8; if (m->getDebug()) { m->mothurOut(seq->getName() + "\t"); } tax = bootstrapResults(queryKmers, index, numToSelect, simpleTax); if (m->getDebug()) { m->mothurOut("\n"); } return tax; } catch(exception& e) { m->errorOut(e, "Bayesian", "getTaxonomy"); exit(1); } } /**************************************************************************************************/ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect, string& simpleTax) { try { map confidenceScores; //initialize confidences to 0 int seqIndex = tax; TaxNode seq = phyloTree->get(tax); confidenceScores[tax] = 0; while (seq.level != 0) { //while you are not at the root seqIndex = seq.parent; confidenceScores[seqIndex] = 0; seq = phyloTree->get(seq.parent); } map::iterator itBoot; map::iterator itBoot2; map::iterator itConvert; int numKmers = kmers.size()-1; Utils util; for (int i = 0; i < iters; i++) { if (m->getControl_pressed()) { return "control"; } vector temp; for (int j = 0; j < numToSelect; j++) { int index = util.getRandomIndex(numKmers); //add word to temp temp.push_back(kmers[index]); } //get taxonomy int newTax = getMostProbableTaxonomy(temp); //int newTax = 1; TaxNode taxonomyTemp = phyloTree->get(newTax); //add to confidence results while (taxonomyTemp.level != 0) { //while you are not at the root itBoot2 = confidenceScores.find(newTax); //is this a classification we already have a count on if (itBoot2 != confidenceScores.end()) { //this is a classification we need a confidence for (itBoot2->second)++; } newTax = taxonomyTemp.parent; taxonomyTemp = phyloTree->get(newTax); } } string confidenceTax = ""; simpleTax = ""; int seqTaxIndex = tax; TaxNode seqTax = phyloTree->get(tax); while (seqTax.level != 0) { //while you are not at the root itBoot2 = confidenceScores.find(seqTaxIndex); //is this a classification we already have a count on int confidence = 0; if (itBoot2 != confidenceScores.end()) { //already in confidence scores confidence = itBoot2->second; } if (m->getDebug()) { m->mothurOut(seqTax.name + "(" + toString(((confidence/(float)iters) * 100)) + ");"); } if (((confidence/(float)iters) * 100) >= confidenceThreshold) { confidenceTax = seqTax.name + "(" + toString(((confidence/(float)iters) * 100)) + ");" + confidenceTax; simpleTax = seqTax.name + ";" + simpleTax; } seqTaxIndex = seqTax.parent; seqTax = phyloTree->get(seqTax.parent); } if (confidenceTax == "") { confidenceTax = "unknown;"; simpleTax = "unknown;"; } return confidenceTax; } catch(exception& e) { m->errorOut(e, "Bayesian", "bootstrapResults"); exit(1); } } /**************************************************************************************************/ int Bayesian::getMostProbableTaxonomy(vector queryKmer) { try { int indexofGenus = 0; double maxProbability = -1000000.0; //find taxonomy with highest probability that this sequence is from it for (int k = 0; k < genusNodes.size(); k++) { //for each taxonomy calc its probability double prob = 0.0000; for (int i = 0; i < queryKmer.size(); i++) { prob += wordGenusProb[queryKmer[i]][k]; } //is this the taxonomy with the greatest probability? 
if (prob > maxProbability) { indexofGenus = genusNodes[k]; maxProbability = prob; } } return indexofGenus; } catch(exception& e) { m->errorOut(e, "Bayesian", "getMostProbableTaxonomy"); exit(1); } } //******************************************************************************************************************** //if it is more probable that the reverse compliment kmers are in the template, then we assume the sequence is reversed. bool Bayesian::isReversed(vector& queryKmers){ try{ bool reversed = false; float prob = 0; float reverseProb = 0; for (int i = 0; i < queryKmers.size(); i++){ int kmer = queryKmers[i]; if (kmer >= 0){ prob += WordPairDiffArr[kmer].prob; reverseProb += WordPairDiffArr[kmer].reverseProb; } } if (reverseProb > prob){ reversed = true; } return reversed; } catch(exception& e) { m->errorOut(e, "Bayesian", "isReversed"); exit(1); } } //******************************************************************************************************************** int Bayesian::generateWordPairDiffArr(){ try{ Kmer kmer(kmerSize); for (int i = 0; i < WordPairDiffArr.size(); i++) { int reversedWord = kmer.getReverseKmerNumber(i); WordPairDiffArr[i].reverseProb = WordPairDiffArr[reversedWord].prob; } return 0; }catch(exception& e) { m->errorOut(e, "Bayesian", "generateWordPairDiffArr"); exit(1); } } /**************************************************************************************************/ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string inNumName) { try{ Utils util; //read version string line = util.getline(in); gobble(in); in >> numKmers; gobble(in); //initialze probabilities wordGenusProb.resize(numKmers); for (int j = 0; j < wordGenusProb.size(); j++) { wordGenusProb[j].resize(genusNodes.size()); } int kmer, name, count; count = 0; vector num; num.resize(numKmers); float prob; vector zeroCountProb; zeroCountProb.resize(numKmers); for (int j = 0; j < numKmers; j++) { diffPair tempDiffPair; WordPairDiffArr.push_back(tempDiffPair); } //read version string line2 = util.getline(inNum); gobble(inNum); float probTemp; while (inNum) { inNum >> zeroCountProb[count] >> num[count] >> probTemp; WordPairDiffArr[count].prob = probTemp; count++; gobble(inNum); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + toString(zeroCountProb[count]) + '\t' + toString(num[count]) + '\t' + toString(numKmers) + "\n"); } } inNum.close(); while(in) { in >> kmer; //set them all to zero value for (int i = 0; i < genusNodes.size(); i++) { wordGenusProb[kmer][i] = log(zeroCountProb[kmer] / (float) (genusTotals[i]+1)); } //get probs for nonzero values for (int i = 0; i < num[kmer]; i++) { in >> name >> prob; wordGenusProb[kmer][name] = prob; if (m->getDebug()) { m->mothurOut("[DEBUG]: " + toString(name) + '\t' + toString(prob) + '\t' + toString(kmer) + "\n"); } } gobble(in); } in.close(); } catch(exception& e) { m->errorOut(e, "Bayesian", "readProbFile"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/classifier/bayesian.h000077500000000000000000000024261424121717000205200ustar00rootroot00000000000000#ifndef BAYESIAN_H #define BAYESIAN_H /* * bayesian.h * Mothur * * Created by westcott on 11/3/09. * Copyright 2009 Schloss Lab. All rights reserved. 
* */ #include "mothur.h" #include "classify.h" /**************************************************************************************************/ class Bayesian : public Classify { public: Bayesian(string, string, string, int, int, int, int, bool, bool, string); ~Bayesian(); string getTaxonomy(Sequence*, string&, bool&); private: vector< vector > wordGenusProb; //vector of maps from genus to probability //wordGenusProb[0][392] = probability that a sequence within genus that's index in the tree is 392 would contain kmer 0; vector genusTotals; vector genusNodes; //indexes in phyloTree where genus' are located vector WordPairDiffArr; int kmerSize, numKmers, confidenceThreshold, iters; string bootstrapResults(vector, int, int, string&); int getMostProbableTaxonomy(vector); void readProbFile(ifstream&, ifstream&, string, string); bool isReversed(vector&); vector createWordIndexArr(Sequence*); int generateWordPairDiffArr(); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/classifier/classify.cpp000077500000000000000000000200051424121717000210660ustar00rootroot00000000000000/* * classify.cpp * Mothur * * Created by westcott on 11/3/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "classify.h" #include "sequence.hpp" #include "kmerdb.hpp" #include "suffixdb.hpp" #include "distancedb.hpp" #include "optidb.hpp" /**************************************************************************************************/ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, string version) { try { Utils util; int numSeqs = 0; maxLevel = 0; taxFile = tfile; templateFile = tempFile; long start = time(nullptr); m->mothurOut("Generating search database... "); cout.flush(); //need to know number of template seqs for suffixdb if (method == "suffix") { ifstream inFASTA; util.openInputFile(tempFile, inFASTA); util.getNumSeqs(inFASTA, numSeqs); inFASTA.close(); } bool needToGenerate = true; string dBName; if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); dBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; ifstream kmerFileTest(dBName.c_str()); if(kmerFileTest){ string line = util.getline(kmerFileTest); bool GoodFile = util.checkReleaseVersion(line, version); kmerFileTest.close(); int shortcutTimeStamp = util.getTimeStamp(dBName); int referenceTimeStamp = util.getTimeStamp(tempFile); //if the shortcut file is older then the reference file, remake shortcut file if (shortcutTimeStamp < referenceTimeStamp) { GoodFile = false; } if (GoodFile) { needToGenerate = false; } } } else if(method == "suffix") { database = new SuffixDB(numSeqs); } else if(method == "distance") { database = new DistanceDB(); } else { m->mothurOut(method + " is not a valid search option. 
I will run the command using kmer, ksize=8.\n"); database = new KmerDB(tempFile, 8); } if (!m->getControl_pressed()) { if (needToGenerate) { ifstream fastaFile; util.openInputFile(tempFile, fastaFile); while (!fastaFile.eof()) { Sequence temp(fastaFile); gobble(fastaFile); names.push_back(temp.getName()); database->addSequence(temp); } fastaFile.close(); if ((method == "kmer") && (!shortcuts)) {;} //don't print else {database->generateDB(); } }else if ((method == "kmer") && (!needToGenerate)) { ifstream FileTest(dBName.c_str()); database->readDB(FileTest); ifstream fastaFile; util.openInputFile(tempFile, fastaFile); while (!fastaFile.eof()) { Sequence temp(fastaFile); gobble(fastaFile); names.push_back(temp.getName()); } fastaFile.close(); } database->setNumSeqs(names.size()); m->mothurOut("DONE.\nIt took " + toString(time(nullptr) - start) + " seconds generate search database.\n"); readTaxonomy(taxFile); //sanity check bool okay = phyloTree->ErrorCheck(names); if (!okay) { m->setControl_pressed(true); } } } catch(exception& e) { m->errorOut(e, "Classify", "generateDatabaseAndNames"); exit(1); } } /**************************************************************************************************/ Classify::Classify() { m = MothurOut::getInstance(); database = nullptr; phyloTree=nullptr; maxLevel = 0; } /**************************************************************************************************/ int Classify::readTaxonomy(string file) { try { phyloTree = new PhyloTree(); string name, taxInfo; m->mothurOut("\nReading in the " + file + " taxonomy...\t"); cout.flush(); if (m->getDebug()) { m->mothurOut("[DEBUG]: Taxonomies read in...\n"); } taxonomy.clear(); Utils util; util.readTax(file, taxonomy, true); for (map::iterator itTax = taxonomy.begin(); itTax != taxonomy.end(); itTax++) { phyloTree->addSeqToTree(itTax->first, itTax->second); if (m->getControl_pressed()) { break; } } phyloTree->assignHeirarchyIDs(0); maxLevel = phyloTree->getMaxLevel(); phyloTree->setUp(file); m->mothurOut("DONE.\n"); cout.flush(); return phyloTree->getNumSeqs(); } catch(exception& e) { m->errorOut(e, "Classify", "readTaxonomy"); exit(1); } } /**************************************************************************************************/ vector Classify::parseTax(string tax) { try { vector taxons; Utils util; util.splitAtChar(tax, taxons, ';'); return taxons; } catch(exception& e) { m->errorOut(e, "Classify", "parseTax"); exit(1); } } /**************************************************************************************************/ double Classify::getLogExpSum(vector probabilities, int& maxIndex){ try { // http://jblevins.org/notes/log-sum-exp double maxProb = probabilities[0]; maxIndex = 0; int numProbs = (int)probabilities.size(); for(int i=1;i= maxProb){ maxProb = probabilities[i]; maxIndex = i; } } double probSum = 0.0000; for(int i=0;ierrorOut(e, "Classify", "getLogExpSum"); exit(1); } } /**************************************************************************************************/ bool Classify::checkReleaseDate(vector& files, string version) { try { Utils util; bool good = true; vector versionVector; util.splitAtChar(version, versionVector, '.'); for (int i = 0; i < files.size(); i++) { string line = util.getline(*files[i]); if (line[0] != '#') { good = false; break; } //shortcut files from before we added this check else { line = line.substr(1); vector linesVector; util.splitAtChar(line, linesVector, '.'); if (versionVector.size() != linesVector.size()) { good = false; break; } else { for 
(int j = 0; j < versionVector.size(); j++) { int num1, num2; convert(versionVector[j], num1); convert(linesVector[j], num2); //if mothurs version is newer than this files version, then we want to remake it if (num1 > num2) { good = false; break; } } } if (!good) { break; } } } if (!good) { for (int i = 0; i < files.size(); i++) { files[i]->close(); } } else { for (int i = 0; i < files.size(); i++) { files[i]->seekg(0); } } return good; } catch(exception& e) { m->errorOut(e, "Classify", "checkReleaseDate"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/classifier/classify.h000077500000000000000000000027011424121717000205360ustar00rootroot00000000000000#ifndef CLASSIFY_H #define CLASSIFY_H /* * classify.h * Mothur * * Created by westcott on 11/3/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ /* This class is a parent to bayesian, knn. */ #include "mothur.h" #include "searchdatabase.hpp" #include "phylotree.h" class Sequence; /**************************************************************************************************/ class Classify { public: Classify(); virtual ~Classify(){}; virtual string getTaxonomy(Sequence*, string&, bool&) = 0; int getMaxLevel() { return maxLevel; } virtual void setDistName(string s) {} //for knn, so if distance method is selected with knn you can create the smallest distance file in the right place. protected: map taxonomy; //name maps to taxonomy map::iterator itTax; map::iterator it; SearchDatabase* database; PhyloTree* phyloTree; string taxFile, templateFile; vector names; int threadID, numLevels, numTaxa, maxLevel; bool flip, shortcuts; MothurOut* m; int readTaxonomy(string); vector parseTax(string); double getLogExpSum(vector, int&); bool checkReleaseDate(vector&, string); virtual void generateDatabaseAndNames(string, string, string, int, float, float, float, float, string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/classifier/kmernode.cpp000077500000000000000000000153661424121717000210730ustar00rootroot00000000000000/* * kmerNode.cpp * bayesian * * Created by Pat Schloss on 10/11/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. * */ #include "kmernode.h" /**********************************************************************************************************************/ KmerNode::KmerNode(string s, int l, int n) : TaxonomyNode(s, l), kmerSize(n) { try { int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; numPossibleKmers = power4s[kmerSize]; numUniqueKmers = 0; kmerVector.assign(numPossibleKmers, 0); } catch(exception& e) { m->errorOut(e, "KmerNode", "KmerNode"); exit(1); } } /**********************************************************************************************************************/ void KmerNode::loadSequence(vector& kmerProfile){ try { for(int i=0;igetControl_pressed()) { break; } if(kmerVector[i] == 0 && kmerProfile[i] != 0) { numUniqueKmers++; } kmerVector[i] += kmerProfile[i]; } numSeqs++; } catch(exception& e) { m->errorOut(e, "KmerNode", "loadSequence"); exit(1); } } /**********************************************************************************************************************/ string KmerNode::getKmerBases(int kmerNumber){ try { // Here we convert the kmer number into the kmer in terms of bases. 
// // Example: Score = 915 (for a 6-mer) // Base6 = (915 / 4^0) % 4 = 915 % 4 = 3 => T [T] // Base5 = (915 / 4^1) % 4 = 228 % 4 = 0 => A [AT] // Base4 = (915 / 4^2) % 4 = 57 % 4 = 1 => C [CAT] // Base3 = (915 / 4^3) % 4 = 14 % 4 = 2 => G [GCAT] // Base2 = (915 / 4^4) % 4 = 3 % 4 = 3 => T [TGCAT] // Base1 = (915 / 4^5) % 4 = 0 % 4 = 0 => A [ATGCAT] -> this checks out with the previous method int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; string kmer = ""; if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){ // if the kmer number is the same as the maxKmer then it must for(int i=0;igetControl_pressed()) { return kmer; } int nt = (int)(kmerNumber / (float)power4s[i]) % 4; // the '%' operator returns the remainder if(nt == 0) { kmer = 'A' + kmer; } // from int-based division ] else if(nt == 1){ kmer = 'C' + kmer; } else if(nt == 2){ kmer = 'G' + kmer; } else if(nt == 3){ kmer = 'T' + kmer; } } } return kmer; } catch(exception& e) { m->errorOut(e, "KmerNode", "getKmerBases"); exit(1); } } /**************************************************************************************************/ void KmerNode::addThetas(vector newTheta, int newNumSeqs){ try { for(int i=0;igetControl_pressed()) { break; } kmerVector[i] += newTheta[i]; } // if(alignLength == 0){ // alignLength = (int)newTheta.size(); // theta.resize(alignLength); // columnCounts.resize(alignLength); // } // // for(int i=0;ierrorOut(e, "KmerNode", "addThetas"); exit(1); } } /**********************************************************************************************************************/ int KmerNode::getNumUniqueKmers(){ try { if(numUniqueKmers == 0){ for(int i=0;igetControl_pressed()) { return numUniqueKmers; } if(kmerVector[i] != 0){ numUniqueKmers++; } } } return numUniqueKmers; } catch(exception& e) { m->errorOut(e, "KmerNode", "getNumUniqueKmers"); exit(1); } } /**********************************************************************************************************************/ void KmerNode::printTheta(){ try { m->mothurOut(name + "\n"); for(int i=0;imothurOut(getKmerBases(i) + '\t' + toString(kmerVector[i]) + "\n"); } } m->mothurOutEndLine(); } catch(exception& e) { m->errorOut(e, "KmerNode", "printTheta"); exit(1); } } /**************************************************************************************************/ double KmerNode::getSimToConsensus(vector& queryKmerProfile){ try { double present = 0; for(int i=0;igetControl_pressed()) { return present; } if(queryKmerProfile[i] != 0 && kmerVector[i] != 0){ present++; } } return (present / double(queryKmerProfile.size() - kmerSize + 1)); } catch(exception& e) { m->errorOut(e, "KmerNode", "getSimToConsensus"); exit(1); } } /**********************************************************************************************************************/ double KmerNode::getPxGivenkj_D_j(vector& queryKmerProfile) { try { double sumLogProb = 0.0000; double alpha = 1.0 / (double)totalSeqs; //flat prior // double alpha = pow((1.0 / (double)numUniqueKmers), numSeqs)+0.0001; //non-flat prior for(int i=0;igetControl_pressed()) { return sumLogProb; } if(queryKmerProfile[i] != 0){ //numUniqueKmers needs to be the value from Root; sumLogProb += log((kmerVector[i] + alpha) / (numSeqs + numUniqueKmers * alpha)); } } return sumLogProb; } catch(exception& e) { m->errorOut(e, "KmerNode", "getPxGivenkj_D_j"); exit(1); } } 
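/* Illustrative sketch, not part of the original mothur sources: getPxGivenkj_D_j() above scores a query against a node with a pseudocount-smoothed naive Bayes term per kmer, log((count + alpha) / (numSeqs + numUniqueKmers * alpha)), using the flat prior alpha = 1 / totalSeqs. The hypothetical helper below isolates that per-kmer term so the arithmetic is easy to check by hand. */
static double exampleSmoothedLogTerm(double nodeKmerCount, double nodeNumSeqs, double numUniqueKmers, double totalSeqs){
	double alpha = 1.0 / totalSeqs;	//flat prior, matching getPxGivenkj_D_j above
	return log((nodeKmerCount + alpha) / (nodeNumSeqs + numUniqueKmers * alpha));	//smoothed per-kmer log probability
}
/* With hypothetical values numSeqs = 10, numUniqueKmers = 4096 and totalSeqs = 1000 (so alpha = 0.001), a query kmer counted 7 times in the node contributes exampleSmoothedLogTerm(7, 10, 4096, 1000), roughly -0.70, while a query kmer the node has never seen contributes exampleSmoothedLogTerm(0, 10, 4096, 1000), roughly -9.55; getPxGivenkj_D_j() sums these terms over every kmer present in the query profile. */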
/**********************************************************************************************************************/ mothur-1.48.0/source/classifier/kmernode.h000077500000000000000000000022461424121717000205310ustar00rootroot00000000000000#ifndef KMERNODE #define KMERNODE /* * kmerNode.h * bayesian * * Created by Pat Schloss on 10/11/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. * */ #include "taxonomynode.h" /**********************************************************************************************************************/ class KmerNode : public TaxonomyNode { public: KmerNode(string, int, int); void loadSequence(vector&); void printTheta(); double getPxGivenkj_D_j(vector&); double getSimToConsensus(vector&); void checkTheta(){}; void setNumUniqueKmers(int num) { numUniqueKmers = num; } int getNumUniqueKmers(); void addThetas(vector, int); vector getTheta() { return kmerVector; } private: string getKmerBases(int); int kmerSize; // value of k int numPossibleKmers; // 4^kmerSize int numUniqueKmers; // number of unique kmers seen in a group ~ O_kj int numKmers; // number of kmers in a sequence vector kmerVector; // counts of kmers across all sequences in a node }; /**********************************************************************************************************************/ #endif mothur-1.48.0/source/classifier/kmertree.cpp000077500000000000000000000321331424121717000210740ustar00rootroot00000000000000// // kmerTree.cpp // pdsBayesian // // Created by Patrick Schloss on 4/3/12. // Copyright (c) 2012 University of Michigan. All rights reserved. // #include "kmernode.h" #include "kmertree.h" /**************************************************************************************************/ KmerTree::KmerTree(string referenceFileName, string taxonomyFileName, int k, int cutoff) : Classify(), confidenceThreshold(cutoff), kmerSize(k){ try { KmerNode* newNode = new KmerNode("Root", 0, kmerSize); tree.push_back(newNode); // the tree is stored as a vector of elements of type TaxonomyNode int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; numPossibleKmers = power4s[kmerSize]; string refTaxonomy; readTaxonomy(taxonomyFileName); ifstream referenceFile; Utils util; util.openInputFile(referenceFileName, referenceFile); bool error = false; while(!referenceFile.eof()){ if (m->getControl_pressed()) { break; } Sequence seq(referenceFile); gobble(referenceFile); if (seq.getName() != "") { map::iterator it = taxonomy.find(seq.getName()); if (it != taxonomy.end()) { refTaxonomy = it->second; // lookup the taxonomy string for the current reference sequence vector kmerProfile = ripKmerProfile(seq.getUnaligned()); //convert to kmer vector addTaxonomyToTree(seq.getName(), refTaxonomy, kmerProfile); }else { m->mothurOut(seq.getName() + " is in your reference file, but not in your taxonomy file, please correct.\n"); error = true; } } } referenceFile.close(); if (error) { m->setControl_pressed(true); } numTaxa = (int)tree.size(); numLevels = 0; for(int i=0;igetLevel(); if(level > numLevels){ numLevels = level; } } numLevels++; aggregateThetas(); int dbSize = tree[0]->getNumSeqs(); for(int i=0;icheckTheta(); tree[i]->setNumUniqueKmers(tree[0]->getNumUniqueKmers()); tree[i]->setTotalSeqs(dbSize); } } catch(exception& e) { m->errorOut(e, "KmerTree", "KmerTree"); exit(1); } } /**************************************************************************************************/ KmerTree::~KmerTree(){ for(int i=0;i 
KmerTree::ripKmerProfile(string sequence){ try { // assume all input sequences are unaligned int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; int nKmers = (int)sequence.length() - kmerSize + 1; vector kmerProfile(numPossibleKmers + 1, 0); for(int i=0;igetControl_pressed()) { break; } int kmer = 0; for(int j=0;jerrorOut(e, "KmerTree", "ripKmerProfile"); exit(1); } } /**************************************************************************************************/ int KmerTree::addTaxonomyToTree(string seqName, string taxonomy, vector& sequence){ try { KmerNode* newNode; string taxonName = ""; int treePosition = 0; // the root is element 0 int level = 1; for(int i=0;igetControl_pressed()) { break; } if(taxonomy[i] == ';'){ // looking for semicolons... if (taxonName == "") { m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;\n"); m->setControl_pressed(true); } int newIndex = tree[treePosition]->getChildIndex(taxonName);// look to see if your current node already // has a child with the new taxonName if(newIndex != -1) { treePosition = newIndex; } // if you've seen it before, jump to that else { // position in the tree int newChildIndex = (int)tree.size(); // otherwise, we'll have to create one... tree[treePosition]->makeChild(taxonName, newChildIndex); newNode = new KmerNode(taxonName, level, kmerSize); newNode->setParent(treePosition); tree.push_back(newNode); treePosition = newChildIndex; } // sequence data to that node to update that node's theta - seems slow... taxonName = ""; // clear out the taxon name that we will build as we look level++; } // for a semicolon else{ taxonName += taxonomy[i]; // keep adding letters until we reach a semicolon } } tree[treePosition]->loadSequence(sequence); // now that we've gotten to the correct node, add the return 0; } catch(exception& e) { m->errorOut(e, "KmerTree", "addTaxonomyToTree"); exit(1); } } /**************************************************************************************************/ int KmerTree::aggregateThetas(){ try { vector > levelMatrix(numLevels+1); for(int i=0;igetControl_pressed()) { return 0; } levelMatrix[tree[i]->getLevel()].push_back(i); } for(int i=numLevels-1;i>0;i--) { if (m->getControl_pressed()) { return 0; } for(int j=0;jgetParent()]->addThetas(holder->getTheta(), holder->getNumSeqs()); } } return 0; } catch(exception& e) { m->errorOut(e, "KmerTree", "aggregateThetas"); exit(1); } } /**************************************************************************************************/ int KmerTree::getMinRiskIndexKmer(vector& sequence, vector& taxaIndices, vector& probabilities){ try { int numProbs = (int)probabilities.size(); vector G(numProbs, 0.2); //a random sequence will, on average, be 20% similar to any other sequence; not sure that this holds up for kmers; whatever. 
vector risk(numProbs, 0); for(int i=1;igetControl_pressed()) { return 0; } G[i] = tree[taxaIndices[i]]->getSimToConsensus(sequence); } double minRisk = MOTHURMAX; int minRiskIndex = 0; for(int i=0;igetControl_pressed()) { return 0; } for(int j=0;jerrorOut(e, "KmerTree", "getMinRiskIndexKmer"); exit(1); } } /**************************************************************************************************/ int KmerTree::sanityCheck(vector >& indices, vector& maxIndices){ try { int finalLevel = (int)indices.size()-1; for(int position=1;positiongetControl_pressed()) { return 0; } int predictedParent = tree[indices[position][maxIndices[position]]]->getParent(); int actualParent = indices[position-1][maxIndices[position-1]]; if(predictedParent != actualParent){ finalLevel = position - 1; return finalLevel; } } return finalLevel; } catch(exception& e) { m->errorOut(e, "KmerTree", "sanityCheck"); exit(1); } } /**************************************************************************************************/ string KmerTree::getTaxonomy(Sequence* thisSeq, string& simpleTax, bool& flipped){ try { simpleTax = ""; string seqName = thisSeq->getName(); string querySequence = thisSeq->getAligned(); string taxonProbabilityString = ""; string unalignedSeq = thisSeq->getUnaligned(); double logPOutlier = (querySequence.length() - kmerSize + 1) * log(1.0/(double)tree[0]->getNumUniqueKmers()); vector queryProfile = ripKmerProfile(unalignedSeq); //convert to kmer vector vector > pXgivenKj_D_j(numLevels); vector > indices(numLevels); for(int i=0;igetControl_pressed()) { return taxonProbabilityString; } pXgivenKj_D_j[i].push_back(logPOutlier); indices[i].push_back(-1); } for(int i=0;igetControl_pressed()) { return taxonProbabilityString; } pXgivenKj_D_j[tree[i]->getLevel()].push_back(tree[i]->getPxGivenkj_D_j(queryProfile)); indices[tree[i]->getLevel()].push_back(i); } vector sumLikelihood(numLevels, 0); vector bestPosterior(numLevels, 0); vector maxIndex(numLevels, 0); int maxPosteriorIndex; //let's find the best level and taxa within that level for(int i=0;igetControl_pressed()) { return taxonProbabilityString; } int numTaxaInLevel = (int)indices[i].size(); vector posteriors(numTaxaInLevel, 0); sumLikelihood[i] = getLogExpSum(pXgivenKj_D_j[i], maxPosteriorIndex); maxPosteriorIndex = 0; for(int j=0;j posteriors[maxPosteriorIndex]){ maxPosteriorIndex = j; } } maxIndex[i] = getMinRiskIndexKmer(queryProfile, indices[i], posteriors); maxIndex[i] = maxPosteriorIndex; bestPosterior[i] = posteriors[maxIndex[i]]; } int saneDepth = sanityCheck(indices, maxIndex); simpleTax = ""; int savedspot = 1; taxonProbabilityString = ""; for(int i=1;i<=saneDepth;i++){ if (m->getControl_pressed()) { return taxonProbabilityString; } int confidenceScore = (int) (bestPosterior[i] * 100); if (confidenceScore >= confidenceThreshold) { if(indices[i][maxIndex[i]] != -1){ taxonProbabilityString += tree[indices[i][maxIndex[i]]]->getName() + "(" + toString(confidenceScore) + ");"; simpleTax += tree[indices[i][maxIndex[i]]]->getName() + ";"; } else{ taxonProbabilityString += "unclassified(" + toString(confidenceScore) + ");"; simpleTax += "unclassified;"; } }else { break; } savedspot = i; } for(int i=savedspot+1;igetControl_pressed()) { return taxonProbabilityString; } taxonProbabilityString += "unclassified(0);"; simpleTax += "unclassified;"; } return taxonProbabilityString; } catch(exception& e) { m->errorOut(e, "KmerTree", "getTaxonomy"); exit(1); } } 
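/* Illustrative sketch, not part of the original mothur sources: getTaxonomy() above turns the log-likelihoods collected for each taxon at a level into posterior probabilities with the log-sum-exp trick (see Classify::getLogExpSum in classify.cpp) and reports (int)(bestPosterior * 100) as the confidence score that is compared against confidenceThreshold. The hypothetical helper below shows that normalization step on its own. */
static vector<double> exampleLevelPosteriors(const vector<double>& logLikelihoods){
	double maxLog = *max_element(logLikelihoods.begin(), logLikelihoods.end());
	double expSum = 0.0;
	for (double logLike : logLikelihoods) { expSum += exp(logLike - maxLog); }	//shift by the max for numerical stability
	double logSum = maxLog + log(expSum);	//log of the summed likelihoods at this level
	vector<double> posteriors;
	for (double logLike : logLikelihoods) { posteriors.push_back(exp(logLike - logSum)); }	//posterior for each taxon
	return posteriors;
}
/* For example, hypothetical level log-likelihoods of {-10, -12, -20} normalize to posteriors of roughly {0.88, 0.12, 0.00}, so the best taxon at that level would be reported with a confidence score of 88. */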
/**************************************************************************************************/ mothur-1.48.0/source/classifier/kmertree.h000077500000000000000000000013621424121717000205410ustar00rootroot00000000000000// // kmerTree.h // pdsBayesian // // Created by Patrick Schloss on 4/3/12. // Copyright (c) 2012 University of Michigan. All rights reserved. // #ifndef pdsBayesian_kmerTree_h #define pdsBayesian_kmerTree_h #include "classify.h" class KmerNode; class KmerTree : public Classify { public: KmerTree(string, string, int, int); ~KmerTree(); string getTaxonomy(Sequence*, string&, bool&); private: int addTaxonomyToTree(string, string, vector&); vector ripKmerProfile(string); int getMinRiskIndexKmer(vector&, vector&, vector&); int aggregateThetas(); int sanityCheck(vector >&, vector&); int kmerSize; int numPossibleKmers, confidenceThreshold; vector tree; }; #endif mothur-1.48.0/source/classifier/knn.cpp000077500000000000000000000105231424121717000200430ustar00rootroot00000000000000/* * knn.cpp * Mothur * * Created by westcott on 11/4/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "knn.h" /**************************************************************************************************/ Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n, int tid, string version) : Classify(), num(n), search(method) { try { threadID = tid; shortcuts = true; //create search database and names vector generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch, version); } catch(exception& e) { m->errorOut(e, "Knn", "Knn"); exit(1); } } /**************************************************************************************************/ void Knn::setDistName(string s) { try { outDistName = s; ofstream outDistance; Utils util; util.openOutputFile(outDistName, outDistance); outDistance << "Name\tBestMatch\tDistance" << endl; outDistance.close(); } catch(exception& e) { m->errorOut(e, "Knn", "setDistName"); exit(1); } } /**************************************************************************************************/ Knn::~Knn() { try { delete phyloTree; if (database != nullptr) { delete database; } } catch(exception& e) { m->errorOut(e, "Knn", "~Knn"); exit(1); } } /**************************************************************************************************/ string Knn::getTaxonomy(Sequence* seq, string& simpleTax, bool& flipped) { try { string tax; simpleTax = ""; //use database to find closest seq vector Scores; vector closest = database->findClosestSequences(seq, num, Scores); Utils util; if (search == "distance") { ofstream outDistance; util.openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << Scores[0] << endl; outDistance.close(); } if (m->getControl_pressed()) { return tax; } vector closestNames; for (int i = 0; i < closest.size(); i++) { //find that sequences taxonomy in map it = taxonomy.find(names[closest[i]]); //is this sequence in the taxonomy file if (it == taxonomy.end()) { //error not in file m->mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file. It will be eliminated as a match to sequence " + seq->getName() + ".\n"); }else{ closestNames.push_back(it->first); } } if (closestNames.size() == 0) { m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. 
\n"); tax = "unknown;"; }else{ tax = findCommonTaxonomy(closestNames); if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ".\n"); tax = "unknown;"; } } simpleTax = tax; return tax; } catch(exception& e) { m->errorOut(e, "Knn", "getTaxonomy"); exit(1); } } /**************************************************************************************************/ string Knn::findCommonTaxonomy(vector closest) { try { string conTax; //create a tree containing sequences from this bin PhyloTree p; for (int i = 0; i < closest.size(); i++) { p.addSeqToTree(closest[i], taxonomy[closest[i]]); } //build tree p.assignHeirarchyIDs(0); TaxNode currentNode = p.get(0); //at each level while (currentNode.children.size() != 0) { //you still have more to explore TaxNode bestChild; int bestChildSize = 0; //go through children for (map::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) { TaxNode temp = p.get(itChild->second); //select child with largest accessions - most seqs assigned to it if (temp.accessions.size() > bestChildSize) { bestChild = p.get(itChild->second); bestChildSize = temp.accessions.size(); } } if (bestChildSize == closest.size()) { //if yes, add it conTax += bestChild.name + ";"; }else{ //if no, quit break; } //move down a level currentNode = bestChild; } return conTax; } catch(exception& e) { m->errorOut(e, "Knn", "findCommonTaxonomy"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/classifier/knn.h000077500000000000000000000013171424121717000175110ustar00rootroot00000000000000#ifndef KNN_H #define KNN_H /* * knn.h * Mothur * * Created by westcott on 11/4/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "classify.h" /**************************************************************************************************/ class Knn : public Classify { public: Knn(string, string, string, int, float, float, float, float, int, int, string); ~Knn(); void setDistName(string s); string getTaxonomy(Sequence*, string&, bool&); private: int num; string findCommonTaxonomy(vector); string search, outDistName; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/classifier/phylosummary.cpp000077500000000000000000000655701424121717000220420ustar00rootroot00000000000000/* * rawTrainingDataMaker.cpp * Mothur * * Created by westcott on 4/21/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "phylosummary.h" /**************************************************************************************************/ PhyloSummary::PhyloSummary(string refTfile, CountTable* c, bool r, int p){ try { m = MothurOut::getInstance(); maxLevel = 0; ignore = false; numSeqs = 0; relabund = r; printlevel = p; ct = c; groupmap = nullptr; //check for necessary files string taxFileNameTest = util.getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum")); ifstream FileTest(taxFileNameTest.c_str()); if (!FileTest) { m->mothurOut("Error: can't find " + taxFileNameTest + ".\n"); exit(1); }else{ readTreeStruct(FileTest); } tree[0].rank = "0"; assignRank(0); } catch(exception& e) { m->errorOut(e, "PhyloSummary", "PhyloSummary"); exit(1); } } /**************************************************************************************************/ PhyloSummary::PhyloSummary(CountTable* c, bool r, int p){ try { m = MothurOut::getInstance(); maxLevel = 0; ignore = true; numSeqs = 0; relabund = r; printlevel = p; ct = c; groupmap = nullptr; tree.push_back(rawTaxNode("Root")); tree[0].rank = "0"; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "PhyloSummary"); exit(1); } } /**************************************************************************************************/ PhyloSummary::PhyloSummary(string refTfile, GroupMap* g, bool r, int p){ try { m = MothurOut::getInstance(); maxLevel = 0; ignore = false; numSeqs = 0; relabund = r; printlevel = p; groupmap = g; ct = nullptr; //check for necessary files string taxFileNameTest = util.getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum")); ifstream FileTest(taxFileNameTest.c_str()); if (!FileTest) { m->mothurOut("Error: can't find " + taxFileNameTest + ".\n"); exit(1); }else{ readTreeStruct(FileTest); } tree[0].rank = "0"; assignRank(0); } catch(exception& e) { m->errorOut(e, "PhyloSummary", "PhyloSummary"); exit(1); } } /**************************************************************************************************/ PhyloSummary::PhyloSummary(GroupMap* g, bool r, int p){ try { m = MothurOut::getInstance(); maxLevel = 0; ignore = true; numSeqs = 0; relabund = r; printlevel = p; groupmap = g; ct = nullptr; tree.push_back(rawTaxNode("Root")); tree[0].rank = "0"; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "PhyloSummary"); exit(1); } } /**************************************************************************************************/ int PhyloSummary::summarize(string userTfile){ try { map temp; util.readTax(userTfile, temp, true); for (map::iterator itTemp = temp.begin(); itTemp != temp.end();) { addSeqToTree(itTemp->first, itTemp->second); temp.erase(itTemp++); } return numSeqs; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "summarize"); exit(1); } } /**************************************************************************************************/ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){ try { numSeqs++; map::iterator childPointer; int currentNode = 0; string taxon; int level = 0; util.removeConfidences(seqTaxonomy); vector taxons; util.splitAtChar(seqTaxonomy, taxons, ';'); for(string taxon : taxons) { level++; if (m->getControl_pressed()) { return 0; } childPointer = tree[currentNode].children.find(taxon); if(childPointer != tree[currentNode].children.end()){ //if the node already exists, update count and move on int thisCount = 1; if (groupmap != nullptr) { //find out the sequences group string group = groupmap->getGroup(seqName); if (group == 
"not found") { m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total.\n"); } //do you have a count for this group? map::iterator itGroup = tree[childPointer->second].groupCount.find(group); //if yes, increment it - there should not be a case where we can't find it since we load group in read if (itGroup != tree[childPointer->second].groupCount.end()) { tree[childPointer->second].groupCount[group]++; } }else if (ct != nullptr) { if (ct->hasGroupInfo()) { vector groupCounts = ct->getGroupCounts(seqName); vector groups = ct->getNamesOfGroups(); for (int i = 0; i < groups.size(); i++) { if (groupCounts[i] != 0) { //do you have a count for this group? map::iterator itGroup = tree[childPointer->second].groupCount.find(groups[i]); //if yes, increment it - there should not be a case where we can't find it since we load group in read if (itGroup != tree[childPointer->second].groupCount.end()) { tree[childPointer->second].groupCount[groups[i]] += groupCounts[i]; } } } } thisCount = ct->getNumSeqs(seqName); } tree[childPointer->second].total += thisCount; currentNode = childPointer->second; }else{ if (ignore) { tree.push_back(rawTaxNode(taxon)); int index = tree.size() - 1; tree[index].parent = currentNode; tree[index].level = level; tree[currentNode].children[taxon] = index; int thisCount = 1; //initialize groupcounts if (groupmap != nullptr) { vector mGroups = groupmap->getNamesOfGroups(); for (int j = 0; j < mGroups.size(); j++) { tree[index].groupCount[mGroups[j]] = 0; } //find out the sequences group string group = groupmap->getGroup(seqName); if (group == "not found") { m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total.\n"); } //do you have a count for this group? map::iterator itGroup = tree[index].groupCount.find(group); //if yes, increment it - there should not be a case where we can't find it since we load group in read if (itGroup != tree[index].groupCount.end()) { tree[index].groupCount[group]++; } }else if (ct != nullptr) { if (ct->hasGroupInfo()) { vector mGroups = ct->getNamesOfGroups(); for (int j = 0; j < mGroups.size(); j++) { tree[index].groupCount[mGroups[j]] = 0; } vector groupCounts = ct->getGroupCounts(seqName); vector groups = ct->getNamesOfGroups(); for (int i = 0; i < groups.size(); i++) { if (groupCounts[i] != 0) { //do you have a count for this group? map::iterator itGroup = tree[index].groupCount.find(groups[i]); //if yes, increment it - there should not be a case where we can't find it since we load group in read if (itGroup != tree[index].groupCount.end()) { tree[index].groupCount[groups[i]]+=groupCounts[i]; } } } } thisCount = ct->getNumSeqs(seqName); } tree[index].total = thisCount; currentNode = index; }else{ //otherwise, error m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". 
This may cause totals of daughter levels not to add up in summary file.\n"); break; } } } if (level > maxLevel) { maxLevel = level; } return 0; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "addSeqToTree"); exit(1); } } /**************************************************************************************************/ int PhyloSummary::addSeqToTree(string seqTaxonomy, map containsGroup){ try { numSeqs++; map::iterator childPointer; int currentNode = 0; string taxon; int level = 0; util.removeConfidences(seqTaxonomy); vector taxons; util.splitAtChar(seqTaxonomy, taxons, ';'); for(string taxon : taxons) { level++; if (m->getControl_pressed()) { return 0; } childPointer = tree[currentNode].children.find(taxon); if(childPointer != tree[currentNode].children.end()){ //if the node already exists, update count and move on for (map::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) { if (itGroup->second ) { tree[childPointer->second].groupCount[itGroup->first]++; } } tree[childPointer->second].total++; currentNode = childPointer->second; }else{ if (ignore) { tree.push_back(rawTaxNode(taxon)); int index = tree.size() - 1; tree[index].parent = currentNode; tree[index].level = level; tree[index].total = 1; tree[currentNode].children[taxon] = index; for (map::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) { if (itGroup->second ) { tree[index].groupCount[itGroup->first]++; } } currentNode = index; }else{ //otherwise, error m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + ". This may cause totals of daughter levels not to add up in summary file.\n"); break; } } } if (level > maxLevel) { maxLevel = level; } return 0; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "addSeqToTree"); exit(1); } } /**************************************************************************************************/ void PhyloSummary::assignRank(int index){ try { map::iterator it; int counter = 1; for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ tree[it->second].rank = tree[index].rank + '.' 
+ toString(counter); counter++; assignRank(it->second); } } catch(exception& e) { m->errorOut(e, "PhyloSummary", "assignRank"); exit(1); } } /**************************************************************************************************/ string PhyloSummary::findTaxon(string rank){ try { vector pieces; vector indexes; util.splitAtChar(rank, pieces, '.'); for (int i = 0; i < pieces.size(); i++) { int temp; util.mothurConvert(pieces[i], temp); indexes.push_back(temp); } string taxon = ""; getTaxons(indexes, 1, 0, taxon); return taxon; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "findTaxon"); exit(1); } } /**************************************************************************************************/ string PhyloSummary::getTaxons(vector indexes, int index, int i, string& taxon){ try { int counter = 1; for(map::iterator it=tree[i].children.begin();it!=tree[i].children.end();it++){ if (counter == indexes[index]) { taxon += tree[it->second].name + ";"; getTaxons(indexes, index+1, it->second, taxon); } counter++; } return taxon; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "getNextTaxon"); exit(1); } } /**************************************************************************************************/ void PhyloSummary::print(ofstream& out, string output){ try { if (ignore) { assignRank(0); } vector mGroups; //print labels if (output == "detail") { out << "taxlevel\trankID\ttaxon\tdaughterlevels\ttotal"; } else { out << "taxonomy\ttotal"; } if (printlevel == -1) { printlevel = maxLevel; } else if (printlevel > maxLevel) { m->mothurOut("[WARNING]: Your printlevel is greater than your maxlevel, adjusting your printlevel to " + toString(maxLevel) + "\n"); printlevel = maxLevel; } if (groupmap != nullptr) { //so the labels match the counts below, since the map sorts them automatically... 
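//Illustrative note (the group names below are made up, not from this code): with two groups named
//F003D000 and F003D002, the "detail" header written above becomes
//"taxlevel<TAB>rankID<TAB>taxon<TAB>daughterlevels<TAB>total<TAB>F003D000<TAB>F003D002".
//The group-name columns are appended here in the order returned by getNamesOfGroups() so that they
//line up with the per-group counts printed for every node further down.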
//sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end()); mGroups = groupmap->getNamesOfGroups(); for (int i = 0; i < mGroups.size(); i++) { out << '\t' << mGroups[i]; } }else if (ct != nullptr) { if (ct->hasGroupInfo()) { mGroups = ct->getNamesOfGroups(); for (int i = 0; i < mGroups.size(); i++) { out << '\t' << mGroups[i]; } } } out << endl; int totalChildrenInTree = 0; map::iterator itGroup; map::iterator it; for(it=tree[0].children.begin();it!=tree[0].children.end();it++){ if (tree[it->second].total != 0) { totalChildrenInTree++; tree[0].total += tree[it->second].total; if (groupmap != nullptr) { for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } }else if ( ct != nullptr) { if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } } } } } //print root if (relabund) { out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); if (output == "detail") { out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << (tree[0].total/(double) tree[0].total); }else{ out << tree[0].name << "\t" << (tree[0].total/(double) tree[0].total); } if (groupmap != nullptr) { for (int i = 0; i < mGroups.size(); i++) { double thisNum = tree[0].groupCount[mGroups[i]]; thisNum /= (double) groupmap->getNumSeqs(mGroups[i]); out << '\t' << thisNum; } }else if ( ct != nullptr) { if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { double thisNum = tree[0].groupCount[mGroups[i]]; thisNum /= (double) ct->getGroupCount(mGroups[i]); out << '\t' << thisNum; } } } out << endl; }else { if (output == "detail") { out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << tree[0].total; }else{ out << tree[0].name << '\t' << tree[0].total; } if (groupmap != nullptr) { for (int i = 0; i < mGroups.size(); i++) { out << '\t'<< tree[0].groupCount[mGroups[i]]; } }else if ( ct != nullptr) { if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { out << '\t' << tree[0].groupCount[mGroups[i]]; } } } out << endl; } //print rest print(0, out, output); } catch(exception& e) { m->errorOut(e, "PhyloSummary", "print"); exit(1); } } /**************************************************************************************************/ void PhyloSummary::print(ofstream& out, bool relabund){ try { if (ignore) { assignRank(0); } int totalChildrenInTree = 0; map::iterator itGroup; map::iterator it; for(it=tree[0].children.begin();it!=tree[0].children.end();it++){ if (tree[it->second].total != 0) { totalChildrenInTree++; tree[0].total += tree[it->second].total; if (groupmap != nullptr) { vector mGroups = groupmap->getNamesOfGroups(); for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } }else if ( ct != nullptr) { vector mGroups = ct->getNamesOfGroups(); if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } } } } } //print root out << tree[0].name << "\t" << "1.0000"; //root relative abundance is 1, everyone classifies to root if (groupmap != nullptr) { vector mGroups = groupmap->getNamesOfGroups(); for (int i = 0; i < mGroups.size(); i++) { out << '\t' << "1.0000"; } }else if ( ct != nullptr) { vector mGroups = ct->getNamesOfGroups(); if (ct->hasGroupInfo()) { for (int i = 0; i < 
mGroups.size(); i++) { out << '\t' << "1.0000"; } } } out << endl; //print rest print(0, out, relabund); } catch(exception& e) { m->errorOut(e, "PhyloSummary", "print"); exit(1); } } /**************************************************************************************************/ void PhyloSummary::print(int i, ofstream& out, string output){ try { map::iterator it; for(it=tree[i].children.begin();it!=tree[i].children.end();it++){ if (tree[it->second].total != 0) { int totalChildrenInTree = 0; map::iterator it2; for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){ if (tree[it2->second].total != 0) { totalChildrenInTree++; } } if ((output == "detail") && (printlevel >= tree[it->second].level)) { if (relabund) { out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << (tree[it->second].total/(double) tree[0].total); }else { out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << tree[it->second].total; } }else { if (printlevel == tree[it->second].level) { //leaf node - we want to print it. Use rank to find full taxonomy if (relabund) { out << findTaxon(tree[it->second].rank) << '\t' << tree[it->second].total/(double) tree[0].total; }else { out << findTaxon(tree[it->second].rank) << '\t' << tree[it->second].total; } } } if (relabund) { map::iterator itGroup; if (groupmap != nullptr) { vector mGroups = groupmap->getNamesOfGroups(); for (int i = 0; i < mGroups.size(); i++) { if (((output == "detail") && (printlevel >= tree[it->second].level)) || (printlevel == tree[it->second].level)) { out << '\t' << (tree[it->second].groupCount[mGroups[i]]/(double)groupmap->getNumSeqs(mGroups[i])); } } }else if (ct != nullptr) { if (ct->hasGroupInfo()) { vector mGroups = ct->getNamesOfGroups(); for (int i = 0; i < mGroups.size(); i++) { if (((output == "detail") && (printlevel >= tree[it->second].level)) || (printlevel == tree[it->second].level)) { out << '\t' << (tree[it->second].groupCount[mGroups[i]]/(double)ct->getGroupCount(mGroups[i])); } } } } }else { map::iterator itGroup; if (groupmap != nullptr) { vector mGroups = groupmap->getNamesOfGroups(); for (int i = 0; i < mGroups.size(); i++) { if (((output == "detail") && (printlevel >= tree[it->second].level)) || (printlevel == tree[it->second].level)) { out << '\t' << tree[it->second].groupCount[mGroups[i]]; } } }else if (ct != nullptr) { if (ct->hasGroupInfo()) { vector mGroups = ct->getNamesOfGroups(); for (int i = 0; i < mGroups.size(); i++) { if (((output == "detail") && (printlevel >= tree[it->second].level)) || (printlevel == tree[it->second].level)) { out << '\t' << tree[it->second].groupCount[mGroups[i]]; } } } } } if (((output == "detail") && (printlevel >= tree[it->second].level)) || (printlevel == tree[it->second].level)) { out << endl; } } print(it->second, out, output); } } catch(exception& e) { m->errorOut(e, "PhyloSummary", "print"); exit(1); } } /**************************************************************************************************/ void PhyloSummary::print(int i, ofstream& out, bool relabund){ try { map::iterator it; for(it=tree[i].children.begin();it!=tree[i].children.end();it++){ if (tree[it->second].total != 0) { int totalChildrenInTree = 0; map::iterator it2; for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){ if (tree[it2->second].total != 0) { totalChildrenInTree++; } } string nodeName = ""; 
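//Build the full lineage label for this child by walking up the tree until the root (rank "0") is
//reached, prepending each ancestor's name with a '|' separator; the trailing '|' is trimmed off
//afterwards. Illustrative example: a class-level node would print as something like
//"Bacteria|Firmicutes|Clostridia".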
int thisNode = it->second; while (tree[thisNode].rank != "0") { //while you are not at top if (m->getControl_pressed()) { break; } nodeName = tree[thisNode].name + "|" + nodeName; thisNode = tree[thisNode].parent; } if (nodeName != "") { nodeName = nodeName.substr(0, nodeName.length()-1); } out << nodeName << "\t" << (tree[it->second].total / (float)tree[i].total); map::iterator itGroup; if (groupmap != nullptr) { vector mGroups = groupmap->getNamesOfGroups(); for (int j = 0; j < mGroups.size(); j++) { if (tree[i].groupCount[mGroups[j]] == 0) { out << '\t' << 0; }else { out << '\t' << (tree[it->second].groupCount[mGroups[j]] / (float)tree[i].groupCount[mGroups[j]]); } } }else if (ct != nullptr) { if (ct->hasGroupInfo()) { vector mGroups = ct->getNamesOfGroups(); for (int j = 0; j < mGroups.size(); j++) { if (tree[i].groupCount[mGroups[j]] == 0) { out << '\t' << 0 ; }else { out << '\t' << (tree[it->second].groupCount[mGroups[j]] / (float)tree[i].groupCount[mGroups[j]]); } } } } out << endl; } print(it->second, out, relabund); } } catch(exception& e) { m->errorOut(e, "PhyloSummary", "print"); exit(1); } } /**************************************************************************************************/ void PhyloSummary::readTreeStruct(ifstream& in){ try { //read version string line = util.getline(in); gobble(in); int num; in >> num; gobble(in); tree.resize(num); in >> maxLevel; gobble(in); //read the tree file for (int i = 0; i < tree.size(); i++) { in >> tree[i].level >> num; gobble(in); //num contains the number of children tree[i] has tree[i].name = util.getline(in); gobble(in); //set children string childName; int childIndex; for (int j = 0; j < num; j++) { in >> childIndex; gobble(in); childName = util.getline(in); gobble(in); tree[i].children[childName] = childIndex; } //initialize groupcounts if (groupmap != nullptr) { for (int j = 0; j < (groupmap->getNamesOfGroups()).size(); j++) { tree[i].groupCount[(groupmap->getNamesOfGroups())[j]] = 0; } }else if (ct != nullptr) { if (ct->hasGroupInfo()) { for (int j = 0; j < (ct->getNamesOfGroups()).size(); j++) { tree[i].groupCount[(ct->getNamesOfGroups())[j]] = 0; } } } tree[i].total = 0; gobble(in); //if (tree[i].level > maxLevel) { maxLevel = tree[i].level; } } } catch(exception& e) { m->errorOut(e, "PhyloSummary", "readTreeStruct"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/classifier/phylosummary.h000077500000000000000000000034251424121717000214760ustar00rootroot00000000000000#ifndef RAWTRAININGDATAMAKER_H #define RAWTRAININGDATAMAKER_H /* * rawTrainingDataMaker.h * Mothur * * Created by westcott on 4/21/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "mothur.h" #include "mothurout.h" #include "groupmap.h" #include "counttable.h" #include "utils.hpp" /**************************************************************************************************/ struct rawTaxNode { map children; //childs name to index in tree int parent, level; string name, rank; map groupCount; int total; rawTaxNode(string n) : name(n), level(0), parent(-1), total(0) {} rawTaxNode(){} }; /**************************************************************************************************/ class PhyloSummary { public: PhyloSummary(GroupMap*, bool, int); PhyloSummary(string, GroupMap*, bool, int); PhyloSummary(CountTable*, bool, int); PhyloSummary(string, CountTable*, bool, int); ~PhyloSummary() = default; int summarize(string); //pass it a taxonomy file and a group file and it makes the tree int addSeqToTree(string, string); int addSeqToTree(string, map); void print(ofstream&, string); void print(ofstream&, bool); int getMaxLevel() { return maxLevel; } private: vector tree; void print(int, ofstream&, string); void print(int, ofstream&, bool); void assignRank(int); string getTaxons(vector indexes, int index, int i, string&); void readTreeStruct(ifstream&); string findTaxon(string); GroupMap* groupmap; CountTable* ct; bool ignore, relabund; int numNodes, printlevel; int numSeqs; int maxLevel; MothurOut* m; Utils util; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/classifier/phylotree.cpp000077500000000000000000000501131424121717000212670ustar00rootroot00000000000000/* * doTaxonomy.cpp * * * Created by Pat Schloss on 6/17/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "phylotree.h" /**************************************************************************************************/ PhyloTree::PhyloTree(){ try { m = MothurOut::getInstance(); current = CurrentFile::getInstance(); numNodes = 1; numSeqs = 0; tree.push_back(TaxNode("Root")); tree[0].heirarchyID = "0"; tree[0].level = 0; maxLevel = 0; calcTotals = true; addSeqToTree("unknown", "unknown;"); } catch(exception& e) { m->errorOut(e, "PhyloTree", "PhyloTree"); exit(1); } } /**************************************************************************************************/ PhyloTree::PhyloTree(ifstream& in, string filename){ try { m = MothurOut::getInstance(); current = CurrentFile::getInstance(); calcTotals = false; numNodes = 0; numSeqs = 0; //read version string line = util.getline(in); gobble(in); in >> numNodes; gobble(in); tree.resize(numNodes); in >> maxLevel; gobble(in); for (int i = 0; i < tree.size(); i++) { tree[i].name = util.getline(in); gobble(in); in >> tree[i].level >> tree[i].parent; gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + toString(i) + '\t' + tree[i].name + '\t' + toString(tree[i].level) + "\n"); } } //read genus nodes int numGenus = 0; in >> numGenus; gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + toString(numNodes) + '\t' + toString(numGenus) + '\t' + toString(maxLevel) + "\n"); } int gnode, gsize; totals.clear(); for (int i = 0; i < numGenus; i++) { in >> gnode >> gsize; gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + toString(gnode) + '\t' + toString(gsize) + '\t' + toString(i) + "\n"); } uniqueTaxonomies.insert(gnode); totals.push_back(gsize); } in.close(); } catch(exception& e) { m->errorOut(e, "PhyloTree", "PhyloTree"); exit(1); } } 
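/*
 * The constructor above reads the reference-tree file written by printTreeNodes(): a "#<version>"
 * header line, the number of nodes, the maximum level, then for every node its name on one line
 * followed by "<level><TAB><parent index>", and finally the number of genus-level nodes with one
 * "<node index><TAB><accession count>" line per genus node. Illustrative sketch of such a file
 * (values are made up):
 *
 *   #<mothur version>
 *   3
 *   2
 *   Root
 *   0	-1
 *   Bacteria
 *   1	0
 *   ...
 */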
/**************************************************************************************************/ PhyloTree::PhyloTree(string tfile){ try { m = MothurOut::getInstance(); current = CurrentFile::getInstance(); numNodes = 1; numSeqs = 0; tree.push_back(TaxNode("Root")); tree[0].heirarchyID = "0"; tree[0].level = 0; maxLevel = 0; calcTotals = true; string name, tax; map temp; util.readTax(tfile, temp, true); for (map::iterator itTemp = temp.begin(); itTemp != temp.end();) { addSeqToTree(itTemp->first, itTemp->second); temp.erase(itTemp++); } string unknownTax = "unknown;"; //added last taxon until you get desired level for (int i = 1; i < maxLevel; i++) { unknownTax += "unknown_unclassfied;"; } addSeqToTree("unknown", unknownTax); assignHeirarchyIDs(0); //create file for summary if needed setUp(tfile); } catch(exception& e) { m->errorOut(e, "PhyloTree", "PhyloTree"); exit(1); } } /**************************************************************************************************/ vector PhyloTree::getSeqs(string seqTaxonomy){ try { string taxCopy = seqTaxonomy; vector names; map::iterator childPointer; int currentNode = 0; util.removeConfidences(seqTaxonomy); vector taxons; util.splitAtChar(seqTaxonomy, taxons, ';'); for(string taxon : taxons) { if (m->getControl_pressed()) { return names; } if (m->getDebug()) { m->mothurOut(taxon +'\n'); } if (taxon == "") { m->mothurOut(taxCopy + " has an error in the taxonomy. This may be due to a ;;\n"); break; } childPointer = tree[currentNode].children.find(taxon); if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on currentNode = childPointer->second; } else{ //otherwise, error this taxonomy is not in tree m->mothurOut("[ERROR]: " + taxCopy + " is not in taxonomy tree, please correct.\n"); m->setControl_pressed(true); return names; } } //return names in this taxonomy names = tree[currentNode].accessions; return names; } catch(exception& e) { m->errorOut(e, "PhyloTree", "getSeqs"); exit(1); } } /**************************************************************************************************/ int PhyloTree::addSeqToTree(string seqName, vector seqTax){ try { numSeqs++; map::iterator childPointer; int currentNode = 0; int level = 0; tree[0].accessions.push_back(seqName); for (int i = 0; i < seqTax.size(); i++) { level++; string taxon = seqTax[i].name; if (m->getControl_pressed()) { return 0; } if (m->getDebug()) { m->mothurOut(seqName +'\t' + taxon +'\n'); } childPointer = tree[currentNode].children.find(taxon); if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on currentNode = childPointer->second; tree[currentNode].accessions.push_back(seqName); name2Taxonomy[seqName] = currentNode; } else{ //otherwise, create it tree.push_back(TaxNode(taxon)); tree[currentNode].children[taxon] = numNodes; tree[numNodes].level = level; tree[numNodes].parent = currentNode; currentNode = numNodes; numNodes++; tree[currentNode].accessions.push_back(seqName); name2Taxonomy[seqName] = currentNode; } if (i == (seqTax.size()-1)) { uniqueTaxonomies.insert(currentNode); } //last level } //save maxLevel for binning the unclassified seqs if (level > maxLevel) { maxLevel = level; } return level; } catch(exception& e) { m->errorOut(e, "PhyloTree", "addSeqToTree"); exit(1); } } /**************************************************************************************************/ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){ try { numSeqs++; map::iterator childPointer; 
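//addSeqToTree() strips confidence scores, splits the taxonomy string on ';' and walks the tree from
//the root, reusing an existing child node when the taxon is already present and creating a new
//TaxNode otherwise. The sequence name is appended to the accessions list of every node on its path,
//the deepest node reached is added to uniqueTaxonomies, and maxLevel tracks the longest lineage seen.
//Illustrative example: "Bacteria;Firmicutes;Bacilli;" creates or extends the path
//Root -> Bacteria -> Firmicutes -> Bacilli and files the sequence under each of those nodes.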
tree[0].accessions.push_back(seqName); util.removeConfidences(seqTaxonomy); vector taxons; util.splitAtChar(seqTaxonomy, taxons, ';'); int level = 0; int currentNode = 0; for(string taxon : taxons) { level++; if (m->getControl_pressed()) { return 0; } if (m->getDebug()) { m->mothurOut(seqName +'\t' + taxon +'\n'); } if (taxon == "") { m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;\n"); if (currentNode != 0) { uniqueTaxonomies.insert(currentNode); } break; } childPointer = tree[currentNode].children.find(taxon); if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on currentNode = childPointer->second; tree[currentNode].accessions.push_back(seqName); name2Taxonomy[seqName] = currentNode; } else{ //otherwise, create it tree.push_back(TaxNode(taxon)); tree[currentNode].children[taxon] = numNodes; tree[numNodes].level = level; tree[numNodes].parent = currentNode; currentNode = numNodes; numNodes++; tree[currentNode].accessions.push_back(seqName); name2Taxonomy[seqName] = currentNode; } } uniqueTaxonomies.insert(currentNode); //save maxLevel for binning the unclassified seqs if (level > maxLevel) { maxLevel = level; } return level; } catch(exception& e) { m->errorOut(e, "PhyloTree", "addSeqToTree"); exit(1); } } /**************************************************************************************************/ vector PhyloTree::getGenusNodes() { try { genusIndex.clear(); //generate genusIndexes set::iterator it2; map temp; for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(*it2); temp[*it2] = genusIndex.size()-1; } for (map::iterator itName = name2Taxonomy.begin(); itName != name2Taxonomy.end(); itName++) { map::iterator itTemp = temp.find(itName->second); if (itTemp != temp.end()) { name2GenusNodeIndex[itName->first] = itTemp->second; } else { m->mothurOut("[ERROR]: trouble making name2GenusNodeIndex, aborting.\n"); m->setControl_pressed(true); } } return genusIndex; } catch(exception& e) { m->errorOut(e, "PhyloTree", "getGenusNodes"); exit(1); } } /**************************************************************************************************/ vector PhyloTree::getGenusTotals() { try { if (calcTotals) { totals.clear(); //reset counts because we are on a new word for (int j = 0; j < genusIndex.size(); j++) { totals.push_back(tree[genusIndex[j]].accessions.size()); } return totals; }else{ return totals; } } catch(exception& e) { m->errorOut(e, "PhyloTree", "getGenusNodes"); exit(1); } } /**************************************************************************************************/ vector PhyloTree::getNodes(int level) { try { vector thisLevelsNodes; //check level is not above max if (level > maxLevel) { m->mothurOut("[ERROR]: Tax level not present in file. Cannot continue.\n"); m->setControl_pressed(true); return thisLevelsNodes; } for (size_t i = 0; i < tree.size(); i++) { if (tree[i].level == level) { thisLevelsNodes.push_back(tree[i]); } } return thisLevelsNodes; } catch(exception& e) { m->errorOut(e, "PhyloTree", "getNodes"); exit(1); } } /**************************************************************************************************/ void PhyloTree::assignHeirarchyIDs(int index){ try { map::iterator it; int counter = 1; for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ if (m->getDebug()) { m->mothurOut(toString(index) +'\t' + tree[it->second].name +'\n'); } tree[it->second].heirarchyID = tree[index].heirarchyID + '.' 
+ toString(counter); counter++; //tree[it->second].level = tree[index].level + 1; //save maxLevel for binning the unclassified seqs if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; } assignHeirarchyIDs(it->second); } } catch(exception& e) { m->errorOut(e, "PhyloTree", "assignHeirarchyIDs"); exit(1); } } /**************************************************************************************************/ void PhyloTree::setUp(string tfile){ try{ string taxFileNameTest = tfile.substr(0,tfile.find_last_of(".")+1) + "tree.sum"; binUnclassified(taxFileNameTest); } catch(exception& e) { m->errorOut(e, "PhyloTree", "setUp"); exit(1); } } /**************************************************************************************************/ void PhyloTree::binUnclassified(string file){ try { ofstream out; util.openOutputFile(file, out); map::iterator itBin; map::iterator childPointer; vector copy = tree; //fill out tree fillOutTree(0, copy); //get leaf nodes that may need extension for (int i = 0; i < copy.size(); i++) { if (copy[i].children.size() == 0) { leafNodes[i] = i; } } if (m->getDebug()) { m->mothurOut("maxLevel = " + toString(maxLevel) +'\n'); } int copyNodes = copy.size(); //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary map::iterator itLeaf; for (itLeaf = leafNodes.begin(); itLeaf != leafNodes.end(); itLeaf++) { if (m->getControl_pressed()) { out.close(); break; } int level = copy[itLeaf->second].level; int currentNode = itLeaf->second; if (m->getDebug()) { m->mothurOut(copy[currentNode].name +'\n'); } //this sequence is unclassified at some levels while(level < maxLevel){ level++; if (m->getDebug()) { m->mothurOut("level = " + toString(level) +'\n'); } string taxon = "unclassified"; //does the parent have a child names 'unclassified'? 
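//Each leaf that sits shallower than maxLevel is padded with a chain of "unclassified" placeholder
//nodes (reusing an existing "unclassified" child when one is already present) so that every path in
//this working copy of the tree reaches maxLevel before it is written out. Illustrative example:
//with maxLevel = 6, a lineage that stops at "Bacteria;Firmicutes;" gains four trailing
//"unclassified" nodes.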
childPointer = copy[currentNode].children.find(taxon); if(childPointer != copy[currentNode].children.end()){ //if the node already exists, move on currentNode = childPointer->second; //currentNode becomes 'unclassified' } else{ //otherwise, create it copy.push_back(TaxNode(taxon)); copy[currentNode].children[taxon] = copyNodes; copy[copyNodes].parent = currentNode; copy[copyNodes].level = copy[currentNode].level + 1; currentNode = copyNodes; copyNodes++; } } } if (!m->getControl_pressed()) { //print copy tree print(out, copy); } } catch(exception& e) { m->errorOut(e, "PhyloTree", "binUnclassified"); exit(1); } } /**************************************************************************************************/ void PhyloTree::fillOutTree(int index, vector& copy) { try { map::iterator it; it = copy[index].children.find("unclassified"); if (it == copy[index].children.end()) { //no unclassified at this level string taxon = "unclassified"; copy.push_back(TaxNode(taxon)); copy[index].children[taxon] = copy.size()-1; copy[copy.size()-1].parent = index; copy[copy.size()-1].level = copy[index].level + 1; } if (tree[index].level < maxLevel) { for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ //check your children fillOutTree(it->second, copy); } } } catch(exception& e) { m->errorOut(e, "PhyloTree", "fillOutTree"); exit(1); } } /**************************************************************************************************/ string PhyloTree::getFullTaxonomy(string seqName) { try { string tax = ""; int currentNode = name2Taxonomy[seqName]; while (tree[currentNode].parent != -1) { tax = tree[currentNode].name + ";" + tax; currentNode = tree[currentNode].parent; } return tax; } catch(exception& e) { m->errorOut(e, "PhyloTree", "getFullTaxonomy"); exit(1); } } /**************************************************************************************************/ void PhyloTree::print(ofstream& out, vector& copy){ try { //output mothur version out << "#" << current->getVersion() << endl; out << copy.size() << endl; out << maxLevel << endl; for (int i = 0; i < copy.size(); i++) { out << copy[i].level << '\t' << copy[i].children.size() << endl; out << copy[i].name << endl; map::iterator it; for(it=copy[i].children.begin();it!=copy[i].children.end();it++){ out << it->second << '\t' << it->first << endl; } out << endl; } out.close(); } catch(exception& e) { m->errorOut(e, "PhyloTree", "print"); exit(1); } } /**************************************************************************************************/ void PhyloTree::printTreeNodes(string treefilename) { try { ofstream outTree; util.openOutputFile(treefilename, outTree); //output mothur version outTree << "#" << current->getVersion() << endl; //print treenodes outTree << tree.size() << endl; outTree << maxLevel << endl; for (int i = 0; i < tree.size(); i++) { outTree << tree[i].name << endl; outTree << tree[i].level << '\t' << tree[i].parent << endl; } //print genus nodes outTree << endl << uniqueTaxonomies.size() << endl; set::iterator it2; for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { outTree << *it2 << '\t' << tree[*it2].accessions.size() << endl; } outTree << endl; outTree.close(); } catch(exception& e) { m->errorOut(e, "PhyloTree", "printTreeNodes"); exit(1); } } /**************************************************************************************************/ TaxNode PhyloTree::get(int i ){ try { if (i < tree.size()) { return tree[i]; } else { m->mothurOut("Mismatch with taxonomy and template 
files. Cannot continue.\n"); exit(1); } } catch(exception& e) { m->errorOut(e, "PhyloTree", "get"); exit(1); } } /**************************************************************************************************/ TaxNode PhyloTree::get(string seqName){ try { map::iterator itFind = name2Taxonomy.find(seqName); if (itFind != name2Taxonomy.end()) { return tree[name2Taxonomy[seqName]]; } else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue.\n"); exit(1);} } catch(exception& e) { m->errorOut(e, "PhyloTree", "get"); exit(1); } } /**************************************************************************************************/ string PhyloTree::getName(int i ){ try { if (i < tree.size()) { return tree[i].name; } else { m->mothurOut("Mismatch with taxonomy and template files. Cannot continue.\n"); exit(1); } } catch(exception& e) { m->errorOut(e, "PhyloTree", "get"); exit(1); } } /**************************************************************************************************/ int PhyloTree::getGenusIndex(string seqName){ try { map::iterator itFind = name2GenusNodeIndex.find(seqName); if (itFind != name2GenusNodeIndex.end()) { return itFind->second; } else { m->mothurOut("Cannot find " + seqName + ". Could be a mismatch with taxonomy and template files. Cannot continue.\n"); exit(1);} } catch(exception& e) { m->errorOut(e, "PhyloTree", "get"); exit(1); } } /**************************************************************************************************/ bool PhyloTree::ErrorCheck(vector templateFileNames){ try { bool okay = true; templateFileNames.push_back("unknown"); map::iterator itFind; map taxonomyFileNames = name2Taxonomy; if (m->getDebug()) { m->mothurOut("[DEBUG]: in error check. Numseqs in template = " + toString(templateFileNames.size()) + ". Numseqs in taxonomy = " + toString(taxonomyFileNames.size()) + ".\n"); } for (int i = 0; i < templateFileNames.size(); i++) { itFind = taxonomyFileNames.find(templateFileNames[i]); if (itFind != taxonomyFileNames.end()) { //found it so erase it taxonomyFileNames.erase(itFind); }else { m->mothurOut("'" +templateFileNames[i] + "' is in your template file and is not in your taxonomy file. Please correct.\n"); okay = false; } //templateFileNames.erase(templateFileNames.begin()+i); //i--; } templateFileNames.clear(); if (taxonomyFileNames.size() > 0) { //there are names in tax file that are not in template okay = false; for (itFind = taxonomyFileNames.begin(); itFind != taxonomyFileNames.end(); itFind++) { m->mothurOut(itFind->first + " is in your taxonomy file and is not in your template file. Please correct.\n"); } } return okay; } catch(exception& e) { m->errorOut(e, "PhyloTree", "ErrorCheck"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/classifier/phylotree.h000077500000000000000000000054221424121717000207370ustar00rootroot00000000000000#ifndef DOTAXONOMY_H #define DOTAXONOMY_H /* * phylotree.h * * * Created by Pat Schloss on 6/17/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. 
* */ #include "mothurout.h" #include "utils.hpp" #include "currentfile.h" /**************************************************************************************************/ struct TaxNode { vector accessions; //names of seqs in this branch of tree map children; //childs name to index in tree int parent, childNumber, level; string name, heirarchyID; TaxNode(string n) : name(n), level(0), parent(-1) { } TaxNode(){} }; /**************************************************************************************************/ class PhyloTree { #ifdef UNIT_TEST friend class TestPhyloTree; #endif public: PhyloTree(); PhyloTree(string); //pass it a taxonomy file and it makes the tree PhyloTree(ifstream&, string); //pass it a taxonomy file and it makes the train.tree ~PhyloTree() = default;; int addSeqToTree(string, string); int addSeqToTree(string, vector); void assignHeirarchyIDs(int); void printTreeNodes(string); //used by bayesian to save time vector getGenusNodes(); vector getGenusTotals(); void setUp(string); //used to create file needed for summary file if you use () constructor and add seqs manually instead of passing taxonomyfile TaxNode get(int i); TaxNode get(string seqName); vector getNodes(int); //returns vector of nodes at given level string getName(int i); int getGenusIndex(string seqName); string getFullTaxonomy(string); //pass a sequence name return taxonomy vector getSeqs(string); //returns names of sequences in given taxonomy int getMaxLevel() { return maxLevel; } int getNumSeqs() { return numSeqs; } int getNumNodes() { return (int)tree.size(); } bool ErrorCheck(vector); private: void print(ofstream&, vector&); //used to create static reference taxonomy file void fillOutTree(int, vector&); //used to create static reference taxonomy file void binUnclassified(string); vector tree; vector genusIndex; //holds the indexes in tree where the genus level taxonomies are stored vector totals; //holds the numSeqs at each genus level taxonomy map name2Taxonomy; //maps name to index in tree map name2GenusNodeIndex; set uniqueTaxonomies; //map of unique taxonomies map leafNodes; //used to create static reference taxonomy file //void print(int, ofstream&); int numNodes; int numSeqs; int maxLevel; bool calcTotals; MothurOut* m; CurrentFile* current; Utils util; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/classifier/taxonomyequalizer.cpp000077500000000000000000000067701424121717000230660ustar00rootroot00000000000000/* * taxonomyequalizer.cpp * Mothur * * Created by westcott on 11/20/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "taxonomyequalizer.h" /**************************************************************************************************/ TaxEqualizer::TaxEqualizer(string tfile, int c, string o) : cutoff(c), outputDir(o) { try { m = MothurOut::getInstance(); containsConfidence = false; ifstream inTax; util.openInputFile(tfile, inTax); highestLevel = getHighestLevel(inTax); if (!m->getControl_pressed()) { //if the user has specified a cutoff and it's smaller than the highest level if ((cutoff != -1) && (cutoff < highestLevel)) { highestLevel = cutoff; }else if (cutoff > highestLevel) { m->mothurOut("The highest level taxonomy you have is " + toString(highestLevel) + " and your cutoff is " + toString(cutoff) + ". 
I will set the cutoff to " + toString(highestLevel)+ "\n"); } inTax.close(); ifstream in; util.openInputFile(tfile, in); equalizedFile = outputDir + util.getRootName(util.getSimpleName(tfile)) + "equalized.taxonomy"; ofstream out; util.openOutputFile(equalizedFile, out); string name, tax; while (in) { if (m->getControl_pressed()) { break; } in >> name; gobble(in); tax = util.getline(in); gobble(in); if (containsConfidence) { util.removeConfidences(tax); } //is this a taxonomy that needs to be extended? if (seqLevels[name] < highestLevel) { tax = util.addUnclassifieds(tax, highestLevel, containsConfidence); }else if (seqLevels[name] > highestLevel) { //this can happen if the user enters a cutoff tax = util.trimTax(tax, highestLevel); } out << name << '\t' << tax << endl; } in.close(); out.close(); if (m->getControl_pressed()) { util.mothurRemove(equalizedFile); } }else { inTax.close(); } } catch(exception& e) { m->errorOut(e, "TaxEqualizer", "TaxEqualizer"); exit(1); } } /**************************************************************************************************/ int TaxEqualizer::getHighestLevel(ifstream& in) { try { int level = 0; string name, tax; while (in) { in >> name; gobble(in); tax = util.getline(in); gobble(in); //count levels in this taxonomy int thisLevel = 0; for (int i = 0; i < tax.length(); i++) { if (tax[i] == ';') { thisLevel++; } } //save sequences level seqLevels[name] = thisLevel; //is this the longest taxonomy? if (thisLevel > level) { level = thisLevel; testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out } if (!containsConfidence) { int openParen = testTax.find_last_of('('); int closeParen = testTax.find_last_of(')'); if ((openParen != string::npos) && (closeParen != string::npos)) { string confidenceScore = testTax.substr(openParen+1, (closeParen-(openParen+1))); if (util.isNumeric1(confidenceScore)) { //its a confidence containsConfidence = true; } } } } return level; } catch(exception& e) { m->errorOut(e, "TaxEqualizer", "getHighestLevel"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/classifier/taxonomyequalizer.h000077500000000000000000000022451424121717000225240ustar00rootroot00000000000000#ifndef TAXONOMYEQUALIZER_H #define TAXONOMYEQUALIZER_H /* * taxonomyequalizer.h * Mothur * * Created by westcott on 11/20/09. * Copyright 2009 Schloss Lab. All rights reserved. 
* */ #include "mothur.h" #include "mothurout.h" #include "utils.hpp" //reads in taxonomy file and makes all the taxonomies the same length //by appending the last taxon to a given taxonomy as many times as needed to //make it as long as the longest taxonomy in the file /**************************************************************************************************/ class TaxEqualizer { public: TaxEqualizer(string, int, string); ~TaxEqualizer() = default;; string getEqualizedTaxFile() { return equalizedFile; } int getHighestLevel() { return highestLevel; } private: string equalizedFile, testTax, outputDir; bool containsConfidence; int cutoff, highestLevel; map seqLevels; //maps name to level of taxonomy int getHighestLevel(ifstream&); //scans taxonomy file to find taxonomy with highest level MothurOut* m; Utils util; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/classifier/taxonomynode.cpp000077500000000000000000000016001424121717000217750ustar00rootroot00000000000000/* * taxonomynode.cpp * * * Created by Pat Schloss on 7/8/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. * */ /**************************************************************************************************/ #include "taxonomynode.h" /**************************************************************************************************/ TaxonomyNode::TaxonomyNode(string n, int l): name(n), level(l){ m = MothurOut::getInstance(); parent = -1; numChildren = 0; numSeqs = 0; } /**************************************************************************************************/ int TaxonomyNode::getChildIndex(string c){ map::iterator it = children.find(c); if(it != children.end()) { return it->second; } else { return -1; } } /**************************************************************************************************/ mothur-1.48.0/source/classifier/taxonomynode.h000077500000000000000000000026421424121717000214510ustar00rootroot00000000000000#ifndef TAXONOMYNODE #define TAXONOMYNODE /* * taxonomynode.h * * * Created by Pat Schloss on 7/8/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. * */ /**************************************************************************************************/ #include "mothurout.h" /**************************************************************************************************/ class TaxonomyNode { public: TaxonomyNode(); TaxonomyNode(string, int); void setName(string n) { name = n; } string getName() { return name; } void setParent(int p) { parent = p; } int getParent() { return parent; } void makeChild(string c, int i) { children[c] = i; } map getChildren() { return children; } int getNumKids() { return (int)children.size(); } int getNumSeqs() { return numSeqs; } void setTotalSeqs(int n) { totalSeqs = n; } int getLevel() { return level; } int getChildIndex(string); private: int parent; map children; int numChildren; int level; protected: MothurOut* m; int numSeqs; int totalSeqs; string name; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/clearcut/000077500000000000000000000000001424121717000162235ustar00rootroot00000000000000mothur-1.48.0/source/clearcut/README.txt000066400000000000000000000004161424121717000177220ustar00rootroot00000000000000The source files in this folder were originally part of the clearcut program, https://github.com/ibest/clearcut/. 
They files are used only by the clearcut command. They are primarily written in C and mothur's clearcut command calls clearcut_main in clearcut.cpp file. mothur-1.48.0/source/clearcut/clearcut.cpp000077500000000000000000001434221424121717000205420ustar00rootroot00000000000000 /* * clearcut.c * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * An implementation of the Relaxed Neighbor-Joining algorithm * of Evans, J., Sheneman, L., and Foster, J. * * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #include #include #include #include #include #include #include #include #include "distclearcut.h" #include "dmat.h" #include "fasta.h" #include "cmdargs.h" #include "common.h" #include "clearcut.h" #include "prng.h" /* * main() - * * The entry point to the program. 
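 *
 * Overall flow, as implemented below: parse and validate the command-line arguments, seed the
 * Mersenne Twister PRNG, then either parse a distance matrix directly or build one from an
 * aligned-sequence input; optionally write that matrix out, and finally build and output
 * nj_args->ntrees trees using either relaxed neighbor-joining (the default) or traditional
 * neighbor-joining.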
* */ int clearcut_main(int argc, char *argv[]) { DMAT *dmat; /* The working distance matrix */ DMAT *dmat_backup = nullptr;/* A backup distance matrix */ NJ_TREE *tree; /* The phylogenetic tree */ NJ_ARGS *nj_args; /* Structure for holding command-line arguments */ long int i; /* some variables for tracking time */ struct timeval tv; unsigned long long startUs, endUs; /* check and parse supplied command-line arguments */ nj_args = NJ_handle_args(argc, argv); if(!nj_args) { fprintf(stderr, "Clearcut: Error processing command-line arguments.\n"); exit(-1); } /* for verbose reporting, print the random number seed to stdout */ if(nj_args->verbose_flag) { printf("PRNG SEED: %d\n", nj_args->seed); } /* Initialize Mersenne Twister PRNG */ init_genrand(nj_args->seed); switch(nj_args->input_mode) { /* If the input type is a distance matrix */ case NJ_INPUT_MODE_DISTANCE: /* parse the distance matrix */ dmat = NJ_parse_distance_matrix(nj_args); if(!dmat) { exit(-1); } break; /* If the input type is a multiple sequence alignment */ case NJ_INPUT_MODE_ALIGNED_SEQUENCES: /* build a distance matrix from a multiple sequence alignment */ dmat = NJ_build_distance_matrix(nj_args); if(!dmat) { fprintf(stderr, "Clearcut: Failed to build distance matrix from alignment.\n"); exit(-1); } break; default: fprintf(stderr, "Clearcut: Could not determine how to process input\n"); exit(-1); } /* * Output the computed distance matrix, * if the user specified one. */ if(nj_args->matrixout) { NJ_output_matrix(nj_args, dmat); } /* * If we are going to generate multiple trees from * the same distance matrix, we need to make a backup * of the original distance matrix. */ if(nj_args->ntrees > 1) { dmat_backup = NJ_dup_dmat(dmat); } /* process n trees */ for(i=0;intrees;i++) { /* * If the user has specified matrix shuffling, we need * to randomize the distance matrix */ if(nj_args->shuffle) { NJ_shuffle_distance_matrix(dmat); } /* RECORD THE PRECISE TIME OF THE START OF THE NEIGHBOR-JOINING */ gettimeofday(&tv, nullptr); startUs = ((unsigned long long) tv.tv_sec * 1000000ULL) + ((unsigned long long) tv.tv_usec); /* * Invoke either the Relaxed Neighbor-Joining algorithm (default) * or the "traditional" Neighbor-Joining algorithm */ if(nj_args->neighbor) { tree = NJ_neighbor_joining(nj_args, dmat); } else { tree = NJ_relaxed_nj(nj_args, dmat); } if(!tree) { fprintf(stderr, "Clearcut: Failed to construct tree.\n"); exit(0); } /* RECORD THE PRECISE TIME OF THE END OF THE NEIGHBOR-JOINING */ gettimeofday(&tv, nullptr); endUs = ((unsigned long long) tv.tv_sec * 1000000ULL) + ((unsigned long long) tv.tv_usec); /* print the time taken to perform the neighbor join */ if(nj_args->verbose_flag) { if(nj_args->neighbor) { fprintf(stderr, "NJ tree built in %llu.%06llu secs\n", (endUs - startUs) / 1000000ULL, (endUs - startUs) % 1000000ULL); } else { fprintf(stderr, "RNJ tree built in %llu.%06llu secs\n", (endUs - startUs) / 1000000ULL, (endUs - startUs) % 1000000ULL); } } /* Output the neighbor joining tree here */ NJ_output_tree(nj_args, tree, dmat, i); NJ_free_tree(tree); /* Free the tree */ NJ_free_dmat(dmat); /* Free the working distance matrix */ /* * If we need to do another iteration, lets re-initialize * our working distance matrix. 
*/ if(nj_args->ntrees > 1 && i<(nj_args->ntrees-1) ) { dmat = NJ_dup_dmat(dmat_backup); } } /* Free the backup distance matrix */ if(nj_args->ntrees > 1) { NJ_free_dmat(dmat_backup); } /* If verbosity, describe where the tree output is */ if(nj_args->verbose_flag) { if(nj_args->neighbor) { printf("NJ tree(s) in %s\n", nj_args->outfilename); } else { printf("Relaxed NJ tree(s) in %s\n", nj_args->outfilename); } } return 0; } /* * NJ_find_hmin() - Find minimum transformed values along horizontal * * * INPUTS: * ------- * dmat -- The distance matrix * a -- The index of the specific taxon in the distance matrix * * RETURNS: * -------- * -- The value of the selected minimum * min -- Used to transport the index of the minima out * of the function (by reference) * hmincount -- Return the number of minima along the horizontal * (by reference) * * * DESCRIPTION: * ------------ * * A fast, inline function to find the smallest transformed value * along the "horizontal" portion of an entry in a distance matrix. * * Distance matrices are stored internally as continguously-allocated * upper-diagonal structures. With the exception of the taxa at * row 0 of this upper-diagonal matrix, all taxa have both a horizontal * and vertical component in the distance matrix. This function * scans the horizonal portion of the entry in the distance matrix * for the specified taxon and finds the minimum transformed value * along that horizontal component. * * Since multiple minima can exist along the horizontal portion * of the entry, I consider all minima and break ties * stochastically to help avoid systematic bias. * * Just searching along the horizontal portion of a row is very fast * since the data is stored linearly and contiguously in memory and * cache locality is exploited in the distance matrix representation. * * Look at nj.h for more information on how the distance matrix * is architected. * */ static inline float NJ_find_hmin(DMAT *dmat, long int a, long int *min, long int *hmincount) { long int i; /* index variable for looping */ int size; /* current size of distance matrix */ int mindex = 0; /* holds the current index to the chosen minimum */ float curval; /* used to hold current transformed values */ float hmin; /* the value of the transformed minimum */ float *ptr, *r2, *val; /* pointers used to reduce dereferencing in inner loop */ /* values used for stochastic selection among multiple minima */ float p, x; long int smallcnt; /* initialize the min to something large */ hmin = (float)HUGE_VAL; /* setup some pointers to limit dereferencing later */ r2 = dmat->r2; val = dmat->val; size = dmat->size; /* initialize values associated with minima tie breaking */ p = 1.0; smallcnt = 0; ptr = &(val[NJ_MAP(a, a+1, size)]); /* at the start of the horiz. part */ for(i=a+1;i -- The value of the selected minimum * min -- Used to transport the index of the minima out * of the function (by reference) * vmincount -- The number of minima along the vertical * return by reference. * * DESCRIPTION: * ------------ * * A fast, inline function to find the smallest transformed value * along the "vertical" portion of an entry in a distance matrix. * * Distance matrices are stored internally as continguously-allocated * upper-diagonal matrices. With the exception of the taxa at * row 0 of this upper-diagonal matrix, all taxa have both a horizontal * and vertical component in the distance matrix. 
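 * (Illustrative sketch only -- the exact indexing lives in the NJ_MAP macro, which is not shown in
 * this excerpt: the upper triangle of a 4x4 matrix would be packed row by row into one contiguous
 * array as the six entries (0,1)(0,2)(0,3) (1,2)(1,3) (2,3), so a row's "horizontal" part is
 * contiguous while its "vertical" part is strided across earlier rows.)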
This function * scans the vertical portion of the entry in the distance matrix * for the specified taxon and finds the minimum transformed value * along that vertical component. * * Since multiple minima can exist along the vertical portion * of the entry, I consider all minima and break ties * stochastically to help avoid systematic bias. * * Due to cache locality reasons, searching along the vertical * component is going to be considerably slower than searching * along the horizontal. * * Look at nj.h for more information on how the distance matrix * is architected. * */ static inline float NJ_find_vmin(DMAT *dmat, long int a, long int *min, long int *vmincount) { long int i; /* index variable used for looping */ long int size; /* track the size of the matrix */ long int mindex = 0;/* track the index to the minimum */ float curval; /* track value of current transformed distance */ float vmin; /* the index to the smallest "vertical" minimum */ /* pointers which are used to reduce pointer dereferencing in inner loop */ float *ptr, *r2, *val; /* values used in stochastically breaking ties */ float p, x; long int smallcnt; /* initialize the vertical min to something really big */ vmin = (float)HUGE_VAL; /* save off some values to limit dereferencing later */ r2 = dmat->r2; val = dmat->val; size = dmat->size; p = 1.0; smallcnt = 0; /* start on the first row and work down */ ptr = &(val[NJ_MAP(0, a, size)]); for(i=0;isize; val = dmat->val; r = dmat->r+a+1; /* * Loop through the rows and decrement the stored r values * by the distances stored in the rows and columns of the distance * matrix which are being removed post-join. * * We do the rows altogether in order to benefit from cache locality. */ ptrx = &(val[NJ_MAP(a, a+1, size)]); ptry = &(val[NJ_MAP(b, b+1, size)]); for(i=a+1;ib) { *r -= *(ptry++); } r++; } /* Similar to the above loop, we now do the columns */ ptrx = &(val[NJ_MAP(0, a, size)]); ptry = &(val[NJ_MAP(0, b, size)]); r = dmat->r; for(i=0;isize-1) { /* if we can't do a row here, lets do a column */ if(a==0) { if(b==1) { target = 2; } else { target = 1; } } else { target = 0; } } else { target = b+1; } /* distance between a and the root of clade (a,b) */ a2clade = ( (dmat->val[NJ_MAP(a, b, dmat->size)]) + (dmat->r2[a] - dmat->r2[b]) ) / 2.0; /* distance between b and the root of clade (a,b) */ b2clade = ( (dmat->val[NJ_MAP(a, b, dmat->size)]) + (dmat->r2[b] - dmat->r2[a]) ) / 2.0; /* distance between the clade (a,b) and the target taxon */ if(bval[NJ_MAP(a, target, dmat->size)] - a2clade) + (dmat->val[NJ_MAP(b, target, dmat->size)] - b2clade) ) / 2.0; /* * Check to see that distance from clade root to target + distance from * b to clade root are equal to the distance from b to the target */ if(NJ_FLT_EQ(dmat->val[NJ_MAP(b, target, dmat->size)], (clade_dist + b2clade))) { return(1); /* join is legitimate */ } else { return(0); /* join is illigitimate */ } } else { /* compute the distance from the clade root to the target */ clade_dist = ( (dmat->val[NJ_MAP(target, a, dmat->size)] - a2clade) + (dmat->val[NJ_MAP(target, b, dmat->size)] - b2clade) ) / 2.0; /* * Check to see that distance from clade root to target + distance from * b to clade root are equal to the distance from b to the target */ if(NJ_FLT_EQ(dmat->val[NJ_MAP(target, b, dmat->size)], (clade_dist + b2clade))) { return(1); /* join is legitimate */ } else { return(0); /* join is illegitimate */ } } } /* * NJ_check() - Check to see if two taxa can be joined * * INPUTS: * ------- * nj_args -- Pointer to the data structure 
holding command-line args * dmat -- distance matrix * a -- index into dmat for one of the rows to be joined * b -- index into dmat for another row to be joined * min -- the minimum value found * additivity -- a flag (0 = not additive mode, 1 = additive mode) * * OUTPUTS: * -------- * int 1 if join is okay * 0 if join is not okay * * DESCRIPTION: * ------------ * * This function ultimately takes two rows and makes sure that the * intersection of those two rows, which has a transformed distance of * "min", is actually the smallest (or equal to the smallest) * transformed distance for both rows (a, b). If so, it returns * 1, else it returns 0. * * Basically, we want to join two rows only if the minimum * transformed distance on either row is at the intersection of * those two rows. * */ static inline int NJ_check(NJ_ARGS *nj_args, DMAT *dmat, long int a, long int b, float min, int additivity) { long int i, size; float *ptr, *val, *r2; /* some aliases for speed and readability reasons */ val = dmat->val; r2 = dmat->r2; size = dmat->size; /* now determine if joining a, b will result in broken distances */ if(additivity) { if(!NJ_check_additivity(dmat, a, b)) { return(0); } } /* scan the horizontal of row b, punt if anything < min */ ptr = &(val[NJ_MAP(b, b+1, size)]); for(i=b+1;inorandom) { /* if we are doing random joins, we checked this */ ptr = val + a; for(i=0;i reduce pointer dereferencing */ float a2clade; /* distance from a to the new node that joins a and b */ float b2clade; /* distance from b to the new node that joins a and b */ float cval; /* stores distance information during loop */ float *vptr; /* pointer to elements in first row of dist matrix */ float *ptra; /* pointer to elements in row a of distance matrix */ float *ptrb; /* pointer to elements in row b of distance matrix */ float *val, *r, *r2; /* simply used to limit pointer dereferencing */ /* We must assume that a < b */ if(a >= b) { fprintf(stderr, "Clearcut: (aval; r = dmat->r; r2 = dmat->r2; size = dmat->size; /* compute the distance from the clade components (a, b) to the new node */ a2clade = ( (val[NJ_MAP(a, b, size)]) + (dmat->r2[a] - dmat->r2[b]) ) / 2.0; b2clade = ( (val[NJ_MAP(a, b, size)]) + (dmat->r2[b] - dmat->r2[a]) ) / 2.0; r[a] = 0.0; /* we are removing row a, so clear dist. in r */ /* * Fill the horizontal part of the "a" row and finish computing r and r2 * we handle the horizontal component first to maximize cache locality */ ptra = &(val[NJ_MAP(a, a+1, size)]); /* start ptra at the horiz. of a */ ptrb = &(val[NJ_MAP(a+1, b, size)]); /* start ptrb at comparable place */ for(i=a+1;ir = r+1; /* * Collapse r2 here by copying contents of r2[0] into r2[b] and * incrementing pointer to the beginning of r2 by one row */ r2[b] = r2[0]; dmat->r2 = r2+1; /* increment dmat pointer to next row */ dmat->val += size; /* decrement the total size of the distance matrix by one row */ dmat->size--; return; } /* * NJ_neighbor_joining() - Perform a traditional Neighbor-Joining * * * INPUTS: * ------- * nj_args -- A pointer to a structure containing the command-line arguments * dmat -- A pointer to the distance matrix * * RETURNS: * -------- * NJ_TREE * -- A pointer to the Neighbor-Joining tree. * * DESCRIPTION: * ------------ * * This function performs a traditional Neighbor-Joining operation in which * the distance matrix is exhaustively searched for the global minimum * transformed distance. The two nodes which intersect at the global * minimum transformed distance are then joined and the distance * matrix is collapsed. 
This process continues until there are only * two nodes left, at which point those nodes are joined. * */ NJ_TREE * NJ_neighbor_joining(NJ_ARGS *nj_args, DMAT *dmat) { NJ_TREE *tree = nullptr; NJ_VERTEX *vertex = nullptr; long int a, b; float min; /* initialize the r and r2 vectors */ NJ_init_r(dmat); /* allocate and initialize our vertex vector used for tree construction */ vertex = NJ_init_vertex(dmat); if(!vertex) { fprintf(stderr, "Clearcut: Could not initialize vertex in NJ_neighbor_joining()\n"); return(nullptr); } /* we iterate until the working distance matrix has only 2 entries */ while(vertex->nactive > 2) { /* * Find the global minimum transformed distance from the distance matrix */ min = NJ_min_transform(dmat, &a, &b); /* * Build the tree by removing nodes a and b from the vertex array * and inserting a new internal node which joins a and b. Collapse * the vertex array similarly to how the distance matrix and r and r2 * are compacted. */ NJ_decompose(dmat, vertex, a, b, 0); /* decrement the r and r2 vectors by the distances corresponding to a, b */ NJ_compute_r(dmat, a, b); /* compact the distance matrix and the r and r2 vectors */ NJ_collapse(dmat, vertex, a, b); } /* Properly join the last two nodes on the vertex list */ tree = NJ_decompose(dmat, vertex, 0, 1, NJ_LAST); /* return the computed tree to the calling function */ return(tree); } /* * NJ_relaxed_nj() - Construct a tree using the Relaxed Neighbor-Joining * * INPUTS: * ------- * nj_args -- A pointer to a data structure containing the command-line args * dmat -- A pointer to the distance matrix * * RETURNS: * -------- * * NJ_TREE * -- A pointer to a Relaxed Neighbor-Joining tree * * DESCRIPTION: * ------------ * * This function implements the Relaxed Neighbor-Joining algorithm of * Evans, J., Sheneman, L., and Foster, J. * * Relaxed Neighbor-Joining works by choosing a local minimum transformed * distance when determining when to join two nodes. (Traditional * Neighbor-Joining chooses a global minimum transformed distance). * * The algorithm shares the property with traditional NJ that if the * input distances are additive (self-consistent), then the algorithm * will manage to construct the true tree consistent with the additive * distances. Additivity state is tracked and every proposed join is checked * to make sure it maintains additivity constraints. If no * additivity-preserving join is possible in a single pass, then the distance * matrix is non-additive, and additivity checking is abandoned. * * The algorithm will either attempt joins randomly, or it will perform joins * in a particular order. The default behavior is to perform joins randomly, * but this can be switched off with a command-line switch. * * For randomized joins, all attempts are made to alleviate systematic bias * for the choice of rows to joins. All tie breaking is done in a way which * is virtually free of bias. * * To perform randomized joins, a random permutation is constructed which * specifies the order in which to attempt joins. I iterate through the * random permutation, and for each row in the random permutation, I find * the minimum transformed distance for that row. If there are multiple * minima, I break ties evenly. For the row which intersects our * randomly chosen row at the chosen minimum, if we are are still in * additivity mode, I check to see if joining the two rows will break * our additivity constraints. 
If not, I check to see if there exists * a transformed distance which is smaller than the minimum found on the * original row. If there is, then we proceed through the random permutation * trying additional rows in the random order specified in the permutation. * If there is no smaller minimum transformed distance on either of the * two rows, then we join them, collapse the distance matrix, and compute * a new random permutation. * * If the entire random permutation is traversed and no joins are possible * due to additivity constraints, then the distance matrix is not * additive, and additivity constraint-checking is disabled. * */ NJ_TREE * NJ_relaxed_nj(NJ_ARGS *nj_args, DMAT *dmat) { NJ_TREE *tree; NJ_VERTEX *vertex; long int a, b, t, bh, bv, i; float hmin, vmin, hvmin; float p, q, x; int join_flag; int additivity_mode; long int hmincount, vmincount; long int *permutation = nullptr; /* initialize the r and r2 vectors */ NJ_init_r(dmat); additivity_mode = 1; /* allocate the permutation vector, if we are in randomize mode */ if(!nj_args->norandom) { permutation = (long int *)calloc(dmat->size, sizeof(long int)); if(!permutation) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_relaxed_nj()\n"); return(nullptr); } } /* allocate and initialize our vertex vector used for tree construction */ vertex = NJ_init_vertex(dmat); /* loop until there are only 2 nodes left to join */ while(vertex->nactive > 2) { switch(nj_args->norandom) { /* RANDOMIZED JOINS */ case 0: join_flag = 0; NJ_permute(permutation, dmat->size-1); for(i=0;isize-1 && (vertex->nactive>2) ;i++) { a = permutation[i]; /* find min trans dist along horiz. of row a */ hmin = NJ_find_hmin(dmat, a, &bh, &hmincount); if(a) { /* find min trans dist along vert. of row a */ vmin = NJ_find_vmin(dmat, a, &bv, &vmincount); } else { vmin = hmin; bv = bh; vmincount = 0; } if(NJ_FLT_EQ(hmin, vmin)) { /* * The minima along the vertical and horizontal are * the same. Compute the proportion of minima along * the horizonal (p) and the proportion of minima * along the vertical (q). * * If the same minima exist along the horizonal and * vertical, we break the tie in a way which is * non-biased. That is, we break the tie based on the * proportion of horiz. minima versus vertical minima. 
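*
* For example, if hmincount = 3 and vmincount = 1, then p = 0.75 and
* q = 0.25, so the random draw x from genrand_real2() below falls under
* p about 75% of the time and the horizontal minimum (bh) is chosen;
* otherwise the vertical minimum (bv) is used.
*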
* */ p = (float)hmincount / ((float)hmincount + (float)vmincount); q = 1.0 - p; x = genrand_real2(); if(x < p) { hvmin = hmin; b = bh; } else { hvmin = vmin; b = bv; } } else if(NJ_FLT_LT(hmin, vmin) ) { hvmin = hmin; b = bh; } else { hvmin = vmin; b = bv; } if(NJ_check(nj_args, dmat, a, b, hvmin, additivity_mode)) { /* swap a and b, if necessary, to make sure a < b */ if(b < a) { t = a; a = b; b = t; } join_flag = 1; /* join taxa from rows a and b */ NJ_decompose(dmat, vertex, a, b, 0); /* collapse matrix */ NJ_compute_r(dmat, a, b); NJ_collapse(dmat, vertex, a, b); NJ_permute(permutation, dmat->size-1); } } /* turn off additivity if go through an entire cycle without joining */ if(!join_flag) { additivity_mode = 0; } break; /* DETERMINISTIC JOINS */ case 1: join_flag = 0; for(a=0;asize-1 && (vertex->nactive > 2) ;) { /* find the min along the horizontal of row a */ hmin = NJ_find_hmin(dmat, a, &b, &hmincount); if(NJ_check(nj_args, dmat, a, b, hmin, additivity_mode)) { join_flag = 1; /* join taxa from rows a and b */ NJ_decompose(dmat, vertex, a, b, 0); /* collapse matrix */ NJ_compute_r(dmat, a, b); NJ_collapse(dmat, vertex, a, b); if(a) { a--; } } else { a++; } } /* turn off additivity if go through an entire cycle without joining */ if(!join_flag) { additivity_mode = 0; } break; } } /* WHILE */ /* Join the last two nodes on the vertex list */ tree = NJ_decompose(dmat, vertex, 0, 1, NJ_LAST); if(nj_args->verbose_flag) { if(additivity_mode) { printf("Tree is additive\n"); } else { printf("Tree is not additive\n"); } } if(vertex) { NJ_free_vertex(vertex); } if(!nj_args->norandom && permutation) { free(permutation); } return(tree); } /* * NJ_print_distance_matrix() - * * Print a distance matrix * */ void NJ_print_distance_matrix(DMAT *dmat) { long int i, j; printf("ntaxa: %ld\n", dmat->ntaxa); printf(" size: %ld\n", dmat->size); for(i=0;isize;i++) { for(j=0;jsize;j++) { if(j>i) { printf(" %0.4f", dmat->val[NJ_MAP(i, j, dmat->size)]); } else { printf(" -"); } } if(dmat->r && dmat->r2) { printf("\t\t%0.4f", dmat->r[i]); printf("\t%0.4f", dmat->r2[i]); printf("\n"); for(j=0;jsize;j++) { if(j>i) { printf(" %0.4f", dmat->val[NJ_MAP(i, j, dmat->size)] - (dmat->r2[i] + dmat->r2[j])); } else { printf(" "); } } printf("\n"); } } printf("\n\n"); return; } /* * NJ_output_tree() - * * A wrapper for the function that really prints the tree, * basically to get a newline in there conveniently. 
:-) * * Print n trees, as specified in command-args * using "count" variable from 0 to (n-1) * */ void NJ_output_tree(NJ_ARGS *nj_args, NJ_TREE *tree, DMAT *dmat, long int count) { FILE *fp; if(nj_args->stdout_flag) { fp = stdout; } else { if(count == 0) { fp = fopen(nj_args->outfilename, "w"); /* open for writing */ } else { fp = fopen(nj_args->outfilename, "a"); /* open for appending */ } if(!fp) { fprintf(stderr, "Clearcut: Failed to open outfile %s\n", nj_args->outfilename); exit(-1); } } NJ_output_tree2(fp, nj_args, tree, tree, dmat); fprintf(fp, ";\n"); if(!nj_args->stdout_flag) { fclose(fp); } return; } /* * NJ_output_tree2() - * * */ void NJ_output_tree2(FILE *fp, NJ_ARGS *nj_args, NJ_TREE *tree, NJ_TREE *root, DMAT *dmat) { if(!tree) { return; } if(tree->taxa_index != NJ_INTERNAL_NODE) { if(nj_args->expblen) { fprintf(fp, "%s:%e", dmat->taxaname[tree->taxa_index], tree->dist); } else { fprintf(fp, "%s:%f", dmat->taxaname[tree->taxa_index], tree->dist); } } else { if(tree->left && tree->right) { fprintf(fp, "("); } if(tree->left) { NJ_output_tree2(fp, nj_args, tree->left, root, dmat); } if(tree->left && tree->right) { fprintf(fp, ","); } if(tree->right) { NJ_output_tree2(fp, nj_args, tree->right, root, dmat); } if(tree != root->left) { if(tree->left && tree->right) { if(tree != root) { if(nj_args->expblen) { fprintf(fp, "):%e", tree->dist); } else { fprintf(fp, "):%f", tree->dist); } } else { fprintf(fp, ")"); } } } else { fprintf(fp, ")"); } } return; } /* * NJ_init_r() * * This function computes the r column in our matrix * */ void NJ_init_r(DMAT *dmat) { long int i, j, size; long int index; float *r, *r2, *val; long int size1; float size2; r = dmat->r; r2 = dmat->r2; val = dmat->val; size = dmat->size; size1 = size-1; size2 = (float)(size-2); index = 0; for(i=0;inodes = (NJ_TREE **)calloc(dmat->ntaxa, sizeof(NJ_TREE *)); vertex->nodes_handle = vertex->nodes; /* initialize our size and active variables */ vertex->nactive = dmat->ntaxa; vertex->size = dmat->ntaxa; /* initialize the nodes themselves */ for(i=0;intaxa;i++) { vertex->nodes[i] = (NJ_TREE *)calloc(1, sizeof(NJ_TREE)); vertex->nodes[i]->left = nullptr; vertex->nodes[i]->right = nullptr; vertex->nodes[i]->taxa_index = i; } return(vertex); } /* * NJ_decompose() - * * This function decomposes the star by creating new internal nodes * and joining two existing tree nodes to it * */ NJ_TREE * NJ_decompose(DMAT *dmat, NJ_VERTEX *vertex, long int x, long int y, int last_flag) { NJ_TREE *new_node; float x2clade, y2clade; /* compute the distance from the clade components to the new node */ if(last_flag) { x2clade = (dmat->val[NJ_MAP(x, y, dmat->size)]); } else { x2clade = (dmat->val[NJ_MAP(x, y, dmat->size)])/2 + ((dmat->r2[x] - dmat->r2[y])/2); } vertex->nodes[x]->dist = x2clade; if(last_flag) { y2clade = (dmat->val[NJ_MAP(x, y, dmat->size)]); } else { y2clade = (dmat->val[NJ_MAP(x, y, dmat->size)])/2 + ((dmat->r2[y] - dmat->r2[x])/2); } vertex->nodes[y]->dist = y2clade; /* allocate new node to connect two sub-clades */ new_node = (NJ_TREE *)calloc(1, sizeof(NJ_TREE)); new_node->left = vertex->nodes[x]; new_node->right = vertex->nodes[y]; new_node->taxa_index = NJ_INTERNAL_NODE; /* this is not a terminal node, no taxa index */ if(last_flag) { return(new_node); } vertex->nodes[x] = new_node; vertex->nodes[y] = vertex->nodes[0]; vertex->nodes = &(vertex->nodes[1]); vertex->nactive--; return(new_node); } /* * NJ_print_vertex() - * * For debugging, print the contents of the vertex * */ void NJ_print_vertex(NJ_VERTEX *vertex) { long 
int i; printf("Number of active nodes: %ld\n", vertex->nactive); for(i=0;inactive;i++) { printf("%ld ", vertex->nodes[i]->taxa_index); } printf("\n"); return; } /* * NJ_print_r() - * */ void NJ_print_r(DMAT *dmat) { long int i; printf("\n"); for(i=0;isize;i++) { printf("r[%ld] = %0.2f\n", i, dmat->r[i]); } printf("\n"); return; } /* * NJ_print_taxanames() - * * Print taxa names here * */ void NJ_print_taxanames(DMAT *dmat) { long int i; printf("Number of taxa: %ld\n", dmat->ntaxa); for(i=0;intaxa;i++) { printf("%ld) %s\n", i, dmat->taxaname[i]); } printf("\n"); return; } /* * NJ_shuffle_distance_matrix() - * * Randomize a distance matrix here * */ void NJ_shuffle_distance_matrix(DMAT *dmat) { long int *perm = nullptr; char **tmp_taxaname = nullptr; float *tmp_val = nullptr; long int i, j; /* alloc the random permutation and a new matrix to hold the shuffled vals */ perm = (long int *)calloc(dmat->size, sizeof(long int)); tmp_taxaname = (char **)calloc(dmat->size, sizeof(char *)); tmp_val = (float *)calloc(NJ_NCELLS(dmat->ntaxa), sizeof(float)); if(!tmp_taxaname || !perm || !tmp_val) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_shuffle_distance_matrix()\n"); exit(-1); } /* compute a permutation which will describe how to shuffle the matrix */ NJ_permute(perm, dmat->size); for(i=0;isize;i++) { for(j=i+1;jsize;j++) { if(perm[j] < perm[i]) { tmp_val[NJ_MAP(i, j, dmat->size)] = dmat->val[NJ_MAP(perm[j], perm[i], dmat->size)]; } else { tmp_val[NJ_MAP(i, j, dmat->size)] = dmat->val[NJ_MAP(perm[i], perm[j], dmat->size)]; } } tmp_taxaname[i] = dmat->taxaname[perm[i]]; } /* free our random permutation */ if(perm) { free(perm); } /* free the old value matrix */ if(dmat->val) { free(dmat->val); } /* re-assign the value matrix pointers */ dmat->val = tmp_val; dmat->valhandle = dmat->val; /* * Free our old taxaname with its particular ordering * and re-assign to the new. 
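*
* (For instance, if perm = {2, 0, 1}, the loop above pointed
*  tmp_taxaname[0] at the old taxaname[2], so only the old array of
*  pointers is released here; the name strings themselves remain
*  referenced through the new, shuffled array.)
*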
*/ if(dmat->taxaname) { free(dmat->taxaname); } dmat->taxaname = tmp_taxaname; return; } /* * NJ_free_tree() - * * Free a given NJ tree */ void NJ_free_tree(NJ_TREE *node) { if(!node) { return; } if(node->left) { NJ_free_tree(node->left); } if(node->right) { NJ_free_tree(node->right); } free(node); return; } /* * NJ_print_permutation() * * Print a permutation * */ void NJ_print_permutation(long int *perm, long int size) { long int i; for(i=0;intaxa = src->ntaxa; dest->size = src->size; /* allocate space for array of pointers to taxanames */ dest->taxaname = (char **)calloc(dest->ntaxa, sizeof(char *)); if(!dest->taxaname) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n"); goto XIT_BAD; } /* allocate space for the taxanames themselves */ for(i=0;intaxa;i++) { dest->taxaname[i] = (char *)calloc(strlen(src->taxaname[i])+1, sizeof(char)); if(!dest->taxaname[i]) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n"); goto XIT_BAD; } } /* allocate space for the distance values */ dest->val = (float *)calloc(NJ_NCELLS(src->ntaxa), sizeof(float)); if(!dest->val) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n"); goto XIT_BAD; } /* allocate space for the r and r2 vectors */ dest->r = (float *)calloc(src->ntaxa, sizeof(float)); dest->r2 = (float *)calloc(src->ntaxa, sizeof(float)); /* copy titles */ for(i=0;intaxa;i++) { strcpy(dest->taxaname[i], src->taxaname[i]); } /* copy values */ memcpy(dest->val, src->valhandle, NJ_NCELLS(src->ntaxa)*sizeof(float)); /* copy r and r2 */ memcpy(dest->r, src->rhandle, src->ntaxa*sizeof(float)); memcpy(dest->r2, src->r2handle, src->ntaxa*sizeof(float)); /* track some memory addresses */ dest->valhandle = dest->val; dest->rhandle = dest->r; dest->r2handle = dest->r2; return(dest); XIT_BAD: /* free what we may have allocated */ NJ_free_dmat(dest); return(nullptr); } /* * NJ_free_dmat() - */ void NJ_free_dmat(DMAT *dmat) { long int i; if(dmat) { if(dmat->taxaname) { for(i=0;intaxa;i++) { if(dmat->taxaname[i]) { free(dmat->taxaname[i]); } } free(dmat->taxaname); } if(dmat->valhandle) { free(dmat->valhandle); } if(dmat->rhandle) { free(dmat->rhandle); } if(dmat->r2handle) { free(dmat->r2handle); } free(dmat); } return; } /* * NJ_free_vertex() - * * Free the vertex data structure * */ void NJ_free_vertex(NJ_VERTEX *vertex) { if(vertex) { if(vertex->nodes_handle) { free(vertex->nodes_handle); } free(vertex); } return; } /* * * NJ_min_transform() - Find the smallest transformed value to identify * which nodes to join. * * INPUTS: * ------- * dmat -- The distance matrix * * RETURNS: * -------- * -- The minimimum transformed distance * ret_i -- The row of the smallest transformed distance (by reference) * ret_j -- The col of the smallest transformed distance (by reference) * * * DESCRIPTION: * ------------ * * Used only with traditional Neighbor-Joining, this function checks the entire * working distance matrix and identifies the smallest transformed distance. * This requires traversing the entire diagonal matrix, which is itself a * O(N^2) operation. * */ float NJ_min_transform(DMAT *dmat, long int *ret_i, long int *ret_j) { long int i, j; /* indices used for looping */ long int tmp_i = 0;/* to limit pointer dereferencing */ long int tmp_j = 0;/* to limit pointer dereferencing */ float smallest; /* track the smallest trans. dist */ float curval; /* the current trans. 
dist in loop */ float *ptr; /* pointer into distance matrix */ float *r2; /* pointer to r2 matrix for computing transformed dists */ smallest = (float)HUGE_VAL; /* track these here to limit pointer dereferencing in inner loop */ ptr = dmat->val; r2 = dmat->r2; /* for every row */ for(i=0;isize;i++) { ptr++; /* skip diagonal */ for(j=i+1;jsize;j++) { /* for every column */ /* find transformed distance in matrix at i, j */ curval = *(ptr++) - (r2[i] + r2[j]); /* if the transformed distanance is less than the known minimum */ if(curval < smallest) { smallest = curval; tmp_i = i; tmp_j = j; } } } /* pass back (by reference) the coords of the min. transformed distance */ *ret_i = tmp_i; *ret_j = tmp_j; return(smallest); /* return the min transformed distance */ } mothur-1.48.0/source/clearcut/clearcut.h000077500000000000000000000165701424121717000202120ustar00rootroot00000000000000 /* * clearcut.h * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #ifndef _INC_CLEARCUT_H_ #define _INC_CLEARCUT_H_ 1 extern "C" { #include "common.h" #include "cmdargs.h" #define NJ_VERSION "1.0.9" #define NJ_INTERNAL_NODE -1 #define NJ_LAST 101 #define NJ_INPUT_MODE_UNKNOWN 0 #define NJ_INPUT_MODE_DISTANCE 100 #define NJ_INPUT_MODE_UNALIGNED_SEQUENCES 101 #define NJ_INPUT_MODE_ALIGNED_SEQUENCES 102 #define NJ_MODEL_NONE 100 #define NJ_MODEL_JUKES 101 #define NJ_MODEL_KIMURA 102 /* * DMAT - Distance Matrix * * This is arguably the most important structure in the * program. This is the distance matrix, and it is used * by many functions throughout the application. * * The matrix is architected as a contiguously allocated * upper-diagonal matrix of floats which include the * diagonal. 
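*
* Cells are addressed with NJ_MAP(i, j, ntaxa) from common.h, i.e. at
* offset i*(2*ntaxa - i - 1)/2 + j for j >= i. In the 6-taxon example
* below, entry (1, 3) therefore sits at offset 1*(12 - 1 - 1)/2 + 3 = 8,
* counting from the (0, 0) cell at offset 0.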
* * Example: * * 0 1 2 3 4 5 * 0 0.0 1.0 0.3 0.2 0.1 0.3 * 1 0.0 0.3 0.2 0.1 0.8 * 2 0.0 0.1 0.3 0.5 * 3 0.0 0.2 0.1 * 4 0.0 0.2 * 5 0.0 * * The distance matrix shrinks with every join operation, * so I track the original and working size of the matrix * inside the matrix. * * One fast optimization to shrink the distance matrix * involves incrementing the "val" pointer. Thus, in * addition to tracking the pointer to the distances, * I also track the original pointer to that I can * free the memory associated with the working distance * matrix. * * This also applies to the r and r2 vectors which are * used to compute the transformed distances in the * matrix. * */ typedef struct _STRUCT_DMAT { long int ntaxa; /* the original size of the distance matrix */ long int size; /* the current/effective size of the distance matrix */ char **taxaname; /* a pointer to an array of taxa name strings */ float *val; /* the distances */ float *valhandle; /* to track the orig. pointer to free memory */ float *r, *r2; /* r and r2 vectors (used to compute transformed dists) */ float *rhandle, *r2handle; /* track orig. pointers to free memory */ } DMAT; /* * NJ_TREE - The Tree Data Structure * * * The tree is represented internally as a rooted * binary tree. Each internal node has a left and a right child. * * Additionally, I track the distance between the current node * and that node's parent (i.e. the branch length). * * Finally, I track the index of the taxa for leaf nodes. * */ typedef struct _STRUCT_NJ_TREE { struct _STRUCT_NJ_TREE *left; /* left child */ struct _STRUCT_NJ_TREE *right; /* right child */ float dist; /* branch length. i.e. dist from node to parent */ long int taxa_index; /* for terminal nodes, track the taxon index */ } NJ_TREE; /* * NJ_VERTEX * * This structure is used for building trees. It is a vector * which, represents the center of the star when building the RNJ/NJ * tree through star-decomposition. * * It contains a vector of tree (node) pointers. These pointers * get joined together by a new internal node, and the new internal * node is placed back into the vector of nodes (which is now smaller). * * To keep this vector in sync. with the shrinking matrix, parts of * the vector are shuffled around, and so a pointer to the originally * allocated vector is stored such that it can be freed from memory * later. * * The original and working sizes of the vector are also tracked. 
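*
* (A small sketch of one join, following what NJ_decompose() does to
*  this vector: if the active view is [n0, n1, n2, n3] and rows 1 and 3
*  are joined, slot 1 receives the new internal node (n1, n3), slot 3
*  receives n0 from slot 0, and the nodes pointer is advanced by one,
*  leaving the active view [(n1, n3), n2, n0] with nactive = 3.)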
* */ typedef struct _STRUCT_NJ_VERTEX { NJ_TREE **nodes; NJ_TREE **nodes_handle; /* original memory handle for freeing */ long int nactive; /* number of active nodes in the list */ long int size; /* the total size of the vertex */ } NJ_VERTEX; /* some function prototypes */ int clearcut_main(int, char**); /* core function for performing Relaxed Neighbor Joining */ NJ_TREE * NJ_relaxed_nj(NJ_ARGS *nj_args, DMAT *dmat); /* function for performing traditional Neighbor-Joining */ NJ_TREE * NJ_neighbor_joining(NJ_ARGS *nj_args, DMAT *dmat); /* print the distance matrix (for debugging) */ void NJ_print_distance_matrix(DMAT *dmat); /* output the computed tree to stdout or to the specified file */ void NJ_output_tree(NJ_ARGS *nj_args, NJ_TREE *tree, DMAT *dmat, long int count); /* the recursive function for outputting trees */ void NJ_output_tree2(FILE *fp, NJ_ARGS *nj_args, NJ_TREE *tree, NJ_TREE *root, DMAT *dmat); /* initialize vertex */ NJ_VERTEX * NJ_init_vertex(DMAT *dmat); /* used to decompose the star topology and build the tree */ NJ_TREE * NJ_decompose(DMAT *dmat, NJ_VERTEX *vertex, long int x, long int y, int last_flag); /* print the vertex vector (for debugging) */ void NJ_print_vertex(NJ_VERTEX *vertex); /* print taxa names (for debugging) */ void NJ_print_taxanames(DMAT *dmat); /* initialize r-vector prior to RNJ/NJ */ void NJ_init_r(DMAT *dmat); /* print the r-vector (for debugging) */ void NJ_print_r(DMAT *dmat); /* shuffle the distance matrix, usually after reading in input */ void NJ_shuffle_distance_matrix(DMAT *dmat); /* free memory from the tree */ void NJ_free_tree(NJ_TREE *node); /* print permutations (for debugging) */ void NJ_print_permutation(long int *perm, long int size); /* duplicate a distance matrix for multiple iterations */ DMAT * NJ_dup_dmat(DMAT *src); /* free the distance matrix */ void NJ_free_dmat(DMAT *dmat); /* free the vertex vector */ void NJ_free_vertex(NJ_VERTEX *vertex); /* for computing the global minimum transformed distance in traditional NJ */ float NJ_min_transform(DMAT *dmat, long int *ret_i, long int *ret_j); } #endif /* _INC_CLEARCUT_H_ */ mothur-1.48.0/source/clearcut/cmdargs.cpp000077500000000000000000000351521424121717000203600ustar00rootroot00000000000000/* * cmdargs.c * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #include #include #include #include //#ifdef USE_GNU //#include //#else #include "getopt_long.h" //#endif /* USE_GNU*/ #include "clearcut.h" #include "cmdargs.h" /* * NJ_handle_args() - * */ NJ_ARGS * NJ_handle_args(int argc, char *argv[]) { static NJ_ARGS nj_args; int option_index, c; optind = 0; //neccasary to read in arguments if code is run more than once struct option NJ_long_options[] = { /* These options don't set a flag */ {"in", required_argument, nullptr, 'i'}, {"out", required_argument, nullptr, 'o'}, {"seed", required_argument, nullptr, 's'}, {"matrixout", required_argument, nullptr, 'm'}, {"ntrees", required_argument, nullptr, 'n'}, /* These options set a flag */ {"verbose", no_argument, &(nj_args.verbose_flag), 1}, {"quiet", no_argument, &(nj_args.quiet_flag), 1}, {"distance", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_DISTANCE}, {"alignment", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_ALIGNED_SEQUENCES}, {"help", no_argument, &(nj_args.help), 1}, {"version", no_argument, &(nj_args.version), 1}, {"norandom", no_argument, &(nj_args.norandom), 1}, {"shuffle", no_argument, &(nj_args.shuffle), 1}, {"stdin", no_argument, &(nj_args.stdin_flag), 1}, {"stdout", no_argument, &(nj_args.stdout_flag), 1}, {"dna", no_argument, &(nj_args.dna_flag), 1}, {"DNA", no_argument, &(nj_args.dna_flag), 1}, {"protein", no_argument, &(nj_args.protein_flag), 1}, {"neighbor", no_argument, &(nj_args.neighbor), 1}, {"expblen", no_argument, &(nj_args.expblen), 1}, {"expdist", no_argument, &(nj_args.expdist), 1}, {"jukes", no_argument, &(nj_args.jukes_flag), 1}, {"kimura", no_argument, &(nj_args.kimura_flag), 1}, {0, 0, 0, 0} }; /* initializes options to their default */ nj_args.infilename = nullptr; nj_args.outfilename = nullptr; nj_args.matrixout = nullptr; nj_args.seed = time(0); nj_args.verbose_flag = 0; nj_args.quiet_flag = 0; nj_args.input_mode = NJ_INPUT_MODE_DISTANCE; nj_args.help = 0; nj_args.version = 0; nj_args.norandom = 0; nj_args.shuffle = 0; nj_args.stdin_flag = 0; nj_args.stdout_flag = 0; nj_args.dna_flag = 0; nj_args.protein_flag = 0; nj_args.correction_model = NJ_MODEL_NONE; nj_args.jukes_flag = 0; nj_args.kimura_flag = 0; nj_args.neighbor = 0; nj_args.ntrees = 1; nj_args.expblen = 0; nj_args.expdist = 0; while(1) { c = getopt_long(argc, argv, "i:o:s:m:n:vqduahVSIOrDPjkNeE", NJ_long_options, &option_index); if(c == -1) { break; } //printf("%d\t%d\n", option_index, argc); //for (int red = 0; red < argc; red++) { printf("%s\n", argv[red]); } switch(c) { case 0: if(NJ_long_options[option_index].flag) { break; } printf("option %s", NJ_long_options[option_index].name); if(optarg) { printf(" with arg %s", optarg); } printf("\n"); break; case 'i': nj_args.infilename = optarg; break; case 'o': nj_args.outfilename = optarg; break; case 's': nj_args.seed = atoi(optarg); break; case 'm': nj_args.matrixout = optarg; break; case 
'n': nj_args.ntrees = atoi(optarg); break; case 'v': nj_args.verbose_flag = 1; break; case 'q': nj_args.quiet_flag = 1; break; case 'd': nj_args.input_mode = NJ_INPUT_MODE_DISTANCE; break; case 'a': nj_args.input_mode = NJ_INPUT_MODE_ALIGNED_SEQUENCES; break; case 'h': nj_args.help = 1; break; case 'V': nj_args.version = 1; break; case 'S': nj_args.shuffle = 1; break; case 'I': nj_args.stdin_flag = 1; break; case 'O': nj_args.stdin_flag = 1; break; case 'r': nj_args.norandom = 1; break; case 'D': nj_args.dna_flag = 1; break; case 'P': nj_args.protein_flag = 1; break; case 'j': nj_args.jukes_flag = 1; break; case 'k': nj_args.kimura_flag = 1; break; case 'N': nj_args.neighbor = 1; break; case 'e': nj_args.expblen = 1; break; case 'E': nj_args.expdist = 1; break; default: NJ_usage(); exit(-1); } } if(optind < argc) { fprintf(stderr, "Clearcut: Unknown command-line argument:\n --> %s\n", argv[optind]); NJ_usage(); exit(-1); } if(nj_args.version) { printf("Clearcut Version: %s\n", NJ_VERSION); //exit(0); } if(nj_args.help) { NJ_usage(); //exit(0); } /* if stdin & explicit filename are specified for input */ if(nj_args.stdin_flag) { if(nj_args.infilename) { fprintf(stderr, "Clearcut: Ambiguous input source specified. Specify input filename OR stdin.\n"); NJ_usage(); exit(-1); } } /* if stdout & explicit filename are specified for output */ if(nj_args.stdout_flag) { if(nj_args.outfilename) { fprintf(stderr, "Clearcut: Ambiguous output specified. Specify output filename OR stdout.\n"); NJ_usage(); exit(-1); } } /* if user did not specify stdin or filename, default to stdin */ if(!nj_args.stdin_flag) { if(!nj_args.infilename) { fprintf(stderr, "Clearcut: No input file specified. Using stdin.\n"); nj_args.stdin_flag = 1; } } /* if user did not specify stdout or filename, default to stdout */ if(!nj_args.stdout_flag) { if(!nj_args.outfilename) { fprintf(stderr, "Clearcut: No output file specified. Using stdout.\n"); nj_args.stdout_flag = 1; } } /* User must specify distance matrix or alignment */ if(nj_args.input_mode == NJ_INPUT_MODE_UNKNOWN) { fprintf(stderr, "Clearcut: Must specify input type (--distance | --alignment)\n"); NJ_usage(); exit(-1); } /* do not allow protein or DNA options for distance matrix input */ if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) { if(nj_args.dna_flag || nj_args.protein_flag) { fprintf(stderr, "Clearcut: Ambiguous arguments. 
(--protein | --DNA) do not apply to distance \n"); NJ_usage(); exit(-1); } } /* make sure different filenames were specified for input and output */ if(!nj_args.stdin_flag && !nj_args.stdout_flag) { if(!strcmp(nj_args.infilename, nj_args.outfilename)) { fprintf(stderr, "Clearcut: Input filename and output filename must be unique.\n"); NJ_usage(); exit(-1); } } /* make sure that user specifies DNA or Protein if dealing with alignment input */ if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) { if(!nj_args.dna_flag && !nj_args.protein_flag) { fprintf(stderr, "Clearcut: Must specify protein or DNA for alignment input.\n"); NJ_usage(); exit(-1); } } /* make sure that user does not specify both protein and DNA when dealing with alignment input */ if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) { if(nj_args.dna_flag && nj_args.protein_flag) { fprintf(stderr, "Clearcut: Specifying protein and DNA sequences are mutually exclusive options\n"); NJ_usage(); exit(-1); } } /* make sure verbose and quiet were not specified together */ if(nj_args.verbose_flag && nj_args.quiet_flag) { fprintf(stderr, "Clearcut: Verbose and Quiet mode are mutually exclusive.\n"); NJ_usage(); exit(-1); } /* make sure that a correction model was specified only when providing an alignment */ if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) { if(nj_args.jukes_flag || nj_args.kimura_flag) { fprintf(stderr, "Clearcut: Only specify correction model for alignment input.\n"); NJ_usage(); exit(-1); } } else { if(nj_args.jukes_flag && nj_args.kimura_flag) { fprintf(stderr, "Clearcut: Only specify one correction model\n"); NJ_usage(); exit(-1); } else { if(nj_args.jukes_flag && !nj_args.kimura_flag) { nj_args.correction_model = NJ_MODEL_JUKES; } else if(nj_args.kimura_flag && !nj_args.jukes_flag) { nj_args.correction_model = NJ_MODEL_KIMURA; } else { nj_args.correction_model = NJ_MODEL_NONE; /* DEFAULT */ } } } /* make sure that the number of output trees is reasonable */ if(nj_args.ntrees <= 0) { fprintf(stderr, "Clearcut: Number of output trees must be a positive integer.\n"); NJ_usage(); exit(-1); } /* * make sure that if exponential distances are specified, * we are dealing with alignment input */ if(nj_args.expdist && nj_args.input_mode != NJ_INPUT_MODE_ALIGNED_SEQUENCES) { fprintf(stderr, "Clearcut: Exponential notation for distance matrix output requires that input be an alignment\n"); NJ_usage(); exit(-1); } return(&nj_args); } /* * NJ_print_args() - * */ void NJ_print_args(NJ_ARGS *nj_args) { char input_mode[32]; switch (nj_args->input_mode) { case NJ_INPUT_MODE_DISTANCE: sprintf(input_mode, "Distance Matrix"); break; case NJ_INPUT_MODE_UNALIGNED_SEQUENCES: sprintf(input_mode, "Unaligned Sequences"); break; case NJ_INPUT_MODE_ALIGNED_SEQUENCES: sprintf(input_mode, "Aligned Sequences"); break; default: sprintf(input_mode, "UNKNOWN"); break; } printf("\n*** Command Line Arguments ***\n"); printf("Input Mode: %s\n", input_mode); if(nj_args->stdin_flag) { printf("Input from STDIN\n"); } else { printf("Input File: %s\n", nj_args->infilename); } if(nj_args->stdout_flag) { printf("Output from STDOUT\n"); } else { printf("Output File: %s\n", nj_args->outfilename); } if(nj_args->input_mode != NJ_INPUT_MODE_DISTANCE) { if(nj_args->aligned_flag) { printf("Input Sequences Aligned: YES\n"); } else { printf("Input Sequences Aligned: NO\n"); } } if(nj_args->verbose_flag) { printf("Verbose Mode: ON\n"); } else { printf("Verbose Mode: OFF\n"); } if(nj_args->quiet_flag) { printf("Quiet Mode: ON\n"); } else { printf("Quiet 
Mode: OFF\n"); } if(nj_args->seed) { printf("Random Seed: %d\n", nj_args->seed); } printf("\n*******\n"); return; } /* * NJ_usage() - * * Print a usage message * */ void NJ_usage(void) { printf("Usage: clearcut --in= --out= [options]...\n"); printf("GENERAL OPTIONS:\n"); printf(" -h, --help Display this information.\n"); printf(" -V, --version Print the version of this program.\n"); printf(" -v, --verbose More output. (Default: OFF)\n"); printf(" -q, --quiet Silent operation. (Default: ON)\n"); printf(" -s, --seed= Explicitly set the PRNG seed to a specific value.\n"); printf(" -r, --norandom Attempt joins deterministically. (Default: OFF)\n"); printf(" -S, --shuffle Randomly shuffle the distance matrix. (Default: OFF)\n"); printf(" -N, --neighbor Use traditional Neighbor-Joining algorithm. (Default: OFF)\n"); printf("\n"); printf("INPUT OPTIONS:\n"); printf(" -I, --stdin Read input from STDIN.\n"); printf(" -d, --distance Input file is a distance matrix. (Default: ON)\n"); printf(" -a, --alignment Input file is a set of aligned sequences. (Default: OFF)\n"); printf(" -D, --DNA Input alignment are DNA sequences.\n"); printf(" -P, --protein Input alignment are protein sequences.\n"); printf("\n"); printf("CORRECTION MODEL FOR COMPUTING DISTANCE MATRIX (Default: NO Correction):\n"); printf(" -j, --jukes Use Jukes-Cantor correction for computing distance matrix.\n"); printf(" -k, --kimura Use Kimura correction for distance matrix.\n"); printf("\n"); printf("OUTPUT OPTIONS:\n"); printf(" -O, --stdout Output tree to STDOUT.\n"); printf(" -m, --matrixout= Output distance matrix to specified file.\n"); printf(" -n, --ntrees= Output n trees. (Default: 1)\n"); printf(" -e, --expblen Exponential notation for branch lengths. (Default: OFF)\n"); printf(" -E, --expdist Exponential notation in distance output. (Default: OFF)\n"); printf("\n"); printf("EXAMPLES:\n"); printf(" Compute tree by supplying distance matrix via stdin:\n"); printf(" clearcut --distance < distances.txt > treefile.tre\n"); printf("\n"); printf(" Compute tree by supplying an alignment of DNA sequences from a file:\n"); printf(" clearcut --alignment --DNA --in=alignment.txt --out=treefile.tre\n"); return; } mothur-1.48.0/source/clearcut/cmdargs.h000077500000000000000000000057021424121717000200230ustar00rootroot00000000000000/* * njdist.h * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #ifndef _INC_NJ_CMDARGS_H_ #define _INC_NJ_CMDARGS_H_ 1 #include "clearcut.h" /* some datatypes */ typedef struct _STRUCT_NJ_ARGS { char *infilename; /* the name of the input file */ char *outfilename; /* the name of the output tree */ char *matrixout; /* the name of the distance matrix output file */ int input_mode; int aligned_flag; int verbose_flag; int quiet_flag; int stdin_flag; int stdout_flag; int help; int version; int norandom; int shuffle; int dna_flag; int protein_flag; int seed; /* correction models for distance */ int correction_model; int jukes_flag; int kimura_flag; /* flag for using traditional neighbor-joining */ int neighbor; /* number of trees to output */ int ntrees; /* exponential notation output */ int expblen; /* exp notation for tree branch lengths */ int expdist; /* exp notation for distances in matrix output */ } NJ_ARGS; /* some function prototypes */ NJ_ARGS * NJ_handle_args(int argc, char *argv[]); void NJ_print_args(NJ_ARGS *nj_args); void NJ_usage(void); #endif /* _INC_NJ_CMDARGS_H_ */ mothur-1.48.0/source/clearcut/common.h000077500000000000000000000060161424121717000176720ustar00rootroot00000000000000/* * common.h * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************** * * A header file filled with common definitions and simple inline functions * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #ifndef _INC_NJ_COMMON_H_ #define _INC_NJ_COMMON_H_ 1 #include #include #define NJ_AMBIGUITY_CHAR 63 /* ? character */ /* * this macro defines the number of cells in the diagonal matrix * based on the number of taxa involved * */ #define NJ_NCELLS(a) ( ((a)*(a+1))/2 ) /* * NJ_MAP() - * * Thus function maps i, j coordinates to the correct offset into * the distance matrix * */ static inline long int NJ_MAP(long int i, long int j, long int ntaxa) { return((i*(2*ntaxa-i-1))/2 + j); } static inline int NJ_FLT_EQ(float x, float y) { if(fabs(x - y) y) { return(1); } else { return(0); } } } #endif /* _INC_NJ_COMMON_H_ */ mothur-1.48.0/source/clearcut/distclearcut.cpp000077500000000000000000000344361424121717000214320ustar00rootroot00000000000000/* * dist.c * * $Id$ * * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************** * * Compute a distance matrix given a set of sequences * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #include #include #include #include #include #include "common.h" #include "dayhoff.h" #include "fasta.h" #include "distclearcut.h" /* * NJ_build_distance_matrix() - * * Given a filename for an alignment, read the alignment * into memory and then compute the distance matrix * using the appropriate correction model */ DMAT * NJ_build_distance_matrix(NJ_ARGS *nj_args) { DMAT *dmat; NJ_alignment *alignment; /* Read an alignment in FASTA format */ alignment = NJ_read_fasta(nj_args); if(!alignment) { return(nullptr); } /* * Given a global multiple sequence alignment (MSA) and * a specified distance correction model, compute a * corrected distance matrix * * From proteins, we may want to allow users to specify * a substitution matrix (feature) */ dmat = NJ_compute_dmat(nj_args, alignment); // NJ_print_taxanames(dmat); if(!dmat) { fprintf(stderr, "Clearcut: Error computing distance matrix\n"); } /* now free the memory associated with the alignment */ NJ_free_alignment(alignment); return(dmat); } /* * NJ_compute_dmat() - * * Given an alignment and a correction model, compute the * distance matrix and return it * */ DMAT * NJ_compute_dmat(NJ_ARGS *nj_args, NJ_alignment *alignment) { DMAT *dmat; long int i; /* allocate distance matrix here */ dmat = (DMAT *)calloc(1, sizeof(DMAT)); if(!dmat) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n"); return(nullptr); } dmat->ntaxa = alignment->nseq; dmat->size = alignment->nseq; /* allocate memory to hold the taxa names */ dmat->taxaname = (char **)calloc(alignment->nseq, sizeof(char *)); if(!dmat->taxaname) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n"); return(nullptr); } /* copy sequence titles */ for(i=0;inseq;i++) { dmat->taxaname[i] = (char *)calloc(strlen(alignment->titles[i])+1, sizeof(char)); if(!dmat->taxaname[i]) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n"); return(nullptr); } *dmat->taxaname[i] = '\0'; strncat(dmat->taxaname[i], alignment->titles[i], strlen(alignment->titles[i])+1); //strncpy(dmat->taxaname[i], alignment->titles[i], sizeof dmat->taxaname[i] - strlen (dmat->taxaname[i]) - 1); } /* allocate val matrix in dmat */ dmat->val = (float *)calloc(dmat->ntaxa*dmat->ntaxa, sizeof(float)); if(!dmat->val) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n"); return(nullptr); } /* now lets allocate space for the r and r2 columns */ dmat->r = (float *)calloc(dmat->ntaxa, sizeof(float)); dmat->r2 = (float *)calloc(dmat->ntaxa, sizeof(float)); /* track some memory addresses */ dmat->rhandle = dmat->r; dmat->r2handle = dmat->r2; dmat->valhandle = dmat->val; /* apply model correction to matrix */ switch(nj_args->correction_model) { case NJ_MODEL_JUKES: if(nj_args->dna_flag) { NJ_DNA_jc_correction(dmat, alignment); } else if(nj_args->protein_flag) { NJ_PROTEIN_jc_correction(dmat, alignment); } else { fprintf(stderr, "Clearcut: Need to know sequence type for Jukes-Cantor model correction.\n"); return(nullptr); } break; case NJ_MODEL_KIMURA: if(nj_args->dna_flag) { NJ_DNA_k2p_correction(dmat, alignment); } else if(nj_args->protein_flag) { NJ_PROTEIN_kimura_correction(dmat, alignment); } else { fprintf(stderr, "Clearcut: Need to know sequence type for Kimura model 
correction.\n"); return(nullptr); } break; case NJ_MODEL_NONE: NJ_no_correction(dmat, alignment); break; default: fprintf(stderr, "Clearcut: Invalid distance correction model.\n"); return(nullptr); } return(dmat); } /* * NJ_no_correction() - * * Compute the distance matrix without correction * (straight percent ID) * * Resolve ambiguities in sequence data by skipping * those nucleotides/residues * */ void NJ_no_correction(DMAT *dmat, NJ_alignment *alignment) { long int i, j; float pdiff; /* compute pairwise percent identity */ for(i=0;isize;i++) { for(j=i+1;jsize;j++) { pdiff = 1.0 - NJ_pw_percentid(alignment, i, j); dmat->val[NJ_MAP(i, j, dmat->size)] = pdiff; } } return; } /* * NJ_DNA_jc_correction() - * * Compute the distance matrix with jukes-cantor correction * and assign high distance if sequence divergence exceeds * 0.75 * * Jukes, T.H. (1969), Evolution of protein molecules. In H.N. Munro (Ed.), * Mammalian Protein Metabolism, Volume III, Chapter 24, pp. 21-132. * New York: Academic Press * */ void NJ_DNA_jc_correction(DMAT *dmat, NJ_alignment *alignment) { long int i, j; long int k; float d, cutoff, dist; long int residues; cutoff = 0.75; for(i=0;isize;i++) { for(j=i+1;jsize;j++) { k = NJ_pw_differences(alignment, i, j, &residues); d = 1.0 - NJ_pw_percentid(alignment, i, j); if(d > cutoff) { dist = NJ_BIGDIST; } else { dist = (-0.75) * log(1.0 - (4.0/3.0)*d); } if(fabs(dist) < FLT_EPSILON) { dmat->val[NJ_MAP(i, j, dmat->size)] = 0.0; } else { dmat->val[NJ_MAP(i, j, dmat->size)] = dist; } } } return; } /* * NJ_PROTEIN_jc_correction() - * * This function performs modified jukes/cantor correction on * a protein alignment * * Jukes, T.H. (1969), Evolution of protein molecules. In H.N. Munro (Ed.), * Mammalian Protein Metabolism, Volume III, Chapter 24, pp. 21-132. * New York: Academic Press * */ void NJ_PROTEIN_jc_correction(DMAT *dmat, NJ_alignment *alignment) { long int i, j; long int residues; long int diff; float dist, x; for(i=0;isize;i++) { for(j=i+1;jsize;j++) { diff = NJ_pw_differences(alignment, i, j, &residues); if(!diff || !residues) { dist = 0.0; } else { dist = (float)diff/(float)residues; x = ((20.0/19.0)*dist); if(NJ_FLT_GT(x, 1.0)) { dist = NJ_BIGDIST; } else { dist = -(19.0/20.0) * log(1.0 - x); } } dmat->val[NJ_MAP(i, j, dmat->size)] = dist; } } return; } /* * NJ_DNA_k2p_correction() - * * Correct a distance matrix using k2p correction using * cutoffs to avoid problems with logarithms. * * dist = -0.5ln(1-2P-Q) - 0.25ln(1-2Q) * * But due to the logarithms, this is only valid when * * (2P+Q <= 1) && * (2Q <= 1) * * So assign arbitary distances when these constraints are * not strictly followed. * * Kimura, M. (1980), A simple method for estimating evolutionary * rates of base substitutions through comparative studies of * nucleotide sequences. J. Mol. 
Evol., 16, 111-120 * */ void NJ_DNA_k2p_correction(DMAT *dmat, NJ_alignment *alignment) { long int i, j; float P; /* proportion of transitions */ float Q; /* proportion of transversions */ long int nucleotides; long int transitions, transversions; float dist; float log_x = 0.0; /* the params for the first log */ float log_y = 0.0; /* the params for the second log */ int blowup; /* a flag to specify if we have a log blowup */ for(i=0;isize;i++) { for(j=i+1;jsize;j++) { blowup = 0; /* count the number of transitions and transversions */ NJ_DNA_count_tt(alignment, i, j, &transitions, &transversions, &nucleotides); if(!nucleotides) { /* sequences have no non-ambiguous overlap in alignment */ P = 0.0; Q = 0.0; } else { P = (float)transitions / (float)nucleotides; Q = (float)transversions / (float)nucleotides; } /* the first log blows up if 2*P+Q = 1.0 */ if(NJ_FLT_EQ((2.0 * P + Q), 1.0)) { blowup = 1; } else { if( NJ_FLT_LT(1.0 - 2.0*P - Q, 0.0) ) { blowup = 1; } else { log_x = log(1.0 - 2.0*P - Q); } } /* the second log blows up if 2*Q >= 1.0 */ if( NJ_FLT_EQ((2.0 * Q), 1.0) || NJ_FLT_GT((2.0 * Q), 1.0) ) { blowup = 1; } else { log_y = log(1.0 - 2.0*Q); } /* if our logarithms blow up, we just set the distance to the max */ if(blowup) { dist = NJ_BIGDIST; } else { dist = (-0.5)*log_x - 0.25*log_y; } if(fabs(dist) < FLT_EPSILON) { dmat->val[NJ_MAP(i, j, dmat->size)] = 0.0; } else { dmat->val[NJ_MAP(i, j, dmat->size)] = dist; } } } return; } /* * NJ_PROTEIN_kimura_correction() - * * Perform Kimura correction for distances derived from protein * alignments. * * Kimura, M. (1983), The Neutral Theory of Molecular Evolution. * p. 75., Cambridge University Press, Cambridge, England * */ void NJ_PROTEIN_kimura_correction(DMAT *dmat, NJ_alignment *alignment) { long int i, j; long int residues; long int diff; float dist; printf("NJ_PROTEIN_kimura_correction()\n"); for(i=0;isize;i++) { for(j=i+1;jsize;j++) { diff = NJ_pw_differences(alignment, i, j, &residues); if(!diff || !residues) { dist = 0.0; } else { dist = (float)diff/(float)residues; } if(NJ_FLT_LT(dist, 0.75)) { if(NJ_FLT_GT(dist, 0.0) ) { dist = -log(1.0 - dist - (dist * dist/5.0) ); } } else { if(NJ_FLT_GT(dist, 0.93) ) { dist = 10.0; } else { dist = (float)NJ_dayhoff[ (int)((dist*1000.0)-750.0) ] / 100.0 ; } } dmat->val[NJ_MAP(i, j, dmat->size)] = dist; } } return; } /* * NJ_DNA_count_tt() - * * Count the number of transitions and transversions * between two aligned DNA sequences * * This routine automatically skips ambiguities when * counting transitions and transversions. 
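*
* For example, comparing x = ACGTA with y = GCTT? column by column:
* A/G is a transition, C/C and T/T are identical, G/T is a transversion,
* and A/? is skipped because '?' (NJ_AMBIGUITY_CHAR) marks an ambiguity,
* giving transitions = 1, transversions = 1 and residues = 4.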
* */ void NJ_DNA_count_tt(NJ_alignment *alignment, long int x, long int y, long int *transitions, long int *transversions, long int *residues) { long int tmp_transitions = 0; long int tmp_transversions = 0; long int tmp_residues = 0; char a, b; long int i; for(i=0;ilength;i++) { a = toupper(alignment->data[x*alignment->length+i]); b = toupper(alignment->data[y*alignment->length+i]); if( (a == 'A' && b == 'T') || (a == 'T' && b == 'A') || (a == 'A' && b == 'C') || (a == 'C' && b == 'A') || (a == 'T' && b == 'G') || (a == 'G' && b == 'T') || (a == 'C' && b == 'G') || (a == 'G' && b == 'C') ) { tmp_transversions++; } if( (a == 'C' && b == 'T') || (a == 'T' && b == 'C') || (a == 'G' && b == 'A') || (a == 'A' && b == 'G') ) { tmp_transitions++; } /* count the number of residues */ if(a != NJ_AMBIGUITY_CHAR && b != NJ_AMBIGUITY_CHAR ) { tmp_residues++; } } *transitions = tmp_transitions; *transversions = tmp_transversions; if(residues) { *residues = tmp_residues; } return; } /* * NJ_pw_percentid() - * * Given an alignment and a specification * for two rows, compute the pairwise * percent identity between the two * */ float NJ_pw_percentid(NJ_alignment *alignment, long int x, long int y) { float pid; long int i; long int residues; long int same; char c1, c2; residues = 0; same = 0; for(i=0;ilength;i++) { c1 = alignment->data[x*alignment->length+i]; c2 = alignment->data[y*alignment->length+i]; if( c1 != NJ_AMBIGUITY_CHAR || c2 != NJ_AMBIGUITY_CHAR ) { residues++; if(c1 == c2) { same++; } } } pid = (float)same/(float)residues; return(pid); } /* * NJ_pw_differences() - * * Given an alignment and a specification * for two rows in the alignment, compute the * number of differences between the two sequences * * With respect to ambiguity codes, we will want to * disregard those sites entirely in our count. * */ long int NJ_pw_differences(NJ_alignment *alignment, long int x, long int y, long int *residues) { long int i; long int diff; char c1, c2; long int tmp_residues; diff = 0; tmp_residues = 0; for(i=0;ilength;i++) { c1 = alignment->data[x*alignment->length+i]; c2 = alignment->data[y*alignment->length+i]; if( c1 != NJ_AMBIGUITY_CHAR || c2 != NJ_AMBIGUITY_CHAR ) { tmp_residues++; if(c1 != c2) { diff++; } } } *residues = tmp_residues; return(diff); } mothur-1.48.0/source/clearcut/distclearcut.h000077500000000000000000000061251424121717000210710ustar00rootroot00000000000000/* * dist.h * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * Compute a distance matrix given a set of sequences * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #ifndef _INC_DIST_H_ #define _INC_DIST_H_ 1 #ifdef __cplusplus extern "C" { #endif #include "fasta.h" #include "clearcut.h" /* * An arbitrarily large distance to represent distances * which are too great to accurately correct. */ #define NJ_BIGDIST 10.0 /* some function prototypes */ DMAT * NJ_build_distance_matrix(NJ_ARGS *nj_args); DMAT * NJ_compute_dmat(NJ_ARGS *nj_args, NJ_alignment *alignment); float NJ_pw_percentid(NJ_alignment *alignment, long int x, long int y); long int NJ_pw_differences(NJ_alignment *alignment, long int x, long int y, long int *residues); void NJ_no_correction(DMAT *dmat, NJ_alignment *alignment); void NJ_DNA_jc_correction(DMAT *dmat, NJ_alignment *alignment); void NJ_PROTEIN_jc_correction(DMAT *dmat, NJ_alignment *alignment); void NJ_DNA_k2p_correction(DMAT *dmat, NJ_alignment *alignment); void NJ_PROTEIN_kimura_correction(DMAT *dmat, NJ_alignment *alignment); void NJ_DNA_count_tt(NJ_alignment *alignment, long int x, long int y, long int *transitions, long int *transversions, long int *residues); #ifdef __cplusplus } #endif #endif /* _INC_DIST_H_ */ mothur-1.48.0/source/clearcut/dmat.cpp000077500000000000000000000441401424121717000176620ustar00rootroot00000000000000/* * dmat.c * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * Distance matrix parser * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #include #include #include #include #include #include "common.h" #include "clearcut.h" #include "dmat.h" /* * * NJ_is_alpha() - determine if character is an alphabetic character * * INPUT: * ------ * c -- character to test * * RETURN: * ------- * int -- 1 if character is alphabetic (A-Z || a-z) * 0 if character is NOT alphabetic * */ /* * * NJ_is_whitespace() - determine if character is a whitespace character * * INPUT: * ------ * c -- character to test * * RETURN: * ------- * int -- 1 if character is whitespace (space, tab, CR, LF) * 0 if character is NOT whitespace * */ static inline int NJ_is_whitespace(char c) { if( c == ' ' || /* space */ c == '\n' || /* newline */ c == '\r' || /* carriage-return */ c == '\v' || /* vertical tab */ c == '\f' || /* form feed */ c == '\t' ) { /* horizontal tab */ return(1); } else { return(0); } } /* * * NJ_is_number() - determine if character is a number * * INPUT: * ------ * c -- character to test * * RETURN: * ------- * int -- 1 if character is a number (0-9) * 0 if character is NOT a number * */ static inline int NJ_is_number(char c) { if(c >= '0' && c <= '9') { return(1); } else { return(0); } } /* * NJ_is_distance() - check if string is a properly formatted distance value * */ static inline int NJ_is_distance(char *token) { int i; char c; int exponent_state; int expsign_state; int dpoint_state; /* if token is nullptr return failure */ if(!token) { return(0); } exponent_state = 0; expsign_state = 0; dpoint_state = 0; /* The first character must be a number, a decimal point or a sign */ c = token[0]; if(!NJ_is_number(c) && c != '.' && c != '-' && c != '+' ) { goto BAD; } /* * if the first character is not a number, and string is only one * character long, then we return failure. 
*/ if(strlen(token) == 1) { if(!NJ_is_number(c)) { goto BAD; } } for(i=0;i0 && !exponent_state) { if(c == '-' || c == '+') { goto BAD; } } /* if we are in the exponent state, and we've already seen a sign */ if(exponent_state && expsign_state) { if(c == '-' || c == '+') { goto BAD; } } /* if we are in the exponent state and we see a decimal point */ if(exponent_state) { if(c == '.') { goto BAD; } } /* if we are in the exponent state and see another e or E */ if(exponent_state) { if(c == 'e' || c == 'E') { goto BAD; } } /* if we are dpoint_state and see another decimal point */ if(dpoint_state) { if(c == '.') { goto BAD; } } /* enter the exponent state if we need to */ if(!exponent_state) { if(c == 'e' || c == 'E') { exponent_state = 1; } } /* enter the expsign_state if we need to */ if(exponent_state && !expsign_state) { if(c == '-' || c == '+') { expsign_state = 1; } } /* if not in dpoint state and we see a dpoint */ if(!dpoint_state) { if(c == '.') { dpoint_state = 1; } } } /* the token must end in a number char */ if(!NJ_is_number(token[strlen(token)-1])) { goto BAD; } /* token is a valid numerical distance */ return(1); BAD: /* token is invalid distance format */ return(0); } /* * NJ_is_label() - * * Simply, if token is not a valid number, then it is a name * */ /* * NJ_get_token() - get a token from an input stream * */ static inline int NJ_get_token(FILE *fp, NJ_DIST_TOKEN *token) { char c; int index; c = fgetc(fp); if(feof(fp)) { token->type = NJ_EOF_STATE; return(token->type); } if(NJ_is_whitespace(c)) { token->buf[0] = c; token->buf[1] = '\0'; token->type = NJ_WS_STATE; return NJ_WS_STATE; } index = 0; while(!NJ_is_whitespace(c)) { /* reallocate our buffer if necessary */ if(index >= token->bufsize) { token->bufsize *= 2; token->buf = (char *)realloc(token->buf, token->bufsize*sizeof(char)); if(!token->buf) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_get_token()\n"); exit(-1); } } token->buf[index++] = c; c = fgetc(fp); if(feof(fp)) { token->type = NJ_EOF_STATE; break; } } token->buf[index] = '\0'; if(token->type != NJ_EOF_STATE) { if(NJ_is_distance(token->buf)) { token->type = NJ_FLOAT_STATE; } else { token->type = NJ_NAME_STATE; } } return(token->type); } /* * NJ_parse_distance_matrix() -- Takes a filename and returns a distance matrix * * * INPUT: * ------ * nj_args -- a pointer to a structure containing the command-line arguments * * OUTPUT: * ------- * -- nullptr (failure) * -- A pointer to a populated distance matrix * * DESCRIPTION: * ------------ * This function implements a simple state machine to parse a distance matrix * in approximate PHYLIP format. This function auto-detects whether the * distance matrix is in upper, lower, or fully-symmetric format and handles * it accordingly. For full/symmetric matrices, values must be symmetric * around the diagonal, which is required to be zeroes. Names and values must * be separated by whitespace (space, tab, newlines, etc.). Taxon labels can * include numbers, but must start with non-numerical symbols. * * * *** UPPER FORMAT EXAMPLE *** * * 4 * seq1 0.2 0.3 0.1 * seq2 0.2 0.3 * seq3 0.1 * seq4 * * *** LOWER FORMAT EXAMPLE *** * * 4 * seq1 * seq2 0.3 * seq3 0.2 0.4 * seq4 0.3 0.1 0.3 * * *** SYMMETRIC (FULL) EXAMPLE *** * * 4 * seq1 0.0 0.3 0.5 0.3 * seq2 0.3 0.0 0.1 0.2 * seq3 0.5 0.1 0.0 0.9 * seq4 0.3 0.2 0.9 0.0 * * Values in the distance matrix can be positive or negative, integers or * real values. Values can also be parsed in exponential notation form. 
* */ DMAT * NJ_parse_distance_matrix(NJ_ARGS *nj_args) { DMAT *dmat = nullptr; FILE *fp = nullptr; NJ_DIST_TOKEN *token = nullptr; int state, dmat_type; int row; int fltcnt; int x, y, i; int numvalread; int expectedvalues = -1; float val; int first_state = 0; /* allocate our distance matrix and token structure */ dmat = (DMAT *)calloc(1, sizeof(DMAT)); token = (NJ_DIST_TOKEN *)calloc(1, sizeof(NJ_DIST_TOKEN)); if(token) { token->bufsize = NJ_INITIAL_BUFSIZE; token->buf = (char *)calloc(token->bufsize, sizeof(char)); } if(!dmat || !token || !token->buf) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n"); goto XIT_BAD; } /* open distance matrix file here */ if(nj_args->stdin_flag) { fp = stdin; } else { fp = fopen(nj_args->infilename, "r"); if(fp==nullptr) { fprintf(stderr, "Clearcut: Could not open distance matrix: %s\n", nj_args->infilename); perror("Clearcut"); goto XIT_BAD; } } /* get the number of taxa in this file */ fscanf(fp, "%ld\n", &dmat->ntaxa); if(dmat->ntaxa < 2) { fprintf(stderr, "Clearcut: Invalid number of taxa in distance matrix\n"); goto XIT_BAD; } /* set our initial working size according to the # of taxa */ dmat->size = dmat->ntaxa; /* allocate space for the distance matrix values here */ dmat->val = (float *)calloc(NJ_NCELLS(dmat->ntaxa), sizeof(float)); if(!dmat->val) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n"); goto XIT_BAD; } /* taxa names */ dmat->taxaname = (char **)calloc(dmat->ntaxa, sizeof(char *)); if(!dmat->taxaname) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n"); goto XIT_BAD; } /* set the initial state of our state machine */ dmat_type = NJ_PARSE_UNKNOWN; row = -1; fltcnt = 0; numvalread = 0; /* read the input one character at a time to drive simple state machine */ state = NJ_get_token(fp, token); while(state != NJ_EOF_STATE) { switch(state) { case NJ_NAME_STATE: if(first_state == 0) { first_state = 1; } row++; if(row > 0 && dmat_type == NJ_PARSE_UNKNOWN) { if(fltcnt == dmat->ntaxa) { dmat_type = NJ_PARSE_SYMMETRIC; expectedvalues = dmat->ntaxa * dmat->ntaxa; } else if (fltcnt == dmat->ntaxa-1) { dmat_type = NJ_PARSE_UPPER; expectedvalues = ((dmat->ntaxa) * (dmat->ntaxa-1)) / 2; /* shift everything in first row by one char */ for(i=dmat->ntaxa-2;i>=0;i--) { dmat->val[i+1] = dmat->val[i]; } } else if (fltcnt == 0) { dmat_type = NJ_PARSE_LOWER; expectedvalues = ((dmat->ntaxa) * (dmat->ntaxa-1)) / 2; } else { goto XIT_BAD; } } if(row >= dmat->ntaxa) { goto XIT_BAD; } /* allocate space for this taxon label */ dmat->taxaname[row] = (char *)calloc(strlen(token->buf)+1, sizeof(char)); if(!dmat->taxaname[row]) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n"); goto XIT_BAD; } strcpy(dmat->taxaname[row], token->buf); fltcnt = 0; break; case NJ_FLOAT_STATE: if(first_state == 0) { goto XIT_BAD; } //fprintf(stdout, "the token buf is %s", token->buf); //fprintf(stdout, "the token buf is %f", errno); val = atof(token->buf); //fprintf(stdout, "the token buf is %f", errno); if(errno) { fprintf(stderr, "Clearcut: Distance value out-of-range.\n"); goto XIT_BAD; } x = row; y = fltcnt; switch(dmat_type) { case NJ_PARSE_UNKNOWN: dmat->val[NJ_MAP(x, y, dmat->size)] = val; break; case NJ_PARSE_SYMMETRIC: if(fltcnt >= dmat->ntaxa) { fprintf(stderr, "Clearcut: Incorrect number of distance values on row, %s. 
Expected %d, and found %ld.\n", dmat->taxaname[row], fltcnt, (dmat->ntaxa)); goto XIT_BAD; } if(x < y) { dmat->val[NJ_MAP(x, y, dmat->size)] = val; } else if(x > y) { if(!NJ_FLT_EQ(val, dmat->val[NJ_MAP(y, x, dmat->size)])) { fprintf(stderr, "Clearcut: Full matrices must be symmetric.\n"); goto XIT_BAD; } } else { if(!NJ_FLT_EQ(val, 0.0)) { fprintf(stderr, "Clearcut: Values along the diagonal in a symmetric matrix must be zero.\n"); goto XIT_BAD; } } break; case NJ_PARSE_UPPER: if(fltcnt > dmat->ntaxa-row) { fprintf(stderr, "Clearcut: Incorrect number of distance values on row, %s. Expected %d, and found %ld.\n", dmat->taxaname[row], fltcnt, (dmat->ntaxa-row)); goto XIT_BAD; } dmat->val[NJ_MAP(x, x+y+1, dmat->size)] = val; break; case NJ_PARSE_LOWER: if(fltcnt > row-1) { fprintf(stderr, "Clearcut: Incorrect number of distance values on row, %s. Expected %d, and found %d.\n", dmat->taxaname[row], fltcnt, (row-1)); goto XIT_BAD; } dmat->val[NJ_MAP(y, x, dmat->size)] = val; break; default: goto XIT_BAD; break; } fltcnt++; numvalread++; break; case NJ_WS_STATE: break; case NJ_EOF_STATE: if(first_state == 0) { goto XIT_BAD; } break; default: fprintf(stderr, "Clearcut: Unknown state in distance matrix parser.\n"); break; } /* get next token from stream */ state = NJ_get_token(fp, token); } /* * At the end, if we have not read the number of values that we predicted * we would need, then there was a problem and we need to punt. */ if(numvalread != expectedvalues) { fprintf(stderr, "Clearcut: Incorrect number of values in the distance matrix. Expected %d, and found %d.\n", numvalread, expectedvalues); goto XIT_BAD; } /* special check to make sure first value read is 0.0 */ if(dmat_type == NJ_PARSE_SYMMETRIC) { if(!NJ_FLT_EQ(dmat->val[NJ_MAP(0, 0, dmat->size)], 0.0)) { fprintf(stderr, "Clearcut: Values along the diagonal in a symmetric matrix must be zero.\n"); goto XIT_BAD; } } /* now lets allocate space for the r and r2 columns */ dmat->r = (float *)calloc(dmat->ntaxa, sizeof(float)); dmat->r2 = (float *)calloc(dmat->ntaxa, sizeof(float)); if(!dmat->r || !dmat->r2) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n"); goto XIT_BAD; } /* track some memory addresses */ dmat->rhandle = dmat->r; dmat->r2handle = dmat->r2; dmat->valhandle = dmat->val; /* close matrix file here */ if(!nj_args->stdin_flag) { fclose(fp); } if(token) { if(token->buf) { free(token->buf); } free(token); } return(dmat); /* clean up our partial progress */ XIT_BAD: if(fp) { fprintf(stderr, "Clearcut: Syntax error in distance matrix at offset %ld.\n", ftell(fp)); } /* close matrix file here */ if(!nj_args->stdin_flag) { if(fp) { fclose(fp); } } /* if we have a valid dmat (partial or complete), we need to free it */ if(dmat) { NJ_free_dmat(dmat); } if(token) { if(token->buf) { free(token->buf); } free(token); } return(nullptr); } /* * NJ_output_matrix() - Output a distance matrix to the specified file * * * INPUTS: * ------- * nj_args -- a pointer to a data structure holding the command-line args * dmat -- a pointer to a distance matrix * * * RETURNS: * -------- * NOTHING * * * DESCRIPTION: * ------------ * If the appropriate flag was specified in the command-line, this function * now outputs the parsed or computed distance matrix to a file. This * can be useful if generating a distance matrix was the primary goal of * running the program, or if one wanted to debug and/or verify the * correctness of the program. * * Currently this function outputs full/symmetric matrices only. 
* */ void NJ_output_matrix(NJ_ARGS *nj_args, DMAT *dmat) { FILE *fp = nullptr; long int i, j; /* if we haven't specieid matrixout, return immediately */ if(!nj_args->matrixout) { return; } /* open the specified matrix file for writing */ fp = fopen(nj_args->matrixout, "w"); if(!fp) { fprintf(stderr, "Clearcut: Could not open matrix file %s for output.\n", nj_args->matrixout); return; } /* output the number of taxa in the matrix */ fprintf(fp, " %ld\n", dmat->size); fprintf(fp, "%s\n", dmat->taxaname[0]); // print the first taxon name outside of the main loop for(i=1;isize;i++) { /* output taxaname */ fprintf(fp, "%s\t", dmat->taxaname[i]); for(j=0;jexpdist) { /* exponential notation (or not) */ fprintf(fp, "%e ", dmat->val[NJ_MAP(j,i,dmat->size)]); } else { fprintf(fp, "%f ", dmat->val[NJ_MAP(j,i,dmat->size)]); } } fprintf(fp, "\n"); } #ifdef FULL_SYMMETRIC_MATRIX /* output the number of taxa in the matrix */ fprintf(fp, " %ld\n", dmat->size); for(i=0;isize;i++) { /* output taxaname */ fprintf(fp, "%s\t", dmat->taxaname[i]); for(j=0;jsize;j++) { if(i>j) { if(nj_args->expdist) { /* exponential notation (or not) */ fprintf(fp, "%e ", dmat->val[NJ_MAP(j,i,dmat->size)]); } else { fprintf(fp, "%f ", dmat->val[NJ_MAP(j,i,dmat->size)]); } } else if(iexpdist) { /* exponential notation (or not) */ fprintf(fp, "%e ", dmat->val[NJ_MAP(i,j,dmat->size)]); } else { fprintf(fp, "%f ", dmat->val[NJ_MAP(i,j,dmat->size)]); } } else { if(nj_args->expdist) { /* exponential notation (or not) */ fprintf(fp, "%e ", 0.0); } else { fprintf(fp, "%f ", 0.0); } } } fprintf(fp, "\n"); } #endif // FULL_SYMMETRIC_MATRIX /* close the file here */ if(fp) { fclose(fp); } return; } mothur-1.48.0/source/clearcut/dmat.h000077500000000000000000000051411424121717000173250ustar00rootroot00000000000000/* * dmat.h * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************** * * Distance matrix parser header file * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu */ #ifndef _INC_DMAT_H_ #define _INC_DMAT_H_ 1 #ifdef __cplusplus extern "C" { #endif #include "clearcut.h" #define NJ_INITIAL_BUFSIZE 32 #define NJ_NAME_STATE 100 #define NJ_FLOAT_STATE 101 #define NJ_WS_STATE 102 #define NJ_EOF_STATE 103 #define NJ_PARSE_SYMMETRIC 100 #define NJ_PARSE_LOWER 101 #define NJ_PARSE_UPPER 102 #define NJ_PARSE_UNKNOWN 103 /* some data structures */ typedef struct _NJ_DIST_TOKEN_STRUCT { char *buf; long int bufsize; int type; } NJ_DIST_TOKEN; /* some function prototypes */ DMAT * NJ_parse_distance_matrix(NJ_ARGS *nj_args); void NJ_output_matrix(NJ_ARGS *nj_args, DMAT *dmat); #ifdef __cplusplus } #endif #endif /* _INC_DMAT_H_ */ mothur-1.48.0/source/clearcut/fasta.cpp000077500000000000000000000377451424121717000200500ustar00rootroot00000000000000/* * fasta.c * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * Functions for parsing FASTA formatted alignment files * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #include #include #include #include #include "clearcut.h" #include "common.h" #include "fasta.h" #define NJ_NUM_DNA_AMBIGUITY_SYMS 14 static const char NJ_dna_ambiguity_syms[NJ_NUM_DNA_AMBIGUITY_SYMS] = { 'M', 'R', 'W', 'S', 'Y', 'K', 'V', 'H', 'D', 'B', 'X', 'N', '-', '.' }; #define NJ_NUM_PROTEIN_AMBIGUITY_SYMS 6 static const char NJ_protein_ambiguity_syms[NJ_NUM_PROTEIN_AMBIGUITY_SYMS] = { 'X', 'B', 'Z', '*', '-', '.' 
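/* standard protein ambiguity and gap codes: X unknown, B Asx, Z Glx, '*' stop, '-' and '.' gap; the FASTA reader maps each of these to NJ_AMBIGUITY_CHAR */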
}; #define NJ_NUM_DNA_SYMS 5 static const char NJ_dna_syms[NJ_NUM_DNA_SYMS] = { 'A', 'G', 'C', 'T', 'U' }; #define NJ_NUM_PROTEIN_SYMS 20 static const char NJ_protein_syms[NJ_NUM_PROTEIN_SYMS] = { 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V' }; /* * NJ_is_whitespace() - Check to see if character is whitespace * * INPUTS: * ------- * c -- character to check * * RETURNS: * -------- * int -- 0 if not whitespace * 1 if whitespace */ static inline int NJ_is_whitespace(char c) { if( c == ' ' || /* space */ c == '\n' || /* newline */ c == '\r' || /* carriage-return */ c == '\v' || /* vertical tab */ c == '\f' || /* form feed */ c == '\t' ) { /* horizontal tab */ return(1); } else { return(0); } } /* * NJ_is_dna() - * * Determines if the given symbol is DNA * * RETURNS: 1 if DNA * 0 if not DNA * */ static inline int NJ_is_dna(char c) { int i; char up_c; up_c = toupper(c); for(i=0;i sequence1 * ATAGATATAGATTAGAATAT----TATAGATAT----ATATAT-TTT- * > sequence2 * --ATAGATA---ATATATATATTTT--GTCTCATAGT---ATATGCTT * > sequence3 * TTATAGATA---ATATATATATTTTAAGTCTCATAGT-A-ATATGC-- * * This function will parse alignments for DNA or protein, and will do * so mindful of ambiguity codes for these kinds of sequences. All * ambiguity codes are ignored by this program for the purposes of * computing a distance matrix from a multiple alignment. By design, * this program does not auto-detect DNA vs. Protein, and requires that * the user explictly specify that on the command-line. * * Gaps can be represented either with the '-' or '.' characters. * * Newlines and other whitespace are allowed to be interspersed * throughout the sequences. * * Taxon labels are required to be unique, and they must start with * an alphabetic character (not a number, etc.). The parser will read * the first token after the > character in the description line up until the * first whitespace and use that for the taxon label. * * For example, in the line "> taxon1 is homo sapien", the taxon label will be * "taxon1" * */ NJ_alignment * NJ_read_fasta(NJ_ARGS *nj_args) { FILE *fp = nullptr; char *buf = nullptr; char *ptr = nullptr; NJ_alignment *alignment = nullptr; char c; int state; long int index, x, seq; long int i; long int bufsize, nseqs = NJ_INITIAL_NSEQS; int first_sequence_flag; /* * In this function, we implement a FASTA alignment parser which * reads in an alignment character-by-character, maintaining state * information which guides the parser. * * The program reads either DNA or Protein alignments. All title lines * and sequences can be arbitrarily long. Gaps can be represented by * "-" or "." characters. * * Ambiguity codes are also handled. * */ /* * We can't handle reading fasta input unless the user explicity * specifies the input type...just to be sure. 
*/ if( (!nj_args->dna_flag && !nj_args->protein_flag) || (nj_args->dna_flag && nj_args->protein_flag) ) { fprintf(stderr, "Clearcut: Explicitly specify protein or DNA\n"); goto XIT_BAD; } /* open specified fasta file here */ if(nj_args->stdin_flag) { fp = stdin; } else { fp = fopen(nj_args->infilename, "r"); if(!fp) { fprintf(stderr, "Clearcut: Failed to open input FASTA file: %s\n", nj_args->infilename); perror("Clearcut"); goto XIT_BAD; } } /* allocate the initial buffer */ bufsize = NJ_INITIAL_BUFFER_SIZE; buf = (char *)calloc(bufsize, sizeof(char)); /* allocate the alignment container here */ alignment = (NJ_alignment *)calloc(1, sizeof(NJ_alignment)); /* allocate initial title array */ // printf("allocating initial title array\n"); alignment->titles = (char **)calloc(NJ_INITIAL_NSEQS, sizeof(char *)); /* make sure that we successfully allocated memory */ if(!buf || !alignment || !alignment->titles) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n"); goto XIT_BAD; } /* a flag */ first_sequence_flag = 1; index = 0; /* tracks the position in buffer */ x = 0; /* tracks the position on sequence */ seq = 0; /* tracks the active sequence */ /* intitial state of state machine */ state = NJ_FASTA_MODE_UNKNOWN; while(1) { /* get the next character */ c = fgetc(fp); if(feof(fp)) { if(state == NJ_FASTA_MODE_SEQUENCE) { buf[index+1] = '\0'; /* copy buf to alignment */ for(i=1;i<=alignment->length;i++) { alignment->data[seq*alignment->length+i-1] = buf[i]; } } break; } /* make sure our dynamic buffer is big enough */ if(index >= bufsize) { bufsize *= 2; buf = (char *)realloc(buf, bufsize); if(!buf) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n"); goto XIT_BAD; } } switch(state) { case NJ_FASTA_MODE_UNKNOWN: if(!NJ_is_whitespace(c)) { if(c == '>') { state = NJ_FASTA_MODE_TITLE; buf[0] = '>'; } else { goto XIT_BAD; } } break; case NJ_FASTA_MODE_TITLE: if( c == '\n' || c == '\r' ) { buf[index] = '\0'; state = NJ_FASTA_MODE_SEQUENCE; index = 0; x = -1; /* make sure we've allocated enough space for titles and sequences */ if(seq == nseqs) { // printf("realloc(). 
seq = %d, nseqs = %d\n", seq, nseqs); nseqs *= 2; alignment->titles = (char **)realloc(alignment->titles, nseqs*sizeof(char *)); if(!alignment->titles) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n"); goto XIT_BAD; } alignment->data = (char *)realloc(alignment->data, alignment->length*nseqs*sizeof(char)); if(!alignment->data) { fprintf(stderr, "Clearcut: Allocation error in NJ_read_fasta()\n"); goto XIT_BAD; } } // printf("Allocating %d bytes for title %d: %s\n", (int)strlen(buf), (int)seq, buf); alignment->titles[seq] = (char *)calloc(strlen(buf), sizeof(char)); if(!alignment->titles[seq]) { fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n"); goto XIT_BAD; } /* lets forward to the first non-space (space/tab) character after the '>' */ if(first_sequence_flag) { ptr = buf; } else { ptr = &buf[1]; } while(*ptr == '\t' || *ptr == ' ') { ptr++; } sscanf(ptr, "%s", alignment->titles[seq]); /* get the first word and use as the title */ alignment->nseq++; } buf[index++] = c; break; case NJ_FASTA_MODE_SEQUENCE: if(c == '>') { if(first_sequence_flag) { first_sequence_flag = 0; /* allocate our alignment data section here */ alignment->length = index-1; nseqs = NJ_INITIAL_NSEQS; alignment->data = (char *)calloc(alignment->length*nseqs, sizeof(char)); if(!alignment->data) { fprintf(stderr, "Clearcut: Allocation error in NJ_read_fasta()\n"); goto XIT_BAD; } } if(!first_sequence_flag) { if(index-1 < alignment->length) { fprintf(stderr, "Clearcut: Sequences must be of uniform length in alignment at sequence %ld\n", seq); goto XIT_BAD; } } /* re-allocate if necessary */ /* if(seq >= nseqs) { nseqs *= 2; alignment->data = (char *)realloc(alignment->data, alignment->length*nseqs*sizeof(char)); if(!alignment->data) { fprintf(stderr, "Clearcut: Allocation error in NJ_read_fasta()\n"); goto XIT_BAD; } } */ /* copy buf to alignment */ for(i=1;i<=alignment->length;i++) { alignment->data[seq*alignment->length+i-1] = buf[i]; } state = NJ_FASTA_MODE_TITLE; index = 1; x = 1; buf[0] = c; seq++; } else { if(NJ_is_whitespace(c)) { break; } if(!first_sequence_flag) { if(index-1 >= alignment->length) { fprintf(stderr, "Clearcut: Sequences must be of uniform length in alignment at sequence %ld\n", seq); goto XIT_BAD; } } /* * Here we check to make sure that the symbol read is appropriate * for the type of data the user specified. (dna or protein). * We also handle ambiguity codes by converting them to a specific * assigned ambiguity code character. 
Ambiguity codes are ignored * when computing distances */ if(nj_args->dna_flag) { if(NJ_is_dna(c)) { buf[index++] = toupper(c); } else { if(NJ_is_dna_ambiguity(c)) { buf[index++] = NJ_AMBIGUITY_CHAR; } else { fprintf(stderr, "Clearcut: Unknown symbol '%c' in nucleotide sequence %ld.\n", c, seq); goto XIT_BAD; } } } else if(nj_args->protein_flag) { if(NJ_is_protein(c)) { buf[index++] = toupper(c); } else { if(NJ_is_protein_ambiguity(c)) { buf[index++] = NJ_AMBIGUITY_CHAR; } else { fprintf(stderr, "Clearcut: Unknown symbol '%c' in protein sequence %ld.\n", c, seq); goto XIT_BAD; } } } } break; default: goto XIT_BAD; break; } } if(index-1 != alignment->length) { fprintf(stderr, "Clearcut: Sequences must be of uniform length in alignment at sequence %ld\n", seq); goto XIT_BAD; } /* check for duplicate taxon labels */ if(!NJ_taxaname_unique(alignment)) { goto XIT_BAD; } return(alignment); XIT_BAD: if(fp) { fprintf(stderr, "Clearcut: Fatal error parsing FASTA file at file offset %ld.\n", ftell(fp)); } if(buf) { free(buf); } NJ_free_alignment(alignment); return(nullptr); } /* * NJ_print_alignment() - Print multiple sequence alignment (for debugging) * * INPUTS: * ------- * alignment -- A pointer to the alignment * * RETURNS: * -------- * NONE * */ void NJ_print_alignment(NJ_alignment *alignment) { long int i, j; printf("nseq = %ld, length = %ld\n", alignment->nseq, alignment->length); for(i=0;inseq;i++) { printf("> %s\n", alignment->titles[i]); for(j=0;jlength;j++) { printf("%c", alignment->data[i*alignment->length+j]); } printf("\n"); } return; } /* * * NJ_free_alignment() - Free all of the memory allocated for the * multiple sequence alignment * * INPUTS: * ------- * alignment -- A pointer to the multiple sequence alignment * * RETURNS: * -------- * NONE * */ void NJ_free_alignment(NJ_alignment *alignment) { long int i; if(alignment) { /* free the allocated titles */ if(alignment->titles) { for(i=0;inseq;i++) { if(alignment->titles[i]) { free(alignment->titles[i]); } } free(alignment->titles); } /* free the alignment data */ if(alignment->data) { free(alignment->data); } /* free the alignment itself */ free(alignment); } return; } /* * NJ_taxaname_unique() - Check to see if taxanames are unique in alignment * * INPUTS: * ------- * alignment -- a pointer to a multiple sequence alignment * * OUTPUTS: * -------- * int -- 0 if all taxanames in alignment are unique * 1 if all taxanames in alignment are NOT unique * * * DESCRIPTION: * ------------ * * Check to see if the taxanames in the alignment are unique. It * will be impossible to make sense of the final tree if the taxon * labels are not unqiue. * */ int NJ_taxaname_unique(NJ_alignment *alignment) { long int i, j; for(i=0;inseq;i++) { for(j=i+1;jnseq;j++) { if(!strcmp(alignment->titles[i], alignment->titles[j])) { fprintf(stderr, "Clearcut: Taxa %ld and %ld (%s) do not have unique taxon labels.\n", i, j, alignment->titles[i]); return(0); } } } return(1); } void NJ_print_titles(NJ_alignment *alignment) { int i; for(i=0;inseq;i++) { printf("%d: %s\n", i, alignment->titles[i]); } return; } mothur-1.48.0/source/clearcut/fasta.h000077500000000000000000000050051424121717000174750ustar00rootroot00000000000000/* * fasta.h * * $Id$ * ***************************************************************************** * * Copyright (c) 2004, Luke Sheneman * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * + Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * + Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * + The names of its contributors may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************** * * AUTHOR: * * Luke Sheneman * sheneman@cs.uidaho.edu * */ #ifndef _INC_NJ_FASTA_H_ #define _INC_NJ_FASTA_H_ 1 #ifdef __cplusplus extern "C" { #endif #include "clearcut.h" #define NJ_INITIAL_BUFFER_SIZE 512 #define NJ_INITIAL_NSEQS 64 #define NJ_FASTA_MODE_TITLE 100 #define NJ_FASTA_MODE_SEQUENCE 101 #define NJ_FASTA_MODE_NEWLINE 102 #define NJ_FASTA_MODE_UNKNOWN 103 typedef struct _STRUCT_NJ_ALIGNMENT { long int nseq; long int length; char **titles; char *data; } NJ_alignment; NJ_alignment * NJ_read_fasta(NJ_ARGS *nj_args); void NJ_print_alignment(NJ_alignment *alignment); void NJ_free_alignment(NJ_alignment *alignment); int NJ_taxaname_unique(NJ_alignment *alignment); #ifdef __cplusplus } #endif #endif /* _INC_NJ_FASTA_H_ */ mothur-1.48.0/source/clearcut/getopt_long.cpp000077500000000000000000000302551424121717000212600ustar00rootroot00000000000000/* This getopt_long() is compatible with GNU's, however, added original extention (short 1 byte option). Copyright (c) 2004 Koji Arai Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
Compilation for Test: GNU: cc -DUSE_GNU -DDEBUG getopt_long.c -o test_getopt_long_gnu not GNU: cc -I. -DDEBUG getopt_long.c -o test_getopt_long ./test_getopt_long ./test_getopt_long_gnu BUGS: * not implemented any features for getopt() and getopt_long(). */ #include #include #include #if DEBUG static int puts_argv(char **argv) { int i; for (i = 0; argv[i]; i++) { if (i) printf(" "); printf("%s", argv[i]); } printf("\n"); return 0; } #endif #ifndef USE_GNU #include #include "getopt_long.h" char *optarg; int optind; int opterr; int optopt; /* return value 0: no option (include '-') 1: short option like '-x' 2: long option like '--xxx' and just '--' */ static int is_option(char *arg) { if (arg[0] == '-') { switch (arg[1]) { case 0: /* just "-" */ return 0; case '-': /* long option (include just "--")*/ return 2; default: /* short option */ return 1; } } return 0; } static int insert_argv(char **argv, int src, int dest) { int i; char *tmp = argv[src]; if (src > dest) { for (i = src; i > dest; i--) argv[i] = argv[i-1]; //printf("%s\n", argv[i]); } if (src < dest) { for (i = src; i < dest; i++) argv[i] = argv[i+1]; //printf("%s\n", argv[i]); } argv[dest] = tmp; //printf("%s\n", argv[dest]); return 0; } static int search_longopt(char *arg, struct option *longopts) { int i, found = -1; int len; for (len = 0; arg[len] && arg[len] != '='; len++) ; for (i = 0; longopts[i].name; i++) { if (strncmp(arg, longopts[i].name, len)==0) { found = i; break; } } return found; } /* * implemented my extention feature. * optional 1 byte argument with [...] * e.g.) shortopts = "a[0123]b" * accepts "-a0 -a1b" (same as "-a0 -a1 -b") */ static int has_argument_short(char *arg, const char *shortopts) { int i; int open_bracket = 0; for (i = 0; shortopts[i]; i++) { switch (shortopts[i]) { case '[': open_bracket++; continue; case ']': if (open_bracket <= 0) { fprintf(stderr, "getopt_long() -- unbalanced bracket in short options"); return -1; } open_bracket--; continue; } if (open_bracket) continue; if (*arg != shortopts[i]) continue; switch (shortopts[i+1]) { case ':': if (shortopts[i+2] != ':') { if (arg[1]) return 1; /* following string is argument */ else return 2; /* next argv is argument */ } else { /* '::' means optional argument (GNU extention) */ if (arg[1]) return 1; else return 0; /* no argument */ } case '[': if (arg[1] == '\0') return 0; /* no argument */ /* my extention */ for (i++; shortopts[i] && shortopts[i] != ']'; i++) { if (arg[1] == shortopts[i]) return 3; /* has 1 byte argument */ } if (!shortopts[i]) { fprintf(stderr, "getopt_long() -- unbalanced bracket in short options"); return -1; } break; default: return 0; /* no argument */ } } /* Invalid option */ return -1; } static int has_argument_long(char *arg, struct option *longopts) { int i; i = search_longopt(arg, longopts); if (i == -1) { /* Invalid option */ return -1; } else { int len = strlen(arg); char *p = strchr(arg, '='); if (p) { len = p - arg; } switch (longopts[i].has_arg) { case no_argument: return 0; case required_argument: if (arg[len] == '=') return 1; else return 2; case optional_argument: if (arg[len] == '=') return 1; else return 0; default: assert(0); } } } /* -1: no option 0: no argument 1: has argument in this argv 2: has argument in next argv 3: has 1 byte argument in this argv */ static int has_argument(char *arg, const char *shortopts, struct option *longopts) { int i, n; switch (is_option(arg)) { case 0: /* no option */ return -1; case 1: /* short option */ n = -1; for (i = 1; arg[i]; i++) { n = has_argument_short(arg+i, 
shortopts); if (n == 0 && arg[i+1]) continue; if (n == 3 && arg[i+2]) { i++; continue; } break; } return n; case 2: /* long option */ return has_argument_long(arg+2, longopts); break; default: assert(0); } } int getopt_long(int argc, char **argv, const char *shortopts, struct option *longopts, int *indexptr) { char *opt; int i; static int shortoptind; static int no_optind = 0; if (optind == 0) { /* skip first argument (command name) */ optind++; no_optind = 0; shortoptind = 0; } optarg = 0; if (no_optind && !shortoptind) { while (!is_option(argv[no_optind])) insert_argv(argv, no_optind, optind-1); if (has_argument(argv[no_optind], shortopts, longopts) == 2) no_optind += 2; else no_optind++; if (argv[optind] && strcmp(argv[optind], "--") == 0) { while (!is_option(argv[no_optind])) insert_argv(argv, no_optind, optind); optind = no_optind; no_optind = 0; } } if (optind >= argc) goto end_of_option; retry: /* puts_argv(&argv[optind]); */ opt = argv[optind]; if (shortoptind == 0 && is_option(opt) == 1) { shortoptind++; } if (shortoptind) { /* short option */ char *p = &opt[shortoptind]; if (*p == '\0') assert(0); switch (has_argument_short(p, shortopts)) { case 0: /* no argument */ optarg = 0; shortoptind++; if (opt[shortoptind] == '\0') optind++, shortoptind = 0; return *p; case 1: /* following character is argument */ optind++, shortoptind = 0; optarg = &p[1]; return *p; case 2: /* next argv is argument */ optind++, shortoptind = 0; optarg = argv[optind++]; return *p; case 3: /* has 1 byte argument */ optarg = &p[1]; if (p[2] == 0) optind++, shortoptind = 0; else shortoptind += 2; return *p; default: /* Invalid option */ if (opterr) fprintf(stderr, "%s: invalid option -- %c\n", argv[0], *p); optind++, shortoptind = 0; optopt = *p; return '?'; } } else if (opt[0] == '-' && opt[1] == '-') { /* long option */ if (opt[2] == '\0') { /* end of command line switch */ optind++; return -1; } opt += 2; i = search_longopt(opt, longopts); if (i == -1) { optind++; optopt = 0; return '?'; } else { int len = strlen(opt); char *p = strchr(opt, '='); if (p) { len = p - opt; } switch (longopts[i].has_arg) { case no_argument: break; case required_argument: if (opt[len] == '=') optarg = opt + len + 1; else { optind++; optarg = argv[optind]; if (optarg == 0) { if (opterr) fprintf(stderr, "%s: option `--%s' requires an argument\n", argv[0], opt); optopt = 0; return '?'; /* no argument */ } } break; case optional_argument: if (opt[len] == '=') optarg = opt + len + 1; else { optarg = 0; } break; default: break; } *indexptr = i; optind++; if (longopts[i].flag) { *longopts[i].flag = longopts[i].val; return 0; } else { return longopts[i].val; } } optind++; optopt = 0; return '?'; } /* not option */ if (no_optind == 0) no_optind = optind; for (i = optind; argv[i]; i++) { if (is_option(argv[i])) { optind = i; goto retry; } } end_of_option: if (no_optind) { optind = no_optind; no_optind = 0; } return -1; } #endif /* USE_GNU */ #if DEBUG #include #include #include #if USE_GNU #include /* use GNU getopt_long() */ #endif static int verbose_flag; static int option_index; int argc; char *argv[50]; char **p; int c; static struct option long_options[] = { {"verbose", no_argument, &verbose_flag, 1}, {"brief", no_argument, &verbose_flag, 0}, {"add", required_argument, 0, 'a'}, {"append", no_argument, 0, 0}, {"delete", required_argument, 0, 0}, {"create", optional_argument, 0, 0}, {"change", optional_argument, 0, 0}, {0, 0, 0, 0} }; int call_getopt_long(int argc, char **argv, const char *shortopts, struct option *longopts, int 
*indexptr) { int c; c = getopt_long(argc, argv, shortopts, longopts, indexptr); puts_argv(argv); printf("ret=%d(%c) option_index=%d ", c, c, option_index); printf("optind=%d optarg=[%s] opterr=%d optopt=%d(%c)\n", optind, optarg, opterr, optopt, optopt); if (c == 0) { struct option *opt; opt = &longopts[*indexptr]; printf("long option: --%s has_arg=%d\n", opt->name, opt->has_arg); if (opt->flag) printf(" flag=[%8p] val=%d\n", opt->flag, *opt->flag); } return c; } #endif mothur-1.48.0/source/clearcut/getopt_long.h000077500000000000000000000032771424121717000207310ustar00rootroot00000000000000/* This getopt_long() is compatible with GNU's, however, added original extention (short 1 byte option). Copyright (c) 2004 Koji Arai Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef _GETOPT_H #ifdef __cplusplus extern "C" { #endif struct option { const char *name; int has_arg; /* values of has_arg */ #define no_argument 0 #define required_argument 1 #define optional_argument 2 int *flag; int val; }; extern char *optarg; extern int optind; int getopt_long(int argc, char **argv, const char *shortopts, struct option *longopts, int *indexptr); #ifdef __cplusplus } #endif #endif /* _GETOPT_H */ mothur-1.48.0/source/cluster.cpp000077500000000000000000000177671424121717000166330ustar00rootroot00000000000000/* * cluster.cpp * * * Created by Pat Schloss on 8/14/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. 
* */ #include "cluster.hpp" #include "rabundvector.hpp" #include "listvector.hpp" /***********************************************************************/ Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f, float cs) : rabund(rav), list(lv), dMatrix(dm), method(f), adjust(cs) { try { mapWanted = false; //set to true by mgcluster to speed up overlap merge //save so you can modify as it changes in average neighbor cutoff = c; m = MothurOut::getInstance(); } catch(exception& e) { m->errorOut(e, "Cluster", "Cluster"); exit(1); } } /***********************************************************************/ void Cluster::clusterBins(){ try { rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol)); rabund->set(smallRow, 0); rabund->setLabel(toString(smallDist)); } catch(exception& e) { m->errorOut(e, "Cluster", "clusterBins"); exit(1); } } /***********************************************************************/ void Cluster::clusterNames(){ try { if (mapWanted) { updateMap(); } list->set(smallCol, list->get(smallRow)+','+list->get(smallCol)); list->set(smallRow, ""); list->setLabel(toString(smallDist)); } catch(exception& e) { m->errorOut(e, "Cluster", "clusterNames"); exit(1); } } /***********************************************************************/ bool Cluster::update(double& cutOFF){ try { smallCol = dMatrix->getSmallestCell(smallRow); nColCells = dMatrix->seqVec[smallCol].size(); nRowCells = dMatrix->seqVec[smallRow].size(); vector foundCol(nColCells, 0); int search; bool changed = false; for (int i=nRowCells-1;i>=0;i--) { //matrix indexes sorted from largest to smallest, so start at smallest index if (m->getControl_pressed()) { break; } //if you are not the smallCell if (dMatrix->seqVec[smallRow][i].index != smallCol) { search = dMatrix->seqVec[smallRow][i].index; bool merged = false; for (int j=0;jseqVec[smallCol][j].index != smallRow) { //if you are not the smallest distance if (dMatrix->seqVec[smallCol][j].index == search) { //we found a distance for the merge foundCol[j] = 1; merged = true; changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]); dMatrix->updateCellCompliment(smallCol, j); break; }else if (dMatrix->seqVec[smallCol][j].index < search) { //we don't have a distance for this cell if (!util.isEqual(adjust, -1)) { //adjust merged = true; PDistCell value(search, adjust); //create a distance for the missing value int location = dMatrix->addCellSorted(smallCol, value); changed = updateDistance(dMatrix->seqVec[smallCol][location], dMatrix->seqVec[smallRow][i]); dMatrix->updateCellCompliment(smallCol, location); nColCells++; foundCol.push_back(0); //add a new found column //adjust value for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; } foundCol[location] = 1; } j+=nColCells; //jump out of loop and remove cell below } } } //if not merged it you need it for warning if ((!merged) && (method == "average" || method == "weighted")) { if (cutOFF > dMatrix->seqVec[smallRow][i].dist) { cutOFF = dMatrix->seqVec[smallRow][i].dist; } } if ((method == "nearest") && (!merged)) { //you are a row dist without a column dist, add you as a column dist PDistCell value(search, dMatrix->seqVec[smallRow][i].dist); //create a distance for the missing value int location = dMatrix->addCellSorted(smallCol, value); nColCells++; foundCol.push_back(0); //add a new found column //adjust value for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; } 
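/* the shift above keeps foundCol aligned with the cell just inserted into the merged column; mark that slot as holding a real distance before continuing */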
foundCol[location] = 1; } dMatrix->rmCell(smallRow, i); } } clusterBins(); clusterNames(); if (method == "nearest") { for (int i=nColCells-1;i>=0;i--) { //remove any unfound dists from merged column, need special case for nn, since unfound dists mean above the cutoff -> keep smaller dist in col if (foundCol[i] == 0) { //not found if (dMatrix->seqVec[smallCol][i].index == smallRow) { //you are smallest distance dMatrix->rmCell(smallCol, i); break; } } } }else { for (int i=nColCells-1;i>=0;i--) { //remove any unfound dists from merged column, need special case for nn, since unfound dists mean above the cutoff -> keep smaller dist in col if (foundCol[i] == 0) { //not found if (!util.isEqual(adjust, -1)) { //adjust PDistCell value(smallCol, adjust); //create a distance for the missing value changed = updateDistance(dMatrix->seqVec[smallCol][i], value); dMatrix->updateCellCompliment(smallCol, i); }else { if (method == "average" || method == "weighted") { if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance if (cutOFF > dMatrix->seqVec[smallCol][i].dist) { cutOFF = dMatrix->seqVec[smallCol][i].dist; } } } } dMatrix->rmCell(smallCol, i); } } } //dMatrix->print(); return changed; } catch(exception& e) { m->errorOut(e, "Cluster", "update"); exit(1); } } /***********************************************************************/ void Cluster::setMapWanted(bool f) { try { mapWanted = f; //initialize map for (int k = 0; k < list->getNumBins(); k++) { string names = list->get(k); //parse bin string individual = ""; int binNameslength = names.size(); for(int j=0;jerrorOut(e, "Cluster", "setMapWanted"); exit(1); } } /***********************************************************************/ void Cluster::updateMap() { try { //update location of seqs in smallRow since they move to smallCol now string names = list->get(smallRow); string individual = ""; int binNameslength = names.size(); for(int j=0;jerrorOut(e, "Cluster", "updateMap"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/cluster.hpp000077500000000000000000000046211424121717000166210ustar00rootroot00000000000000#ifndef CLUSTER_H #define CLUSTER_H #include "sparsedistancematrix.h" #include "optimatrix.h" #include "mothurout.h" #include "rabundvector.hpp" #include "listvector.hpp" class ListVector; class Cluster { public: Cluster(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float); Cluster() { m = MothurOut::getInstance(); } virtual ~Cluster() = default; virtual bool update(double&); virtual string getTag() = 0; virtual void setMapWanted(bool m); virtual map getSeqtoBin() { return seq2Bin; } protected: virtual bool updateDistance(PDistCell& colCell, PDistCell& rowCell) = 0; virtual void clusterBins(); virtual void clusterNames(); virtual void updateMap(); RAbundVector* rabund; ListVector* list; SparseDistanceMatrix* dMatrix; ull smallRow, smallCol, nRowCells, nColCells; float smallDist, adjust, cutoff; bool mapWanted; map seq2Bin; string method; MothurOut* m; Utils util; }; /***********************************************************************/ class CompleteLinkage : public Cluster { public: CompleteLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float); bool updateDistance(PDistCell& colCell, PDistCell& rowCell); string getTag(); private: }; /***********************************************************************/ class SingleLinkage : public Cluster { public: SingleLinkage(RAbundVector*, ListVector*, 
SparseDistanceMatrix*, float, string, float); //void update(double&); bool updateDistance(PDistCell& colCell, PDistCell& rowCell); string getTag(); private: }; /***********************************************************************/ class AverageLinkage : public Cluster { public: AverageLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float); bool updateDistance(PDistCell& colCell, PDistCell& rowCell); string getTag(); private: int saveRow; int saveCol; int rowBin; int colBin; int totalBin; }; /***********************************************************************/ class WeightedLinkage : public Cluster { public: WeightedLinkage(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string, float); bool updateDistance(PDistCell& colCell, PDistCell& rowCell); string getTag(); private: int saveRow; int saveCol; }; /***********************************************************************/ #endif mothur-1.48.0/source/clusterclassic.cpp000077500000000000000000000427351424121717000201660ustar00rootroot00000000000000/* * clusterclassic.cpp * Mothur * * Created by westcott on 10/29/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "clusterclassic.h" #include "utils.hpp" /***********************************************************************/ ClusterClassic::ClusterClassic(float c, string f, bool s) : method(f), nseqs(0), sim(s) { try { smallDist = MOTHURMAX; mapWanted = false; //set to true by mgcluster to speed up overlap merge //save so you can modify as it changes in average neighbor cutoff = c; aboveCutoff = cutoff + 10000.0; m = MothurOut::getInstance(); if(method == "furthest") { tag = "fn"; } else if (method == "average") { tag = "an"; } else if (method == "weighted") { tag = "wn"; } else if (method == "nearest") { tag = "nn"; } } catch(exception& e) { m->errorOut(e, "ClusterClassic", "ClusterClassic"); exit(1); } } /***********************************************************************/ void ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { try { double distance; bool square = false; string name; vector matrixNames; ifstream fileHandle; Utils util; util.openInputFile(filename, fileHandle); string numTest; fileHandle >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } matrixNames.push_back(name); if(nameMap == nullptr){ list = new ListVector(nseqs); list->set(0, name); } else{ list = new ListVector(nameMap->getListVector()); if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct\n"); } } //initialize distance matrix to cutoff dMatrix.resize(nseqs); for (int i = 1; i < nseqs; i++) { dMatrix[i].resize(i, aboveCutoff); } char d; while((d=fileHandle.get()) != EOF){ if(isalnum(d)){ square = true; fileHandle.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = false; break; } } if(!square){ int index = 0; for(int i=1;igetControl_pressed()) { fileHandle.close(); return; } fileHandle >> name; matrixNames.push_back(name); //there's A LOT of repeated code throughout this method... if(nameMap == nullptr){ list->set(i, name); for(int j=0;jgetControl_pressed()) { fileHandle.close(); return; } fileHandle >> distance; if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. 
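/* lower-triangle storage: dMatrix[i] was sized to i entries above, so row i holds the distances from bin i to bins 0..i-1 */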
dMatrix[i][j] = distance; if (distance < smallDist) { smallDist = distance; } index++; } } else{ if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct\n"); } for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return; } if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if (distance < smallDist) { smallDist = distance; } int row = nameMap->get(matrixNames[i]); int col = nameMap->get(matrixNames[j]); if (row < col) { dMatrix[col][row] = distance; } else { dMatrix[row][col] = distance; } index++; } } } } else{ int index = nseqs; for(int i=1;i> name; matrixNames.push_back(name); if(nameMap == nullptr){ list->set(i, name); for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return; } if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(j < i){ if (distance < smallDist) { smallDist = distance; } dMatrix[i][j] = distance; } index++; } } else{ if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct\n"); } for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return; } if (distance == -1) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(j < i){ if (distance < smallDist) { smallDist = distance; } int row = nameMap->get(matrixNames[i]); int col = nameMap->get(matrixNames[j]); if (row < col) { dMatrix[col][row] = distance; } else { dMatrix[row][col] = distance; } } index++; } } } } if (m->getControl_pressed()) { fileHandle.close(); return; } list->setLabel("0"); rabund = new RAbundVector(list->getRAbundVector()); fileHandle.close(); } catch(exception& e) { m->errorOut(e, "ClusterClassic", "readPhylipFile"); exit(1); } } /***********************************************************************/ void ClusterClassic::readPhylipFile(string filename, CountTable* countTable) { try { double distance; bool square = false; string name; vector matrixNames; ifstream fileHandle; Utils util; util.openInputFile(filename, fileHandle); string numTest; fileHandle >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } matrixNames.push_back(name); if(countTable == nullptr){ list = new ListVector(nseqs); list->set(0, name); } else{ list = new ListVector(countTable->getListVector()); } //initialize distance matrix to cutoff dMatrix.resize(nseqs); //rowSmallDists.resize(nseqs, temp); for (int i = 1; i < nseqs; i++) { dMatrix[i].resize(i, aboveCutoff); } char d; while((d=fileHandle.get()) != EOF){ if(isalnum(d)){ square = true; fileHandle.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = false; break; } } if(!square){ int index = 0; for(int i=1;igetControl_pressed()) { fileHandle.close(); return; } fileHandle >> name; matrixNames.push_back(name); //there's A LOT of repeated code throughout this method... 
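/* without a count table the parsed label simply fills bin i; with one, labels are mapped to their count-table indices before each distance is stored */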
if(countTable == nullptr){ list->set(i, name); for(int j=0;jgetControl_pressed()) { fileHandle.close(); return; } fileHandle >> distance; if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. dMatrix[i][j] = distance; if (distance < smallDist) { smallDist = distance; } index++; } } else{ for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return; } if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if (distance < smallDist) { smallDist = distance; } int row = countTable->get(matrixNames[i]); int col = countTable->get(matrixNames[j]); if (row < col) { dMatrix[col][row] = distance; } else { dMatrix[row][col] = distance; } index++; } } } } else{ int index = nseqs; for(int i=1;i> name; matrixNames.push_back(name); if(countTable == nullptr){ list->set(i, name); for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return; } if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(j < i){ if (distance < smallDist) { smallDist = distance; } dMatrix[i][j] = distance; } index++; } } else{ for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return; } if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(j < i){ if (distance < smallDist) { smallDist = distance; } int row = countTable->get(matrixNames[i]); int col = countTable->get(matrixNames[j]); if (row < col) { dMatrix[col][row] = distance; } else { dMatrix[row][col] = distance; } } index++; } } } } if (m->getControl_pressed()) { fileHandle.close(); return; } list->setLabel("0"); rabund = new RAbundVector(); rabund->setLabel(list->getLabel()); for(int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { break; } vector binNames; string bin = list->get(i); util.splitAtComma(bin, binNames); int total = 0; for (int j = 0; j < binNames.size(); j++) { total += countTable->getNumSeqs(binNames[j]); } rabund->push_back(total); } fileHandle.close(); } catch(exception& e) { m->errorOut(e, "ClusterClassic", "readPhylipFile"); exit(1); } } /***********************************************************************/ //sets smallCol and smallRow, returns distance double ClusterClassic::getSmallCell() { try { smallDist = aboveCutoff; smallRow = 1; smallCol = 0; vector mins; for(int i=1;ierrorOut(e, "ClusterClassic", "getSmallCell"); exit(1); } } /***********************************************************************/ void ClusterClassic::clusterBins(){ try { rabund->set(smallRow, rabund->get(smallRow)+rabund->get(smallCol)); rabund->set(smallCol, 0); rabund->setLabel(toString(smallDist)); } catch(exception& e) { m->errorOut(e, "ClusterClassic", "clusterBins"); exit(1); } } /***********************************************************************/ void ClusterClassic::clusterNames(){ try { if (mapWanted) { updateMap(); } list->set(smallRow, list->get(smallRow)+','+list->get(smallCol)); list->set(smallCol, ""); list->setLabel(toString(smallDist)); } catch(exception& e) { m->errorOut(e, "ClusterClassic", "clusterNames"); exit(1); } } /***********************************************************************/ void ClusterClassic::update(double& cutOFF){ try { getSmallCell(); 
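/* Illustrative note, not part of the original mothur source: the loop that follows merges
   the two closest bins (smallRow, smallCol) and recomputes the distance of every remaining
   bin i to the merged bin according to the chosen linkage method. With distRow and distCol
   the current distances of bin i to the two merged bins, and rowBin/colBin their
   abundances, the updates below are:

       furthest (fn):  newDist = max(distRow, distCol)
       nearest  (nn):  newDist = min(distRow, distCol)
       weighted (wn):  newDist = (distRow + distCol) / 2.0
       average  (an):  newDist = (rowBin * distRow + colBin * distCol) / (rowBin + colBin)

   For example, merging a bin of size 3 (distRow = 0.02) with a bin of size 1
   (distCol = 0.06) gives an average-linkage distance of (3*0.02 + 1*0.06)/4 = 0.03. */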
int r, c; r = smallRow; c = smallCol; for(int i=0;i r) { distRow = dMatrix[i][r]; } else { distRow = dMatrix[r][i]; } if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = aboveCutoff; } //like removeCell else { distCol = dMatrix[c][i]; dMatrix[c][i] = aboveCutoff; } if(method == "furthest"){ newDist = max(distRow, distCol); } else if (method == "average"){ int rowBin = rabund->get(r); int colBin = rabund->get(c); newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin); } else if (method == "weighted"){ newDist = (distCol + distRow) / 2.0; } else if (method == "nearest"){ newDist = min(distRow, distCol); } if (i > r) { dMatrix[i][r] = newDist; } else { dMatrix[r][i] = newDist; } } } clusterBins(); clusterNames(); } catch(exception& e) { m->errorOut(e, "ClusterClassic", "update"); exit(1); } } /***********************************************************************/ void ClusterClassic::setMapWanted(bool f) { try { mapWanted = f; Utils util; //initialize map for (int i = 0; i < list->getNumBins(); i++) { //parse bin string names = list->get(i); vector binnames; util.splitAtComma(names, binnames); for (int j = 0; j < binnames.size(); j++) { //save name and bin number seq2Bin[binnames[j]] = i; } } } catch(exception& e) { m->errorOut(e, "ClusterClassic", "setMapWanted"); exit(1); } } /***********************************************************************/ void ClusterClassic::updateMap() { try { //update location of seqs in smallRow since they move to smallCol now string names = list->get(smallRow); vector binnames; Utils util; util.splitAtComma(names, binnames); for (int j = 0; j < binnames.size(); j++) { //save name and bin number seq2Bin[binnames[j]] = smallCol; } } catch(exception& e) { m->errorOut(e, "ClusterClassic", "updateMap"); exit(1); } } /***********************************************************************/ void ClusterClassic::print() { try { //update location of seqs in smallRow since they move to smallCol now for (int i = 0; i < dMatrix.size(); i++) { m->mothurOut("row = " + toString(i) + "\t"); for (int j = 0; j < dMatrix[i].size(); j++) { m->mothurOut(toString(dMatrix[i][j]) + "\t"); } m->mothurOutEndLine(); } } catch(exception& e) { m->errorOut(e, "ClusterClassic", "updateMap"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/clusterclassic.h000077500000000000000000000022401424121717000176160ustar00rootroot00000000000000#ifndef CLUSTERCLASSIC_H #define CLUSTERCLASSIC_H #include "mothurout.h" #include "listvector.hpp" #include "rabundvector.hpp" #include "nameassignment.hpp" #include "counttable.h" /* * clusterclassic.h * Mothur * * Created by westcott on 10/29/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ class ClusterClassic { public: ClusterClassic(float, string, bool); void readPhylipFile(string, NameAssignment*); void readPhylipFile(string, CountTable*); void update(double&); double getSmallDist() { return smallDist; } int getNSeqs() { return nseqs; } ListVector* getListVector() { return list; } RAbundVector* getRAbundVector() { return rabund; } string getTag() { return tag; } void setMapWanted(bool m); map getSeqtoBin() { return seq2Bin; } private: double getSmallCell(); void clusterBins(); void clusterNames(); void updateMap(); void print(); RAbundVector* rabund; ListVector* list; vector< vector > dMatrix; int smallRow; int smallCol, nseqs; double smallDist; bool mapWanted, sim; double cutoff, aboveCutoff; map seq2Bin; string method, tag; MothurOut* m; }; #endif mothur-1.48.0/source/collect.cpp000077500000000000000000000123411424121717000165560ustar00rootroot00000000000000/* * collect.cpp * Dotur * * Created by Sarah Westcott on 11/18/08. * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "collect.h" /***********************************************************************/ int Collect::getCurve(float percentFreq = 0.01){ try { RAbundVector rabund(order->getNumBins()); SAbundVector rank(order->getMaxRank()+1); //sets displays label for(int i=0;iinit(label); } CollectorsCurveData ccd; ccd.registerDisplays(displays); //convert freq percentage to number int increment = 1; if (percentFreq < 1.0) { increment = numSeqs * percentFreq; } else { increment = percentFreq; } for(int i=0;igetControl_pressed()) { return 1; } int binNumber = order->get(i); int abundance = rabund.get(binNumber); rank.set(abundance, rank.get(abundance)-1); abundance++; rabund.set(binNumber, abundance); rank.set(abundance, rank.get(abundance)+1); //increment rank(abundance) if((i == 0) || (i+1) % increment == 0){ ccd.updateRankData(rank); } } if(numSeqs % increment != 0){ ccd.updateRankData(rank); } for(int i=0;ireset(); } return 0; } catch(exception& e) { m->errorOut(e, "Collect", "getCurve"); exit(1); } } /***********************************************************************/ int Collect::getSharedCurve(float percentFreq = 0.01){ try { vector lookup; map indexLookup; //create and initialize vector of sharedvectors, one for each group vector groups = sharedorder->getGroups(); for (int i = 0; i < groups.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(sharedorder->getMaxRank()+1); temp->setLabel(sharedorder->getLabel()); temp->setGroup(groups[i]); indexLookup[groups[i]] = i; lookup.push_back(temp); } map groupComboToColumn = getGroupComb(groups); //makes 'uniqueAB uniqueAC uniqueBC' if your groups are A, B, C SharedCollectorsCurveData ccd; ccd.registerDisplays(displays); //adds a displays to ccd management //convert freq percentage to number int increment = 1; if (percentFreq < 1.0) { increment = numSeqs * percentFreq; } else { increment = percentFreq; } for(int i=0;igetControl_pressed()) { break; } //get first sample individual chosen = sharedorder->get(i); lookup[indexLookup[chosen.group]]->increment(chosen.binNumber); //calculate at 0 and the given increment if((i == 0) || (i+1) % increment == 0){ ccd.updateSharedData(lookup, i+1, groupComboToColumn); } } if (m->getControl_pressed()) { for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 1; } //calculate last label if you haven't already if(numSeqs % increment != 0){ ccd.updateSharedData(lookup, numSeqs, groupComboToColumn); } //resets output files for(int i=0;ireset(); } //memory cleanup for (int i = 
0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } catch(exception& e) { m->errorOut(e, "Collect", "getSharedCurve"); exit(1); } } /**************************************************************************************/ map Collect::getGroupComb(vector mGroups) { string group; numGroupComb = 0; map groupComboToColumn; int numGroups = mGroups.size(); for (int i = 0; i < (numGroups - 1); i++) { for (int l = i+1; l < numGroups; l++) { group = mGroups[i] +"_"+ mGroups[l]; groupComb.push_back(group); groupComboToColumn[group] = numGroupComb; numGroupComb++; } } for(int i=0;ihasLciHci(); groupLabel = ""; for (int s = 0; s < groupComb.size(); s++) { if (hasLciHci) { groupLabel += label +"_"+ groupComb[s] + "\t" + label + groupComb[s] + "lci\t" + label + groupComb[s] + "hci\t"; } else{ groupLabel += label +"_"+ groupComb[s] + "\t"; } } string groupLabelAll = groupLabel + label +"_"+ "all\t"; if ((displays[i]->isCalcMultiple() ) && (displays[i]->getAll() )) { displays[i]->init(groupLabelAll); } else { displays[i]->init(groupLabel); } } return groupComboToColumn; } /**************************************************************************************/ mothur-1.48.0/source/collect.h000077500000000000000000000017741424121717000162330ustar00rootroot00000000000000#ifndef COLLECT_H #define COLLECT_H #include "collectorscurvedata.h" #include "display.h" #include "ordervector.hpp" #include "sharedordervector.h" /***********************************************************************/ class Collect { public: Collect(OrderVector* order, vector disp) : numSeqs(order->getNumSeqs()), order(order), displays(disp), label(order->getLabel()) { m = MothurOut::getInstance(); }; Collect(SharedOrderVector* sharedorder, vector disp) : numSeqs(sharedorder->getNumSeqs()), sharedorder(sharedorder), displays(disp), label(sharedorder->getLabel()) { m = MothurOut::getInstance(); } ~Collect(){ }; int getCurve(float); int getSharedCurve(float); private: MothurOut* m; SharedOrderVector* sharedorder; OrderVector* order; vector displays; int numSeqs, numGroupComb; string label, groupLabel; vector groupComb; bool validGroup(vector, string); map getGroupComb(vector); }; #endif mothur-1.48.0/source/collectdisplay.h000077500000000000000000000067021424121717000176150ustar00rootroot00000000000000#ifndef COLLECTDISPLAY_H #define COLLECTDISPLAY_H #include "calculator.h" #include "fileoutput.h" #include "display.h" /* There is a display for each calculator. The CollectorsCurveData class manages the displays. It sends each one either a sabundvector (collect.single) or a set of samples for a shared file (collect.shared) to find the results for. The display arranges the data and sends it to the FileOutput class for writing. */ /***********************************************************************/ class CollectDisplay : public Display { public: CollectDisplay(Calculator* calc, FileOutput* file) : estimate(calc), output(file) { timesCalled = 0; } ~CollectDisplay() { delete estimate; delete output; } //used by collect.single void update(SAbundVector& rank){ nSeqs=rank.getNumSeqs(); data = estimate->getValues(&rank); output->updateOutput(nSeqs, data); } /* This function is called by the collect class. The collect class is passing pairs of samples, as well as the all samples if a multi calc is used. This function assembles a row of output data. It makes sure the output is assembled in the same order as the labels in the header column. It then sends the entire row to the file output class to handle the file writing. 
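   For example (illustrative only, not from the original comment): with groups A, B and C
   the pairwise combos map to columns {A_B -> 0, A_C -> 1, B_C -> 2}. If the calculator
   also reports confidence intervals, each pair contributes three values (estimate, lci,
   hci), so data.size() == 3 and the results for pair B_C are written into groupData
   starting at offset 2 * 3 = 6. Once results for every combo have arrived, the assembled
   row is handed to the FileOutput object in a single call.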
*/ void update(vector& shared, int numSeqs, bool pairs, map groupComboToColumn){ timesCalled++; data = estimate->getValues(shared); //passes estimators a shared vector from each group to be compared //figure out what groups are being compared in getValues //because we randomizes the order we need to put the results in the correct column in the output file //pos tells you which column in the output file you are in string groupComboName = shared[0]->getGroup() +"_"+ shared[1]->getGroup(); numGroupComb = groupComboToColumn.size(); if (!pairs && all) { groupComboName = "all"; groupComboToColumn[groupComboName] = numGroupComb; numGroupComb++; } map::iterator it = groupComboToColumn.find(groupComboName); int pos = 0; if (it != groupComboToColumn.end()) { pos = it->second * data.size(); //combo location * 1, or comboLocation * 3 for lci/hci }else { cout << groupComboName << " shouldn't get here\n"; } //fills groupdata with datas info groupData.resize((numGroupComb*data.size()), 0); for (int i = 0; i < data.size(); i++) { groupData[pos+i] = data[i]; } //when you get all your groups info then output if ((timesCalled % numGroupComb) == 0) { output->updateOutput(numSeqs, groupData); } } void init(string s) { output->setLabelName(s); } void reset() { output->resetFile(); } void close() { output->resetFile(); } void setAll(bool a) { all = a; } bool getAll() { return all; } string getName() { return estimate->getName(); } bool isCalcMultiple() { return estimate->getMultiple(); } bool calcNeedsAll() { return estimate->getNeedsAll(); } bool hasLciHci() { if (estimate->getCols() == 3) { return true; } else{ return false; } } private: Calculator* estimate; FileOutput* output; int nSeqs, timesCalled, numGroupComb; vector data; vector groupData; bool all; }; /***********************************************************************/ #endif mothur-1.48.0/source/collectorscurvedata.h000077500000000000000000000053211424121717000206460ustar00rootroot00000000000000#ifndef COLLECTORSCURVEDATA_H #define COLLECTORSCURVEDATA_H #include "sabundvector.hpp" #include "sharedrabundvectors.hpp" #include "display.h" #include "observable.h" /***********************************************************************/ class CollectorsCurveData : public Observable { public: CollectorsCurveData() : rank(0) {}; void registerDisplay(Display* o) { displays.insert(o); } void registerDisplays(vector o) { for(int i=0;i::iterator pos=displays.begin();pos!=displays.end();pos++){ (*pos)->update(rank); } } private: set displays; SAbundVector rank; }; /***********************************************************************/ class SharedCollectorsCurveData : public Observable { public: SharedCollectorsCurveData()=default; void registerDisplay(Display* o) { displays.insert(o); } void registerDisplays(vector o) { for(int i=0;i& shared, int numSeqs, map& groupComboToColumn) { for (int k = 0; k < (shared.size() - 1); k++) { // pass cdd each set of groups to commpare for (int l = k+1; l < shared.size(); l++) { for(set::iterator pos=displays.begin();pos!=displays.end();pos++){ vector subset; //add new pair of sharedrabund vectors subset.push_back(shared[k]); subset.push_back(shared[l]); if ((*pos)->calcNeedsAll()) { //load subset with rest of lookup for those calcs that need everyone to calc for a pair for (int w = 0; w < shared.size(); w++) { if ((w != k) && (w != l)) { subset.push_back(shared[w]); } } (*pos)->update(subset, numSeqs, true, groupComboToColumn); }else { (*pos)->update(subset, numSeqs, true, groupComboToColumn); } } } } //if this is a 
calculator that can do multiples then do them for(set::iterator pos=displays.begin();pos!=displays.end();pos++){ if ((*pos)->isCalcMultiple() && (*pos)->getAll()) { (*pos)->update(shared, numSeqs, false, groupComboToColumn); } } } private: set displays; }; /***********************************************************************/ #endif mothur-1.48.0/source/commandfactory.cpp000066400000000000000000001471451424121717000201470ustar00rootroot00000000000000/* * commandfactory.cpp * * * Created by Pat Schloss on 10/25/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "command.hpp" #include "clustercommand.h" #include "collectcommand.h" #include "collectsharedcommand.h" #include "getgroupcommand.h" #include "getlabelcommand.h" #include "rarefactcommand.h" #include "summarycommand.h" #include "summarysharedcommand.h" #include "rarefactsharedcommand.h" #include "quitcommand.h" #include "helpcommand.h" #include "commandfactory.hpp" #include "uniqueseqscommand.h" #include "parsimonycommand.h" #include "unifracunweightedcommand.h" #include "unifracweightedcommand.h" #include "libshuffcommand.h" #include "heatmapcommand.h" #include "heatmapsimcommand.h" #include "filterseqscommand.h" #include "venncommand.h" #include "nocommands.h" #include "binsequencecommand.h" #include "getoturepcommand.h" #include "treesharedcommand.h" #include "distancecommand.h" #include "aligncommand.h" #include "distsharedcommand.h" #include "getsabundcommand.h" #include "getrabundcommand.h" #include "seqsummarycommand.h" #include "screenseqscommand.h" #include "reversecommand.h" #include "trimseqscommand.h" #include "mergefilecommand.h" #include "listseqscommand.h" #include "getseqscommand.h" #include "removeseqscommand.h" #include "systemcommand.h" #include "aligncheckcommand.h" #include "getsharedotucommand.h" #include "getlistcountcommand.h" #include "classifyseqscommand.h" #include "phylotypecommand.h" #include "mgclustercommand.h" #include "preclustercommand.h" #include "pcoacommand.h" #include "otuhierarchycommand.h" #include "setdircommand.h" #include "chimeraccodecommand.h" #include "chimeracheckcommand.h" #include "chimeraslayercommand.h" #include "chimerapintailcommand.h" #include "chimerabellerophoncommand.h" #include "chimerauchimecommand.h" #include "setlogfilecommand.h" #include "phylodiversitycommand.h" #include "makegroupcommand.h" #include "chopseqscommand.h" #include "clearcutcommand.h" #include "splitabundcommand.h" #include "clustersplitcommand.h" #include "classifyotucommand.h" #include "degapseqscommand.h" #include "getrelabundcommand.h" #include "sensspeccommand.h" #include "sffinfocommand.h" #include "seqerrorcommand.h" #include "normalizesharedcommand.h" #include "metastatscommand.h" #include "splitgroupscommand.h" #include "clusterfragmentscommand.h" #include "getlineagecommand.h" #include "removelineagecommand.h" #include "fastaqinfocommand.h" #include "deuniqueseqscommand.h" #include "pairwiseseqscommand.h" #include "clusterdoturcommand.h" #include "subsamplecommand.h" #include "removegroupscommand.h" #include "getgroupscommand.h" #include "indicatorcommand.h" #include "consensusseqscommand.h" #include "trimflowscommand.h" #include "corraxescommand.h" #include "shhhercommand.h" #include "pcacommand.h" #include "nmdscommand.h" #include "removerarecommand.h" #include "mergegroupscommand.h" #include "amovacommand.h" #include "homovacommand.h" #include "mantelcommand.h" #include "makefastqcommand.h" #include "anosimcommand.h" #include "getcurrentcommand.h" #include 
"setcurrentcommand.h" #include "makesharedcommand.h" #include "deuniquetreecommand.h" #include "countseqscommand.h" #include "countgroupscommand.h" #include "summarytaxcommand.h" #include "chimeraperseuscommand.h" #include "shhhseqscommand.h" #include "summaryqualcommand.h" #include "otuassociationcommand.h" #include "sortseqscommand.h" #include "classifytreecommand.h" #include "cooccurrencecommand.h" #include "pcrseqscommand.h" #include "createdatabasecommand.h" #include "makebiomcommand.h" #include "getcoremicrobiomecommand.h" #include "listotuscommand.h" #include "getotuscommand.h" #include "removeotuscommand.h" #include "makecontigscommand.h" #include "sffmultiplecommand.h" #include "classifysvmsharedcommand.h" #include "filtersharedcommand.h" #include "primerdesigncommand.h" #include "getdistscommand.h" #include "removedistscommand.h" #include "mergetaxsummarycommand.h" #include "getmetacommunitycommand.h" #include "sparcccommand.h" #include "makelookupcommand.h" #include "renameseqscommand.h" #include "makelefsecommand.h" #include "lefsecommand.h" #include "kruskalwalliscommand.h" #include "sracommand.h" #include "mergesfffilecommand.h" #include "getmimarkspackagecommand.h" #include "mimarksattributescommand.h" #include "setseedcommand.h" #include "makefilecommand.h" #include "biominfocommand.h" #include "renamefilecommand.h" #include "chimeravsearchcommand.h" #include "mergecountcommand.hpp" #include "clusterfitcommand.hpp" #include "mergeotuscommand.hpp" #include "diversityestimatorcommand.hpp" #include "srainfocommand.hpp" #include "makeclrcommand.hpp" #include "translateseqscommand.hpp" #include "alignmusclecommand.hpp" /*******************************************************/ /******************************************************/ CommandFactory* CommandFactory::getInstance() { if( _uniqueInstance == 0) { _uniqueInstance = new CommandFactory(); } return _uniqueInstance; } /***********************************************************/ /***********************************************************/ CommandFactory::CommandFactory(){ string s = ""; m = MothurOut::getInstance(); current = CurrentFile::getInstance(); current->setOutputDir(""); current->setInputDir(nullVector); append = false; //initialize list of valid commands commands["make.shared"] = "make.shared"; commands["bin.seqs"] = "bin.seqs"; commands["get.oturep"] = "get.oturep"; commands["cluster"] = "cluster"; commands["unique.seqs"] = "unique.seqs"; commands["dist.shared"] = "dist.shared"; commands["collect.single"] = "collect.single"; commands["collect.shared"] = "collect.shared"; commands["rarefaction.single"] = "rarefaction.single"; commands["rarefaction.shared"] = "rarefaction.shared"; commands["summary.single"] = "summary.single"; commands["summary.shared"] = "summary.shared"; commands["parsimony"] = "parsimony"; commands["unifrac.weighted"] = "unifrac.weighted"; commands["unifrac.unweighted"] = "unifrac.unweighted"; commands["libshuff"] = "libshuff"; commands["tree.shared"] = "tree.shared"; commands["heatmap.bin"] = "heatmap.bin"; commands["heatmap.sim"] = "heatmap.sim"; commands["venn"] = "venn"; commands["get.group"] = "get.group"; commands["get.label"] = "get.label"; commands["get.sabund"] = "get.sabund"; commands["get.rabund"] = "get.rabund"; commands["help"] = "help"; commands["reverse.seqs"] = "reverse.seqs"; commands["trim.seqs"] = "trim.seqs"; commands["trim.flows"] = "trim.flows"; commands["list.seqs"] = "list.seqs"; commands["get.seqs"] = "get.seqs"; commands["remove.seqs"] = "remove.seqs"; 
commands["system"] = "system"; commands["align.check"] = "align.check"; commands["get.sharedseqs"] = "get.sharedseqs"; commands["get.otulist"] = "get.otulist"; commands["phylotype"] = "phylotype"; commands["mgcluster"] = "mgcluster"; commands["pre.cluster"] = "pre.cluster"; commands["pcoa"] = "pcoa"; commands["otu.hierarchy"] = "otu.hierarchy"; commands["set.dir"] = "set.dir"; commands["merge.files"] = "merge.files"; commands["set.logfile"] = "set.logfile"; commands["phylo.diversity"] = "phylo.diversity"; commands["make.group"] = "make.group"; commands["make.count"] = "make.count"; commands["chop.seqs"] = "chop.seqs"; commands["clearcut"] = "clearcut"; commands["split.abund"] = "split.abund"; commands["classify.otu"] = "classify.otu"; commands["degap.seqs"] = "degap.seqs"; commands["get.relabund"] = "get.relabund"; commands["sffinfo"] = "sffinfo"; commands["normalize.shared"] = "normalize.shared"; commands["metastats"] = "metastats"; commands["split.groups"] = "split.groups"; commands["cluster.fragments"] = "cluster.fragments"; commands["get.lineage"] = "get.lineage"; commands["remove.lineage"] = "remove.lineage"; commands["fastq.info"] = "fastq.info"; commands["deunique.seqs"] = "deunique.seqs"; commands["cluster.classic"] = "cluster.classic"; commands["sub.sample"] = "sub.sample"; commands["remove.groups"] = "remove.groups"; commands["get.groups"] = "get.groups"; commands["get.otus"] = "get.otus"; commands["remove.otus"] = "remove.otus"; commands["indicator"] = "indicator"; commands["consensus.seqs"] = "consensus.seqs"; commands["corr.axes"] = "corr.axes"; commands["pca"] = "pca"; commands["nmds"] = "nmds"; commands["remove.rare"] = "remove.rare"; commands["amova"] = "amova"; commands["homova"] = "homova"; commands["mantel"] = "mantel"; commands["anosim"] = "anosim"; commands["make.fastq"] = "make.fastq"; commands["merge.groups"] = "merge.groups"; commands["get.current"] = "get.current"; commands["set.current"] = "set.current"; commands["deunique.tree"] = "deunique.tree"; commands["count.seqs"] = "count.seqs"; commands["count.groups"] = "count.groups"; commands["pairwise.seqs"] = "pairwise.seqs"; commands["classify.seqs"] = "classify.seqs"; commands["dist.seqs"] = "dist.seqs"; commands["filter.seqs"] = "filter.seqs"; commands["align.seqs"] = "align.seqs"; commands["chimera.ccode"] = "chimera.ccode"; commands["chimera.check"] = "chimera.check"; commands["chimera.slayer"] = "chimera.slayer"; commands["chimera.uchime"] = "chimera.uchime"; commands["chimera.perseus"] = "chimera.perseus"; commands["chimera.pintail"] = "chimera.pintail"; commands["chimera.bellerophon"] = "chimera.bellerophon"; commands["chimera.vsearch"] = "chimera.vsearch"; commands["screen.seqs"] = "screen.seqs"; commands["summary.seqs"] = "summary.seqs"; commands["cluster.split"] = "cluster.split"; commands["shhh.flows"] = "shhh.flows"; commands["sens.spec"] = "sens.spec"; commands["seq.error"] = "seq.error"; commands["summary.tax"] = "summary.tax"; commands["summary.qual"] = "summary.qual"; commands["shhh.seqs"] = "shhh.seqs"; commands["otu.association"] = "otu.association"; commands["sort.seqs"] = "sort.seqs"; commands["classify.tree"] = "classify.tree"; commands["cooccurrence"] = "cooccurrence"; commands["pcr.seqs"] = "pcr.seqs"; commands["create.database"] = "create.database"; commands["make.biom"] = "make.biom"; commands["get.coremicrobiome"] = "get.coremicrobiome"; commands["list.otus"] = "list.otus"; commands["list.otulabels"] = "list.otulabels"; commands["get.otulabels"] = "get.otulabels"; 
commands["remove.otulabels"] = "remove.otulabels"; commands["make.contigs"] = "make.contigs"; commands["make.table"] = "make.table"; commands["sff.multiple"] = "sff.multiple"; commands["quit"] = "quit"; commands["classify.svm"] = "classify.svm"; commands["filter.shared"] = "filter.shared"; commands["primer.design"] = "primer.design"; commands["get.dists"] = "get.dists"; commands["remove.dists"] = "remove.dists"; commands["merge.taxsummary"] = "merge.taxsummary"; commands["get.communitytype"] = "get.communitytype"; commands["sparcc"] = "sparcc"; commands["make.lookup"] = "make.lookup"; commands["rename.seqs"] = "rename.seqs"; commands["make.lefse"] = "make.lefse"; commands["lefse"] = "lefse"; commands["kruskal.wallis"] = "kruskal.wallis"; commands["make.sra"] = "make.sra"; commands["merge.sfffiles"] = "merge.sfffiles"; commands["get.mimarkspackage"] = "get.mimarkspackage"; commands["mimarks.attributes"] = "mimarks.attributes"; commands["make.file"] = "make.file"; commands["biom.info"] = "biom.info"; commands["set.seed"] = "set.seed"; commands["rename.file"] = "rename.file"; commands["merge.count"] = "merge.count"; commands["cluster.fit"] = "cluster.fit"; commands["merge.otus"] = "merge.otus"; commands["estimator.single"] = "estimator.single"; commands["sra.info"] = "sra.info"; commands["make.clr"] = "make.clr"; commands["tranlate.seqs"] = "tranlate.seqs"; //commands["align.muscle"] = "align.muscle"; } /***********************************************************/ CommandFactory::~CommandFactory(){ _uniqueInstance = 0; } /***********************************************************/ /***********************************************************/ int CommandFactory::checkForRedirects(string optionString) { try { Utils util; int pos = (int)optionString.find("outputdir"); if (pos != string::npos) { //user has set outputdir in command option string string outputOption = ""; bool foundEquals = false; for(int i=pos;isetOutputDir(outputOption); m->mothurOut("Setting output directory to: " + outputOption); m->mothurOutEndLine(); } } pos = (int)optionString.find("inputdir"); if (pos != string::npos) { //user has set inputdir in command option string string intputOption = ""; bool foundEquals = false; for(int i=pos;i inputPaths; vector temp; util.splitAtChar(intputOption, temp, ';'); for (int i = 0; i < temp.size(); i++) { string inputPath = util.removeQuotes(temp[i]); //add / to name if needed string lastChar = inputPath.substr(inputPath.length()-1); if (lastChar != PATH_SEPARATOR) { inputPath += PATH_SEPARATOR; } inputPath = util.getFullPathName(inputPath); if (util.dirCheckExists(inputPath)) { inputPaths.push_back(inputPath); } } if (inputPaths.size() != 0) { m->mothurOut("Setting input directories to: \n"); for (int i = 0; i < inputPaths.size(); i++) { m->mothurOut("\t" + inputPaths[i] + "\n"); } m->mothurOutEndLine(); current->setInputDir(inputPaths); } } pos = (int)optionString.find("seed="); if (pos != string::npos) { //user has set seed in command option string string intputOption = ""; bool foundEquals = false; for(int i=pos;imothurOut("[ERROR]: Seed must be an integer.\n"); seed = false;} } if (seed) { m->mothurOut("Setting random seed to " + toString(random) + ".\n\n"); m->setRandomSeed(random); } } pos = (int)optionString.find("mothurcalling=true"); if (pos != string::npos) { //user has set seed in command option string current->setMothurCalling(true); }else { current->setMothurCalling(false); } return 0; } catch(exception& e) { m->errorOut(e, "CommandFactory", "getCommand"); exit(1); } } 
/***********************************************************/ /***********************************************************/ //This function calls the appropriate command fucntions based on user input. Command* CommandFactory::getCommand(string commandName, string optionString){ try { Command* command = nullptr; if ((commandName != "help") && (commandName != "system")) { checkForRedirects(optionString); } if(commandName == "cluster") { command = new ClusterCommand(optionString); } else if(commandName == "unique.seqs") { command = new UniqueSeqsCommand(optionString); } else if(commandName == "parsimony") { command = new ParsimonyCommand(optionString); } else if(commandName == "help") { command = new HelpCommand(optionString); } else if(commandName == "quit") { command = new QuitCommand(optionString); } else if(commandName == "collect.single") { command = new CollectCommand(optionString); } else if(commandName == "collect.shared") { command = new CollectSharedCommand(optionString); } else if(commandName == "rarefaction.single") { command = new RareFactCommand(optionString); } else if(commandName == "rarefaction.shared") { command = new RareFactSharedCommand(optionString); } else if(commandName == "summary.single") { command = new SummaryCommand(optionString); } else if(commandName == "summary.shared") { command = new SummarySharedCommand(optionString); } else if(commandName == "unifrac.weighted") { command = new UnifracWeightedCommand(optionString); } else if(commandName == "unifrac.unweighted") { command = new UnifracUnweightedCommand(optionString); } else if(commandName == "get.group") { command = new GetgroupCommand(optionString); } else if(commandName == "get.label") { command = new GetlabelCommand(optionString); } else if(commandName == "get.sabund") { command = new GetSAbundCommand(optionString); } else if(commandName == "get.rabund") { command = new GetRAbundCommand(optionString); } else if(commandName == "libshuff") { command = new LibShuffCommand(optionString); } else if(commandName == "heatmap.bin") { command = new HeatMapCommand(optionString); } else if(commandName == "heatmap.sim") { command = new HeatMapSimCommand(optionString); } else if(commandName == "filter.seqs") { command = new FilterSeqsCommand(optionString); } else if(commandName == "venn") { command = new VennCommand(optionString); } else if(commandName == "bin.seqs") { command = new BinSeqCommand(optionString); } else if(commandName == "get.oturep") { command = new GetOTURepCommand(optionString); } else if(commandName == "tree.shared") { command = new TreeSharedCommand(optionString); } else if(commandName == "dist.shared") { command = new DistSharedCommand(optionString); } else if(commandName == "dist.seqs") { command = new DistanceCommand(optionString); } else if(commandName == "align.seqs") { command = new AlignCommand(optionString); } else if(commandName == "summary.seqs") { command = new SeqSummaryCommand(optionString); } else if(commandName == "screen.seqs") { command = new ScreenSeqsCommand(optionString); } else if(commandName == "reverse.seqs") { command = new ReverseSeqsCommand(optionString); } else if(commandName == "trim.seqs") { command = new TrimSeqsCommand(optionString); } else if(commandName == "trim.flows") { command = new TrimFlowsCommand(optionString); } else if(commandName == "shhh.flows") { command = new ShhherCommand(optionString); } else if(commandName == "list.seqs") { command = new ListSeqsCommand(optionString); } else if(commandName == "get.seqs") { command = new GetSeqsCommand(optionString); } 
else if(commandName == "remove.seqs") { command = new RemoveSeqsCommand(optionString); } else if(commandName == "merge.files") { command = new MergeFileCommand(optionString); } else if(commandName == "system") { command = new SystemCommand(optionString); } else if(commandName == "align.check") { command = new AlignCheckCommand(optionString); } else if(commandName == "get.sharedseqs") { command = new GetSharedOTUCommand(optionString); } else if(commandName == "get.otulist") { command = new GetListCountCommand(optionString); } else if(commandName == "classify.seqs") { command = new ClassifySeqsCommand(optionString); } else if(commandName == "chimera.ccode") { command = new ChimeraCcodeCommand(optionString); } else if(commandName == "chimera.check") { command = new ChimeraCheckCommand(optionString); } else if(commandName == "chimera.slayer") { command = new ChimeraSlayerCommand(optionString); } else if(commandName == "chimera.uchime") { command = new ChimeraUchimeCommand(optionString); } else if(commandName == "chimera.pintail") { command = new ChimeraPintailCommand(optionString); } else if(commandName == "chimera.bellerophon") { command = new ChimeraBellerophonCommand(optionString); } else if(commandName == "chimera.vsearch") { command = new ChimeraVsearchCommand(optionString); } else if(commandName == "phylotype") { command = new PhylotypeCommand(optionString); } else if(commandName == "mgcluster") { command = new MGClusterCommand(optionString); } else if(commandName == "pre.cluster") { command = new PreClusterCommand(optionString); } else if(commandName == "pcoa") { command = new PCOACommand(optionString); } else if(commandName == "pca") { command = new PCACommand(optionString); } else if(commandName == "nmds") { command = new NMDSCommand(optionString); } else if(commandName == "otu.hierarchy") { command = new OtuHierarchyCommand(optionString); } else if(commandName == "set.dir") { command = new SetDirectoryCommand(optionString); } else if(commandName == "set.logfile") { command = new SetLogFileCommand(optionString); } else if(commandName == "phylo.diversity") { command = new PhyloDiversityCommand(optionString); } else if((commandName == "make.group") || (commandName == "make.count")) { command = new MakeGroupCommand(optionString); } else if(commandName == "chop.seqs") { command = new ChopSeqsCommand(optionString); } else if(commandName == "clearcut") { command = new ClearcutCommand(optionString); } else if(commandName == "split.abund") { command = new SplitAbundCommand(optionString); } else if(commandName == "cluster.split") { command = new ClusterSplitCommand(optionString); } else if(commandName == "classify.otu") { command = new ClassifyOtuCommand(optionString); } else if(commandName == "degap.seqs") { command = new DegapSeqsCommand(optionString); } else if(commandName == "get.relabund") { command = new GetRelAbundCommand(optionString); } else if(commandName == "sens.spec") { command = new SensSpecCommand(optionString); } else if(commandName == "seq.error") { command = new SeqErrorCommand(optionString); } else if(commandName == "sffinfo") { command = new SffInfoCommand(optionString); } else if(commandName == "normalize.shared") { command = new NormalizeSharedCommand(optionString); } else if(commandName == "metastats") { command = new MetaStatsCommand(optionString); } else if(commandName == "split.groups") { command = new SplitGroupCommand(optionString); } else if(commandName == "cluster.fragments") { command = new ClusterFragmentsCommand(optionString); } else if(commandName == 
"get.lineage") { command = new GetLineageCommand(optionString); } else if(commandName == "remove.lineage") { command = new RemoveLineageCommand(optionString); } else if(commandName == "get.groups") { command = new GetGroupsCommand(optionString); } else if(commandName == "remove.groups") { command = new RemoveGroupsCommand(optionString); } else if((commandName == "get.otus") || (commandName == "get.otulabels")) { command = new GetOtusCommand(optionString); } else if((commandName == "remove.otus") || (commandName == "remove.otulabels")) { command = new RemoveOtusCommand(optionString); } else if((commandName == "list.otus") ||(commandName == "list.otulabels")) { command = new ListOtusCommand(optionString); } else if(commandName == "fastq.info") { command = new ParseFastaQCommand(optionString); } else if(commandName == "deunique.seqs") { command = new DeUniqueSeqsCommand(optionString); } else if(commandName == "pairwise.seqs") { command = new PairwiseSeqsCommand(optionString); } else if(commandName == "cluster.classic") { command = new ClusterDoturCommand(optionString); } else if(commandName == "sub.sample") { command = new SubSampleCommand(optionString); } else if(commandName == "indicator") { command = new IndicatorCommand(optionString); } else if(commandName == "consensus.seqs") { command = new ConsensusSeqsCommand(optionString); } else if(commandName == "corr.axes") { command = new CorrAxesCommand(optionString); } else if(commandName == "remove.rare") { command = new RemoveRareCommand(optionString); } else if(commandName == "merge.groups") { command = new MergeGroupsCommand(optionString); } else if(commandName == "merge.count") { command = new MergeCountCommand(optionString); } else if(commandName == "amova") { command = new AmovaCommand(optionString); } else if(commandName == "homova") { command = new HomovaCommand(optionString); } else if(commandName == "mantel") { command = new MantelCommand(optionString); } else if(commandName == "make.fastq") { command = new MakeFastQCommand(optionString); } else if(commandName == "get.current") { command = new GetCurrentCommand(optionString); } else if(commandName == "set.current") { command = new SetCurrentCommand(optionString); } else if(commandName == "anosim") { command = new AnosimCommand(optionString); } else if(commandName == "make.shared") { command = new SharedCommand(optionString); } else if(commandName == "deunique.tree") { command = new DeuniqueTreeCommand(optionString); } else if((commandName == "count.seqs") || (commandName == "make.table")) { command = new CountSeqsCommand(optionString); } else if(commandName == "count.groups") { command = new CountGroupsCommand(optionString); } else if(commandName == "summary.tax") { command = new SummaryTaxCommand(optionString); } else if(commandName == "summary.qual") { command = new SummaryQualCommand(optionString); } else if(commandName == "chimera.perseus") { command = new ChimeraPerseusCommand(optionString); } else if(commandName == "shhh.seqs") { command = new ShhhSeqsCommand(optionString); } else if(commandName == "otu.association") { command = new OTUAssociationCommand(optionString); } else if(commandName == "sort.seqs") { command = new SortSeqsCommand(optionString); } else if(commandName == "classify.tree") { command = new ClassifyTreeCommand(optionString); } else if(commandName == "cooccurrence") { command = new CooccurrenceCommand(optionString); } else if(commandName == "pcr.seqs") { command = new PcrSeqsCommand(optionString); } else if(commandName == "create.database") { command = new 
CreateDatabaseCommand(optionString); } else if(commandName == "make.biom") { command = new MakeBiomCommand(optionString); } else if(commandName == "get.coremicrobiome") { command = new GetCoreMicroBiomeCommand(optionString); } else if(commandName == "make.contigs") { command = new MakeContigsCommand(optionString); } else if(commandName == "sff.multiple") { command = new SffMultipleCommand(optionString); } else if(commandName == "classify.svm") { command = new ClassifySvmSharedCommand(optionString); } else if(commandName == "filter.shared") { command = new FilterSharedCommand(optionString); } else if(commandName == "primer.design") { command = new PrimerDesignCommand(optionString); } else if(commandName == "get.dists") { command = new GetDistsCommand(optionString); } else if(commandName == "remove.dists") { command = new RemoveDistsCommand(optionString); } else if(commandName == "merge.taxsummary") { command = new MergeTaxSummaryCommand(optionString); } else if(commandName == "get.communitytype") { command = new GetMetaCommunityCommand(optionString); } else if(commandName == "sparcc") { command = new SparccCommand(optionString); } else if(commandName == "make.lookup") { command = new MakeLookupCommand(optionString); } else if(commandName == "rename.seqs") { command = new RenameSeqsCommand(optionString); } else if(commandName == "make.lefse") { command = new MakeLefseCommand(optionString); } else if(commandName == "lefse") { command = new LefseCommand(optionString); } else if(commandName == "kruskal.wallis") { command = new KruskalWallisCommand(optionString); } else if(commandName == "make.sra") { command = new SRACommand(optionString); } else if(commandName == "merge.sfffiles") { command = new MergeSfffilesCommand(optionString); } else if(commandName == "get.mimarkspackage") { command = new GetMIMarksPackageCommand(optionString); } else if(commandName == "mimarks.attributes") { command = new MimarksAttributesCommand(optionString); } else if(commandName == "set.seed") { command = new SetSeedCommand(optionString); } else if(commandName == "make.file") { command = new MakeFileCommand(optionString); } else if(commandName == "biom.info") { command = new BiomInfoCommand(optionString); } else if(commandName == "rename.file") { command = new RenameFileCommand(optionString); } else if(commandName == "cluster.fit") { command = new ClusterFitCommand(optionString); } else if(commandName == "merge.otus") { command = new MergeOTUsCommand(optionString); } else if(commandName == "estimator.single") { command = new EstimatorSingleCommand(optionString); } else if(commandName == "sra.info") { command = new SRAInfoCommand(optionString); } else if(commandName == "make.clr") { command = new MakeCLRCommand(optionString); } else if(commandName == "translate.seqs") { command = new TranslateSeqsCommand(optionString); } //else if(commandName == "align.muscle") { command = new AlignMuscleCommand(optionString); } else { command = new NoCommand(optionString); } return command; } catch(exception& e) { m->errorOut(e, "CommandFactory", "getCommand"); exit(1); } } /***********************************************************/ /***********************************************************/ //This function calls the appropriate command fucntions based on user input. 
Command* CommandFactory::getCommand(string commandName, string optionString, string mode){ try { Command* pipecommand = nullptr; //delete the old command if (commandName != "help") { checkForRedirects(optionString); //user has opted to redirect output from dir where input files are located to some other place if (current->getOutputDir() != "") { if (optionString != "") { optionString += ", outputdir=" + current->getOutputDir(); } else { optionString += "outputdir=" + current->getOutputDir(); } } //user has opted to redirect input from dir where mothur.exe is located to some other place if ((current->getInputDir()).size() != 0) { if (optionString != "") { optionString += ", inputdir=" + (current->getInputDir())[0]; } else { optionString += "inputdir=" + (current->getInputDir())[0]; } } } if(commandName == "cluster") { pipecommand = new ClusterCommand(optionString); } else if(commandName == "unique.seqs") { pipecommand = new UniqueSeqsCommand(optionString); } else if(commandName == "parsimony") { pipecommand = new ParsimonyCommand(optionString); } else if(commandName == "help") { pipecommand = new HelpCommand(optionString); } else if(commandName == "quit") { pipecommand = new QuitCommand(optionString); } else if(commandName == "collect.single") { pipecommand = new CollectCommand(optionString); } else if(commandName == "collect.shared") { pipecommand = new CollectSharedCommand(optionString); } else if(commandName == "rarefaction.single") { pipecommand = new RareFactCommand(optionString); } else if(commandName == "rarefaction.shared") { pipecommand = new RareFactSharedCommand(optionString); } else if(commandName == "summary.single") { pipecommand = new SummaryCommand(optionString); } else if(commandName == "summary.shared") { pipecommand = new SummarySharedCommand(optionString); } else if(commandName == "unifrac.weighted") { pipecommand = new UnifracWeightedCommand(optionString); } else if(commandName == "unifrac.unweighted") { pipecommand = new UnifracUnweightedCommand(optionString); } else if(commandName == "get.group") { pipecommand = new GetgroupCommand(optionString); } else if(commandName == "get.label") { pipecommand = new GetlabelCommand(optionString); } else if(commandName == "get.sabund") { pipecommand = new GetSAbundCommand(optionString); } else if(commandName == "get.rabund") { pipecommand = new GetRAbundCommand(optionString); } else if(commandName == "libshuff") { pipecommand = new LibShuffCommand(optionString); } else if(commandName == "heatmap.bin") { pipecommand = new HeatMapCommand(optionString); } else if(commandName == "heatmap.sim") { pipecommand = new HeatMapSimCommand(optionString); } else if(commandName == "filter.seqs") { pipecommand = new FilterSeqsCommand(optionString); } else if(commandName == "venn") { pipecommand = new VennCommand(optionString); } else if(commandName == "bin.seqs") { pipecommand = new BinSeqCommand(optionString); } else if(commandName == "get.oturep") { pipecommand = new GetOTURepCommand(optionString); } else if(commandName == "tree.shared") { pipecommand = new TreeSharedCommand(optionString); } else if(commandName == "dist.shared") { pipecommand = new DistSharedCommand(optionString); } else if(commandName == "dist.seqs") { pipecommand = new DistanceCommand(optionString); } else if(commandName == "align.seqs") { pipecommand = new AlignCommand(optionString); } else if(commandName == "summary.seqs") { pipecommand = new SeqSummaryCommand(optionString); } else if(commandName == "screen.seqs") { pipecommand = new ScreenSeqsCommand(optionString); } else 
if(commandName == "reverse.seqs") { pipecommand = new ReverseSeqsCommand(optionString); } else if(commandName == "trim.seqs") { pipecommand = new TrimSeqsCommand(optionString); } else if(commandName == "trim.flows") { pipecommand = new TrimFlowsCommand(optionString); } else if(commandName == "shhh.flows") { pipecommand = new ShhherCommand(optionString); } else if(commandName == "list.seqs") { pipecommand = new ListSeqsCommand(optionString); } else if(commandName == "get.seqs") { pipecommand = new GetSeqsCommand(optionString); } else if(commandName == "remove.seqs") { pipecommand = new RemoveSeqsCommand(optionString); } else if(commandName == "merge.files") { pipecommand = new MergeFileCommand(optionString); } else if(commandName == "system") { pipecommand = new SystemCommand(optionString); } else if(commandName == "align.check") { pipecommand = new AlignCheckCommand(optionString); } else if(commandName == "get.sharedseqs") { pipecommand = new GetSharedOTUCommand(optionString); } else if(commandName == "get.otulist") { pipecommand = new GetListCountCommand(optionString); } else if(commandName == "classify.seqs") { pipecommand = new ClassifySeqsCommand(optionString); } else if(commandName == "chimera.ccode") { pipecommand = new ChimeraCcodeCommand(optionString); } else if(commandName == "chimera.check") { pipecommand = new ChimeraCheckCommand(optionString); } else if(commandName == "chimera.uchime") { pipecommand = new ChimeraUchimeCommand(optionString); } else if(commandName == "chimera.slayer") { pipecommand = new ChimeraSlayerCommand(optionString); } else if(commandName == "chimera.pintail") { pipecommand = new ChimeraPintailCommand(optionString); } else if(commandName == "chimera.bellerophon") { pipecommand = new ChimeraBellerophonCommand(optionString); } else if(commandName == "chimera.vsearch") { pipecommand = new ChimeraVsearchCommand(optionString); } else if(commandName == "phylotype") { pipecommand = new PhylotypeCommand(optionString); } else if(commandName == "mgcluster") { pipecommand = new MGClusterCommand(optionString); } else if(commandName == "pre.cluster") { pipecommand = new PreClusterCommand(optionString); } else if(commandName == "pcoa") { pipecommand = new PCOACommand(optionString); } else if(commandName == "pca") { pipecommand = new PCACommand(optionString); } else if(commandName == "nmds") { pipecommand = new NMDSCommand(optionString); } else if(commandName == "otu.hierarchy") { pipecommand = new OtuHierarchyCommand(optionString); } else if(commandName == "set.dir") { pipecommand = new SetDirectoryCommand(optionString); } else if(commandName == "set.logfile") { pipecommand = new SetLogFileCommand(optionString); } else if(commandName == "phylo.diversity") { pipecommand = new PhyloDiversityCommand(optionString); } else if((commandName == "make.group") || (commandName == "make.count")) { pipecommand = new MakeGroupCommand(optionString); } else if(commandName == "chop.seqs") { pipecommand = new ChopSeqsCommand(optionString); } else if(commandName == "clearcut") { pipecommand = new ClearcutCommand(optionString); } else if(commandName == "split.abund") { pipecommand = new SplitAbundCommand(optionString); } else if(commandName == "cluster.split") { pipecommand = new ClusterSplitCommand(optionString); } else if(commandName == "classify.otu") { pipecommand = new ClassifyOtuCommand(optionString); } else if(commandName == "degap.seqs") { pipecommand = new DegapSeqsCommand(optionString); } else if(commandName == "get.relabund") { pipecommand = new GetRelAbundCommand(optionString); 
} else if(commandName == "sens.spec") { pipecommand = new SensSpecCommand(optionString); } else if(commandName == "seq.error") { pipecommand = new SeqErrorCommand(optionString); } else if(commandName == "sffinfo") { pipecommand = new SffInfoCommand(optionString); } else if(commandName == "normalize.shared") { pipecommand = new NormalizeSharedCommand(optionString); } else if(commandName == "metastats") { pipecommand = new MetaStatsCommand(optionString); } else if(commandName == "split.groups") { pipecommand = new SplitGroupCommand(optionString); } else if(commandName == "cluster.fragments") { pipecommand = new ClusterFragmentsCommand(optionString); } else if(commandName == "get.lineage") { pipecommand = new GetLineageCommand(optionString); } else if(commandName == "get.groups") { pipecommand = new GetGroupsCommand(optionString); } else if(commandName == "remove.lineage") { pipecommand = new RemoveLineageCommand(optionString); } else if(commandName == "remove.groups") { pipecommand = new RemoveGroupsCommand(optionString); } else if((commandName == "get.otus") || (commandName == "get.otulabels")) { pipecommand = new GetOtusCommand(optionString); } else if((commandName == "remove.otus") || (commandName == "remove.otulabels")) { pipecommand = new RemoveOtusCommand(optionString); } else if((commandName == "list.otus") ||(commandName == "list.otulabels")) { pipecommand = new ListOtusCommand(optionString); } else if(commandName == "fastq.info") { pipecommand = new ParseFastaQCommand(optionString); } else if(commandName == "deunique.seqs") { pipecommand = new DeUniqueSeqsCommand(optionString); } else if(commandName == "pairwise.seqs") { pipecommand = new PairwiseSeqsCommand(optionString); } else if(commandName == "cluster.classic") { pipecommand = new ClusterDoturCommand(optionString); } else if(commandName == "sub.sample") { pipecommand = new SubSampleCommand(optionString); } else if(commandName == "indicator") { pipecommand = new IndicatorCommand(optionString); } else if(commandName == "consensus.seqs") { pipecommand = new ConsensusSeqsCommand(optionString); } else if(commandName == "corr.axes") { pipecommand = new CorrAxesCommand(optionString); } else if(commandName == "remove.rare") { pipecommand = new RemoveRareCommand(optionString); } else if(commandName == "merge.groups") { pipecommand = new MergeGroupsCommand(optionString); } else if(commandName == "merge.count") { pipecommand = new MergeCountCommand(optionString); } else if(commandName == "amova") { pipecommand = new AmovaCommand(optionString); } else if(commandName == "homova") { pipecommand = new HomovaCommand(optionString); } else if(commandName == "mantel") { pipecommand = new MantelCommand(optionString); } else if(commandName == "anosim") { pipecommand = new AnosimCommand(optionString); } else if(commandName == "make.fastq") { pipecommand = new MakeFastQCommand(optionString); } else if(commandName == "get.current") { pipecommand = new GetCurrentCommand(optionString); } else if(commandName == "set.current") { pipecommand = new SetCurrentCommand(optionString); } else if(commandName == "make.shared") { pipecommand = new SharedCommand(optionString); } else if(commandName == "deunique.tree") { pipecommand = new DeuniqueTreeCommand(optionString); } else if((commandName == "count.seqs") || (commandName == "make.table")) { pipecommand = new CountSeqsCommand(optionString); } else if(commandName == "count.groups") { pipecommand = new CountGroupsCommand(optionString); } else if(commandName == "summary.tax") { pipecommand = new 
SummaryTaxCommand(optionString); } else if(commandName == "summary.qual") { pipecommand = new SummaryQualCommand(optionString); } else if(commandName == "chimera.perseus") { pipecommand = new ChimeraPerseusCommand(optionString); } else if(commandName == "shhh.seqs") { pipecommand = new ShhhSeqsCommand(optionString); } else if(commandName == "otu.association") { pipecommand = new OTUAssociationCommand(optionString); } else if(commandName == "sort.seqs") { pipecommand = new SortSeqsCommand(optionString); } else if(commandName == "classify.tree") { pipecommand = new ClassifyTreeCommand(optionString); } else if(commandName == "cooccurrence") { pipecommand = new CooccurrenceCommand(optionString); } else if(commandName == "pcr.seqs") { pipecommand = new PcrSeqsCommand(optionString); } else if(commandName == "create.database") { pipecommand = new CreateDatabaseCommand(optionString); } else if(commandName == "make.biom") { pipecommand = new MakeBiomCommand(optionString); } else if(commandName == "get.coremicrobiome") { pipecommand = new GetCoreMicroBiomeCommand(optionString); } else if(commandName == "make.contigs") { pipecommand = new MakeContigsCommand(optionString); } else if(commandName == "sff.multiple") { pipecommand = new SffMultipleCommand(optionString); } //else if(commandName == "classify.rf") { pipecommand = new ClassifyRFSharedCommand(optionString); } else if(commandName == "filter.shared") { pipecommand = new FilterSharedCommand(optionString); } else if(commandName == "primer.design") { pipecommand = new PrimerDesignCommand(optionString); } else if(commandName == "get.dists") { pipecommand = new GetDistsCommand(optionString); } else if(commandName == "remove.dists") { pipecommand = new RemoveDistsCommand(optionString); } else if(commandName == "merge.taxsummary") { pipecommand = new MergeTaxSummaryCommand(optionString); } else if(commandName == "get.communitytype") { pipecommand = new GetMetaCommunityCommand(optionString); } else if(commandName == "sparcc") { pipecommand = new SparccCommand(optionString); } else if(commandName == "make.lookup") { pipecommand = new MakeLookupCommand(optionString); } else if(commandName == "rename.seqs") { pipecommand = new RenameSeqsCommand(optionString); } else if(commandName == "make.lefse") { pipecommand = new MakeLefseCommand(optionString); } else if(commandName == "lefse") { pipecommand = new LefseCommand(optionString); } else if(commandName == "kruskal.wallis") { pipecommand = new KruskalWallisCommand(optionString); } else if(commandName == "make.sra") { pipecommand = new SRACommand(optionString); } else if(commandName == "merge.sfffiles") { pipecommand = new MergeSfffilesCommand(optionString); } else if(commandName == "classify.svm") { pipecommand = new ClassifySvmSharedCommand(optionString); } else if(commandName == "get.mimarkspackage") { pipecommand = new GetMIMarksPackageCommand(optionString); } else if(commandName == "mimarks.attributes") { pipecommand = new MimarksAttributesCommand(optionString); } else if(commandName == "set.seed") { pipecommand = new SetSeedCommand(optionString); } else if(commandName == "make.file") { pipecommand = new MakeFileCommand(optionString); } else if(commandName == "biom.info") { pipecommand = new BiomInfoCommand(optionString); } else if(commandName == "rename.file") { pipecommand = new RenameFileCommand(optionString); } else if(commandName == "cluster.fit") { pipecommand = new ClusterFitCommand(optionString); } else if(commandName == "merge.otus") { pipecommand = new MergeOTUsCommand(optionString); } else 
if(commandName == "estimator.single") { pipecommand = new EstimatorSingleCommand(optionString); } else if(commandName == "sra.info") { pipecommand = new SRAInfoCommand(optionString); } else if(commandName == "make.clr") { pipecommand = new MakeCLRCommand(optionString); } else if(commandName == "translate.seqs") { pipecommand = new TranslateSeqsCommand(optionString); } //else if(commandName == "align.muscle") { pipecommand = new AlignMuscleCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; } catch(exception& e) { m->errorOut(e, "CommandFactory", "getCommand"); exit(1); } } /***********************************************************************/ bool CommandFactory::isValidCommand(string command) { try { //is the command in the map if ((commands.find(command)) != (commands.end())) { return true; }else{ m->mothurOut(command + " is not a valid command in Mothur. Valid commands are "); for (it = commands.begin(); it != commands.end(); it++) { m->mothurOut(it->first + ", "); } m->mothurOutEndLine(); return false; } } catch(exception& e) { m->errorOut(e, "CommandFactory", "isValidCommand"); exit(1); } } /***********************************************************************/ bool CommandFactory::isValidCommand(string command, string noError) { try { //is the command in the map if ((commands.find(command)) != (commands.end())) { return true; } else{ return false; } } catch(exception& e) { m->errorOut(e, "CommandFactory", "isValidCommand"); exit(1); } } /***********************************************************************/ void CommandFactory::printCommands(ostream& out) { try { it = commands.begin(); out << "Valid commands are: " << it->first << ", "; it++; out << it->first; it++; for (; it != commands.end(); it++) { out << ", " << it->first; } out << "." << endl; } catch(exception& e) { m->errorOut(e, "CommandFactory", "printCommands"); exit(1); } } /***********************************************************************/ void CommandFactory::printCommandsCategories(ostream& out) { try { map commands = getListCommands(); map::iterator it; map categories; map::iterator itCat; //loop through each command outputting info for (it = commands.begin(); it != commands.end(); it++) { Command* thisCommand = getCommand(it->first, "category"); //don't add hidden commands if (thisCommand->getCommandCategory() != "Hidden") { itCat = categories.find(thisCommand->getCommandCategory()); if (itCat == categories.end()) { categories[thisCommand->getCommandCategory()] = thisCommand->getCommandName(); }else { categories[thisCommand->getCommandCategory()] += ", " + thisCommand->getCommandName(); } } } for (itCat = categories.begin(); itCat != categories.end(); itCat++) { #if defined NON_WINDOWS out << BOLDMAGENTA << endl << itCat->first << " commmands include: " << RESET << itCat->second << endl; #else out << endl << itCat->first << " commmands include: " << itCat->second << endl; #endif } } catch(exception& e) { m->errorOut(e, "CommandFactory", "printCommandsCategories"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/commandfactory.hpp000077500000000000000000000021431424121717000201430ustar00rootroot00000000000000#ifndef COMMANDFACTORY_HPP #define COMMANDFACTORY_HPP /* * commandfactory.h * * * Created by Pat Schloss on 10/25/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. 
* */ #include "mothurout.h" #include "currentfile.h" class Command; class CommandFactory { public: static CommandFactory* getInstance(); Command* getCommand(string, string, string); Command* getCommand(string, string); Command* getCommand(string); bool isValidCommand(string); bool isValidCommand(string, string); void printCommands(ostream&); void printCommandsCategories(ostream&); map getListCommands() { return commands; } private: MothurOut* m; CurrentFile* current; Utils util; map commands; map::iterator it; bool append; int checkForRedirects(string); static CommandFactory* _uniqueInstance; CommandFactory( const CommandFactory& ); // Disable copy constructor void operator=( const CommandFactory& ); // Disable assignment operator CommandFactory(); ~CommandFactory(); }; #endif mothur-1.48.0/source/commandoptionparser.cpp000077500000000000000000000046151424121717000212220ustar00rootroot00000000000000/* * commandoptionparser.cpp * * * Created by Pat Schloss on 10/23/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "commandoptionparser.hpp" //********************************************************************************************************************** //This Function parses through the command line and pulls out the command then sends the options to the parseGlobalData CommandOptionParser::CommandOptionParser(string input){ try { m = MothurOut::getInstance(); CurrentFile* current = CurrentFile::getInstance(); int openParen = input.find_first_of('('); int closeParen = input.find_last_of(')'); optionString = ""; commandString = ""; if(openParen != string::npos && closeParen != string::npos){ //gobble extra spaces int spot = 0; for (int i = 0; i < input.length(); i++) { if (!(isspace(input[i]))) { spot = i; break; } } if (spot > openParen) { spot = 0; } commandString = input.substr(spot, openParen-spot); //commandString contains everything before "(" optionString = input.substr((openParen+1), (closeParen-openParen-1)); //optionString contains everything between "(" and ")". if (!(commandString == "set.logfile")) { if (m->getLogFileName() == "") { time_t ltime = time(nullptr); /* calendar time */ string outputPath = current->getOutputDir(); string logFileName = outputPath + "mothur." 
+ toString(ltime) + ".logfile"; m->setLogFileName(logFileName, false); } } } else if (openParen == -1) { m->mothurOut("[ERROR]: You are missing (\n"); } else if (closeParen == -1) { m->mothurOut("[ERROR]: You are missing )\n"); } } catch(exception& e) { m->errorOut(e, "CommandOptionParser", "CommandOptionParser"); exit(1); } } //********************************************************************************************************************** string CommandOptionParser::getCommandString() { return commandString; } //********************************************************************************************************************** string CommandOptionParser::getOptionString() { return optionString; } //********************************************************************************************************************** mothur-1.48.0/source/commandoptionparser.hpp000077500000000000000000000010751424121717000212240ustar00rootroot00000000000000#ifndef COMMANDOPTIONPARSER_HPP #define COMMANDOPTIONPARSER_HPP #include "mothur.h" #include "mothurout.h" #include "currentfile.h" //********************************************************************************************************************** class CommandOptionParser { public: CommandOptionParser(string); string getCommandString(); string getOptionString(); private: string commandString, optionString; MothurOut* m; }; //********************************************************************************************************************** #endif mothur-1.48.0/source/commandparameter.h000077500000000000000000000072261424121717000201230ustar00rootroot00000000000000#ifndef COMMANDPARAMETER_H #define COMMANDPARAMETER_H /* * commandparameter.h * Mothur * * Created by westcott on 3/23/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "mothur.h" //********************************************************************************************************************** class CommandParameter { public: CommandParameter() { name = ""; type = ""; options = ""; optionsDefault = ""; chooseOnlyOneGroup = ""; chooseAtLeastOneGroup = ""; linkedGroup = ""; multipleSelectionAllowed = false; required = false; important = false; outputTypes = ""; } CommandParameter(string n, string t, string o, string d, string only, string atLeast, string linked, string opt, bool m, bool r, bool i) : name(n), type(t), options(o), optionsDefault(d), chooseOnlyOneGroup(only), chooseAtLeastOneGroup(atLeast), linkedGroup(linked), outputTypes(opt),multipleSelectionAllowed(m), required(r), important(i) {} CommandParameter(string n, string t, string o, string d, string only, string atLeast, string linked, string opt, bool m, bool r) : name(n), type(t), options(o), optionsDefault(d), chooseOnlyOneGroup(only), chooseAtLeastOneGroup(atLeast), linkedGroup(linked), outputTypes(opt), multipleSelectionAllowed(m), required(r) { important = false; } ~CommandParameter() = default; string name; //something like fasta, processors, method string type; //must be set to "Boolean", "Multiple", "Number", "String", "InputTypes" - InputTypes is for file inputs string options; //if the parameter has specific options allowed, used for parameters of type "Multiple", something like "furthest-nearest-average", or "sobs-chao...", leave blank for command that do not required specific options string optionsDefault; //the default for this parameter, could be something like "F" for a boolean or "100" for a number or "sobs-chao" for multiple //for chooseOnlyOneGroup, chooseAtLeastOneGroup and linkedGroup if no group is needed set to "none". string chooseOnlyOneGroup; //for file inputs: if a command has several options for input files but you can only choose one then put them in a group //for instance in the read.dist command you can use a phylip or column file but not both so set chooseOnlyOneGroup for both parameters to something like "DistanceFileGroup" string chooseAtLeastOneGroup; //for file inputs: if a command has several options for input files and you want to make sure one is choosen then put them in a group //for instance in the read.dist command you must provide a phylip or column file so set chooseAtLeastOneGroup for both parameters to something like "DistanceFileGroup" string linkedGroup; //for file inputs: if a command has a file option were if you provide one you must provide another you can put them in a group //for instance in the cluster command if you provide a column file you must provide a name file so set linkedGroup for both parameters to something like "ColumnNameGroup" bool multipleSelectionAllowed; //for "Multiple" type to say whether you can select multiple options, for instance for calc parameter set to true, but for method set to false bool required; //is this parameter required bool important; //is this parameter important. The gui will put "important" parameters first in the option panel. string outputTypes; //types on files created by the command if this parameter is given. ie. get.seqs command fasta parameter makes a fasta file. can be multiple values split by dashes. 
private: }; //********************************************************************************************************************** #endif mothur-1.48.0/source/commands/000077500000000000000000000000001424121717000162225ustar00rootroot00000000000000mothur-1.48.0/source/commands/aligncheckcommand.cpp000077500000000000000000000342271424121717000223700ustar00rootroot00000000000000/* * secondarystructurecommand.cpp * Mothur * * Created by westcott on 9/18/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "aligncheckcommand.h" #include "sequence.hpp" #include "counttable.h" //********************************************************************************************************************** vector AlignCheckCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","aligncheck",false,true,true); parameters.push_back(pfasta); CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pmap); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","",false,false); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false); parameters.push_back(pcount); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; haderror = 0; vector tempOutNames; outputTypes["aligncheck"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "AlignCheckCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string AlignCheckCommand::getHelpString(){ try { string helpString = ""; helpString += "The align.check command reads a fasta file and map file as well as an optional name or count file.\n"; helpString += "It outputs a file containing the secondary structure matches in the .align.check file.\n"; helpString += "The align.check command parameters are fasta and map, both are required.\n"; helpString += "The align.check command should be in the following format: align.check(fasta=yourFasta, map=yourMap).\n"; helpString += "Example align.check(map=silva.ss.map, fasta=amazon.fasta).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "AlignCheckCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string AlignCheckCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "aligncheck") { pattern = "[filename],align.check"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "AlignCheckCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** AlignCheckCommand::AlignCheckCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == 
"citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; mapfile = validParameter.validFile(parameters, "map"); if (mapfile == "not open") { abort = true; } else if (mapfile == "not found") { mapfile = ""; m->mothurOut("You must provide an map file.\n"); abort = true; } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else { current->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((countfile != "") && (namefile != "")) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name.\n"); abort = true; } if (outputdir == "") { outputdir += util.hasPath(fastafile); } } } catch(exception& e) { m->errorOut(e, "AlignCheckCommand", "AlignCheckCommand"); exit(1); } } //********************************************************************************************************************** int AlignCheckCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //get secondary structure info. 
readMap(); if (namefile != "") { nameMap = util.readNames(namefile); } else if (countfile != "") { CountTable ct; ct.readTable(countfile, false, false); nameMap = ct.getNameMap(); } if (m->getControl_pressed()) { return 0; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outfile = getOutputFileName("aligncheck",variables); ofstream out; util.openOutputFile(outfile, out); ifstream in; util.openInputFile(fastafile, in); out << "name" << '\t' << "pound" << '\t' << "dash" << '\t' << "plus" << '\t' << "equal" << '\t'; out << "loop" << '\t' << "tilde" << '\t' << "total" << '\t' << "numseqs" << endl; vector pound; vector dash; vector plus; vector equal; vector loop; vector tilde; vector total; int count = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outfile); return 0; } Sequence seq(in); gobble(in); if (seq.getName() != "") { statData data = getStats(seq.getAligned()); if (haderror == 1) { m->setControl_pressed(true); break; } int num = 1; if ((namefile != "") || (countfile != "")) { //make sure this sequence is in the namefile, else error map::iterator it = nameMap.find(seq.getName()); if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + seq.getName() + " is not in your namefile, please correct.\n"); m->setControl_pressed(true); } else { num = it->second; } } //for each sequence this sequence represents for (int i = 0; i < num; i++) { pound.push_back(data.pound); dash.push_back(data.dash); plus.push_back(data.plus); equal.push_back(data.equal); loop.push_back(data.loop); tilde.push_back(data.tilde); total.push_back(data.total); } count++; out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t'; out << data.loop << '\t' << data.tilde << '\t' << data.total << '\t' << num << endl; } } in.close(); out.close(); if (m->getControl_pressed()) { util.mothurRemove(outfile); return 0; } sort(pound.begin(), pound.end()); sort(dash.begin(), dash.end()); sort(plus.begin(), plus.end()); sort(equal.begin(), equal.end()); sort(loop.begin(), loop.end()); sort(tilde.begin(), tilde.end()); sort(total.begin(), total.end()); int size = pound.size(); int ptile0_25 = int(size * 0.025); int ptile25 = int(size * 0.250); int ptile50 = int(size * 0.500); int ptile75 = int(size * 0.750); int ptile97_5 = int(size * 0.975); int ptile100 = size - 1; if (m->getControl_pressed()) { util.mothurRemove(outfile); return 0; } m->mothurOut("\n\t\tPound\tDash\tPlus\tEqual\tLoop\tTilde\tTotal\n"); m->mothurOut("Minimum:\t" + toString(pound[0]) + "\t" + toString(dash[0]) + "\t" + toString(plus[0]) + "\t" + toString(equal[0]) + "\t" + toString(loop[0]) + "\t" + toString(tilde[0]) + "\t" + toString(total[0])+ "\n"); m->mothurOut("2.5%-tile:\t" + toString(pound[ptile0_25]) + "\t" + toString(dash[ptile0_25]) + "\t" + toString(plus[ptile0_25]) + "\t" + toString(equal[ptile0_25]) + "\t"+ toString(loop[ptile0_25]) + "\t"+ toString(tilde[ptile0_25]) + "\t"+ toString(total[ptile0_25])+ "\n"); m->mothurOut("25%-tile:\t" + toString(pound[ptile25]) + "\t" + toString(dash[ptile25]) + "\t" + toString(plus[ptile25]) + "\t" + toString(equal[ptile25]) + "\t" + toString(loop[ptile25]) + "\t" + toString(tilde[ptile25]) + "\t" + toString(total[ptile25])+ "\n"); m->mothurOut("Median: \t" + toString(pound[ptile50]) + "\t" + toString(dash[ptile50]) + "\t" + toString(plus[ptile50]) + "\t" + toString(equal[ptile50]) + "\t" + toString(loop[ptile50]) + "\t" + toString(tilde[ptile50]) + "\t" + 
toString(total[ptile50])+ "\n"); m->mothurOut("75%-tile:\t" + toString(pound[ptile75]) + "\t" + toString(dash[ptile75]) + "\t" + toString(plus[ptile75]) + "\t" + toString(equal[ptile75]) + "\t" + toString(loop[ptile75]) + "\t" + toString(tilde[ptile75]) + "\t" + toString(total[ptile75])+ "\n"); m->mothurOut("97.5%-tile:\t" + toString(pound[ptile97_5]) + "\t" + toString(dash[ptile97_5]) + "\t" + toString(plus[ptile97_5]) + "\t" + toString(equal[ptile97_5]) + "\t" + toString(loop[ptile97_5]) + "\t" + toString(tilde[ptile97_5]) + "\t" + toString(total[ptile97_5])+ "\n"); m->mothurOut("Maximum:\t" + toString(pound[ptile100]) + "\t" + toString(dash[ptile100]) + "\t" + toString(plus[ptile100]) + "\t" + toString(equal[ptile100]) + "\t" + toString(loop[ptile100]) + "\t" + toString(tilde[ptile100]) + "\t" + toString(total[ptile100])+ "\n"); if ((namefile == "") && (countfile == "")) { m->mothurOut("# of Seqs:\t" + toString(count)+ "\n"); } else { m->mothurOut("# of unique seqs:\t" + toString(count)+ "\n"); m->mothurOut("total # of seqs:\t" + toString(size)+ "\n"); } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(outfile); m->mothurOutEndLine(); outputNames.push_back(outfile); outputTypes["aligncheck"].push_back(outfile); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "AlignCheckCommand", "execute"); exit(1); } } //********************************************************************************************************************** void AlignCheckCommand::readMap(){ try { structMap.resize(1, 0); ifstream in; util.openInputFile(mapfile, in); while(!in.eof()){ int position; in >> position; structMap.push_back(position); gobble(in); } in.close(); seqLength = structMap.size(); //check you make sure is structMap[10] = 380 then structMap[380] = 10. for(int i=0;imothurOut("Your map file contains an error: line " + toString(i) + " does not match line " + toString(structMap[i]) + ".\n"); } } } } catch(exception& e) { m->errorOut(e, "AlignCheckCommand", "readMap"); exit(1); } } /**************************************************************************************************/ statData AlignCheckCommand::getStats(string sequence){ try { statData data; sequence = "*" + sequence; // need to pad the sequence so we can index it by 1 int length = sequence.length(); if (length != seqLength) { m->mothurOut("your sequences are " + toString(length) + " long, but your map file only contains " + toString(seqLength) + " entries. please correct.\n"); haderror = 1; return data; } for(int i=1;ierrorOut(e, "AlignCheckCommand", "getStats"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/aligncheckcommand.h000077500000000000000000000031211424121717000220220ustar00rootroot00000000000000#ifndef SECONDARYSTRUCTURECHECKERCOMMAND_H #define SECONDARYSTRUCTURECHECKERCOMMAND_H /* * aligncheckcommand.h * Mothur * * Created by westcott on 9/18/09. * Copyright 2009 Schloss Lab. All rights reserved. 
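 *
 * Typical invocation inside mothur, taken from getHelpString() in the matching .cpp file and shown
 * here only as an orientation aid:
 *     align.check(map=silva.ss.map, fasta=amazon.fasta)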
* */ #include "command.hpp" /**************************************************************************************************/ struct statData { int pound; int tilde; int dash; int plus; int equal; int loop; int total; statData() : pound(0), loop(0), tilde(0), dash(0), plus(0), equal(0), total(0) {}; }; /**************************************************************************************************/ class AlignCheckCommand : public Command { public: AlignCheckCommand(string); ~AlignCheckCommand(){} vector setParameters(); string getCommandName() { return "align.check"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Align.check"; } string getDescription() { return "calculate the number of potentially misaligned bases in a 16S rRNA gene sequence alignment"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector structMap; string mapfile, fastafile, namefile, countfile; bool abort; int seqLength, haderror; vector outputNames; map nameMap; void readMap(); statData getStats(string sequence); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/aligncommand.cpp000066400000000000000000000671611424121717000213720ustar00rootroot00000000000000/* * aligncommand.cpp * Mothur * * Created by Sarah Westcott on 5/15/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * * This version of nast does everything I think that the greengenes nast server does and then some. I have added the * feature of allowing users to define their database, kmer size for searching, alignment penalty values and alignment * method. This latter feature is perhaps most significant. nastPlus enables a user to use either a Needleman-Wunsch * (non-affine gap penalty) or Gotoh (affine gap penalty) pairwise alignment algorithm. This is significant because it * allows for a global alignment and not the local alignment provided by bLAst. Furthermore, it has the potential to * provide a better alignment because of the banding method employed by blast (I'm not sure about this). 
* */ #include "aligncommand.h" //********************************************************************************************************************** vector AlignCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); CommandParameter pcandidate("fasta", "InputTypes", "", "", "none", "none", "none","fasta-alignreport-accnos",false,true,true); parameters.push_back(pcandidate); CommandParameter psearch("search", "Multiple", "kmer-suffix", "kmer", "", "", "","",false,false,true); parameters.push_back(psearch); CommandParameter pksize("ksize", "Number", "", "8", "", "", "","",false,false); parameters.push_back(pksize); CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); CommandParameter palign("align", "Multiple", "needleman-gotoh-noalign", "needleman", "", "", "","",false,false,true); parameters.push_back(palign); CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-5.0", "", "", "","",false,false); parameters.push_back(pgapopen); CommandParameter pgapextend("gapextend", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapextend); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pflip("flip", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pflip); CommandParameter pthreshold("threshold", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(pthreshold); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["alignreport"] = tempOutNames; outputTypes["accnos"] = tempOutNames; abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "AlignCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string AlignCommand::getHelpString(){ try { string helpString = "\n"; helpString += "The align.seqs command reads a file containing sequences and creates an alignment file and a report file.\n"; helpString += "The align.seqs command parameters are " + getCommandParameters() + ".\n"; helpString += "The reference and fasta parameters are required. You may leave fasta blank if you have a valid fasta file.\n"; helpString += "The search parameter allows you to specify the method to find most similar reference sequence. Your options are: suffix or kmer. The default is kmer.\n"; helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman and noalign. The default is needleman.\n"; helpString += "The ksize parameter allows you to specify the kmer size for finding most similar reference to a given sequence. 
The default is 8.\n"; helpString += "The match parameter allows you to specify the bonus for having the same base. Default=1.0.\n"; helpString += "The mismatch parameter allows you to specify the penalty for having different bases. Default=-1.0.\n"; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. Default=-5.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. Default=-2.0.\n"; helpString += "If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported."; helpString += " By default, mothur will align the reverse complement of your sequences when the alignment process removes more than 50% of the bases indicating the read may be flipped. This process assembles the best possible alignment, and downstream analysis will remove any poor quality reads remaining.\n"; helpString += "The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. The default threshold is 0.50, meaning 50% of the bases are removed in the alignment.\n"; helpString += "The align.seqs command should be in the following format: "; helpString += "align.seqs(reference=yourTemplateFile, fasta=yourUnalignedFastaFile)\n"; helpString += "Example: align.seqs(fasta=water.fasta, reference=silva.v4.fasta)\n\n"; getCommonQuestions(); return helpString; } catch(exception& e) { m->errorOut(e, "AlignCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string AlignCommand::getCommonQuestions(){ try { vector<string> questions, issues, qanswers, ianswers, howtos, hanswers; string issue = "...template is not aligned, aborting. What do I do?"; issues.push_back(issue); string ianswer = "\tMothur requires the reference file to be aligned to generate aligned sequences. You can download mothur's aligned silva references here, https://mothur.org/wiki/Silva_reference_files. For ITS sequences, see 'how to' below.\n"; ianswers.push_back(ianswer); issue = "...xxx of your sequences generated alignments that eliminated too many bases... What does this mean?"; issues.push_back(issue); ianswer = "\tBy default, mothur will align the reverse complement of your sequences when the alignment process removes more than 50% of the bases indicating the read may be flipped. This process assembles the best possible alignment, and downstream analysis will remove any poor quality reads remaining.\n"; ianswers.push_back(ianswer); string howto = "How do I 'align' ITS sequences?"; howtos.push_back(howto); string hanswer = "\tYou really can't do an alignment because there isn't positional homology. 
You can use the pre.cluster and pairwise.seqs commands to generate a distance matrix from unaligned sequences.\n"; hanswers.push_back(hanswer); howto = "How do I create a custom reference for the region I am studying?"; howtos.push_back(howto); hanswer = "\tYou can tailor your reference using this method: http://blog.mothur.org/2016/07/07/Customization-for-your-region/.\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "AlignCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string AlignCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],align"; } //makes file like: amazon.align else if (type == "alignreport") { pattern = "[filename],align_report"; } else if (type == "accnos") { pattern = "[filename],flip.accnos"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "AlignCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** AlignCommand::AlignCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true;} else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; templateFileName = validParameter.validFile(parameters, "reference"); if (templateFileName == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required for the align.seqs command, aborting.\n"); abort = true; }else if (templateFileName == "not open") { abort = true; } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
string temp; temp = validParameter.valid(parameters, "ksize"); if (temp == "not found"){ temp = "8"; } util.mothurConvert(temp, kmerSize); temp = validParameter.valid(parameters, "match"); if (temp == "not found"){ temp = "1.0"; } util.mothurConvert(temp, match); temp = validParameter.valid(parameters, "mismatch"); if (temp == "not found"){ temp = "-1.0"; } util.mothurConvert(temp, misMatch); temp = validParameter.valid(parameters, "gapopen"); if (temp == "not found"){ temp = "-5.0"; } util.mothurConvert(temp, gapOpen); temp = validParameter.valid(parameters, "gapextend"); if (temp == "not found"){ temp = "-2.0"; } util.mothurConvert(temp, gapExtend); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "flip"); if (temp == "not found"){ temp = "t"; } flip = util.isTrue(temp); temp = validParameter.valid(parameters, "threshold"); if (temp == "not found"){ temp = "0.50"; } util.mothurConvert(temp, threshold); search = validParameter.valid(parameters, "search"); if (search == "not found"){ search = "kmer"; } if ((search != "suffix") && (search != "kmer")) { m->mothurOut("invalid search option: choices are kmer or suffix.\n"); abort=true; } align = validParameter.valid(parameters, "align"); if (align == "not found"){ align = "needleman"; } if ((align != "needleman") && (align != "gotoh") && (align != "noalign")) { m->mothurOut("invalid align option: choices are needleman, gotoh or noalign.\n"); abort=true; } } } catch(exception& e) { m->errorOut(e, "AlignCommand", "AlignCommand"); exit(1); } } //********************************************************************************************************************** AlignCommand::~AlignCommand(){} //********************************************************************************************************************** int AlignCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } long long before = util.getRAMUsed(); long long total = util.getTotalRAM(); if (m->getDebug()) { m->mothurOut("[DEBUG]: RAM used before reading template " + toString(before) + " of total RAM available " + toString(total) + "\n"); } templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, util.getRandomNumber(), true); if (m->getControl_pressed()) { outputTypes.clear(); return 0; } time_t start = time(nullptr); m->mothurOut("\nAligning sequences from " + fastafile + " ...\n" ); if (outputdir == "") { outputdir += util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string alignFileName = getOutputFileName("fasta", variables); string reportFileName = getOutputFileName("alignreport", variables); string accnosFileName = getOutputFileName("accnos", variables); bool hasAccnos = true; vector numFlipped; numFlipped.push_back(0); //numflipped because reverse was better numFlipped.push_back(0); //total number of sequences with over 50% of bases removed long long numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, fastafile, numFlipped); delete templateDB; if (m->getControl_pressed()) { util.mothurRemove(accnosFileName); util.mothurRemove(alignFileName); util.mothurRemove(reportFileName); outputTypes.clear(); return 0; } //delete accnos file if its blank else report to user if (util.isBlank(accnosFileName)) { util.mothurRemove(accnosFileName); hasAccnos = false; } else 
{ m->mothurOut("[WARNING]: " + toString(numFlipped[1]) + " of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); if (!flip) { m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well. flip=t"); }else{ m->mothurOut("\n[NOTE]: " + toString(numFlipped[0]) + " of your sequences were reversed to produce a better alignment."); } m->mothurOutEndLine(); } outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName); outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName); if (hasAccnos) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " seconds to align " + toString(numFastaSeqs) + " sequences.\n"); //set align file as new current fastafile string currentFasta = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; current->setFastaFile(currentFasta); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "AlignCommand", "execute"); exit(1); } } //********************************************************************************************************************** struct alignStruct { OutputWriter* alignWriter; OutputWriter* reportWriter; OutputWriter* accnosWriter; string inputFilename; string alignMethod, search, templateFileName; float match, misMatch, gapOpen, gapExtend, threshold; bool flip; long long numSeqs; int kmerSize; vector flippedResults; linePair filePos; MothurOut* m; Utils util; AlignmentDB* templateDB; Alignment* alignment; alignStruct (linePair fP, OutputWriter* aFName, OutputWriter* reFName, OutputWriter* ac, string fname, AlignmentDB* tfn, string al, float ma, float misMa, float gOpen, float gExtend, float thr, bool fl, int ks, string se) { filePos.start = fP.start; filePos.end = fP.end; alignWriter = aFName; reportWriter = reFName; accnosWriter = ac; inputFilename = fname; numSeqs = 0; m = MothurOut::getInstance(); match = ma; misMatch = misMa; gapOpen = gOpen; gapExtend = gExtend; threshold = thr; flip = fl; search = se; kmerSize = ks; flippedResults.resize(2, 0); alignMethod = al; templateDB = tfn; int longestBase = templateDB->getLongestBase(); if (m->getDebug()) { m->mothurOut("[DEBUG]: template longest base = " + toString(longestBase) + " \n"); } if(al == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } else if(al == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } else if(al == "noalign") { alignment = new NoAlign(); } else { m->mothurOut(al + " is not a valid alignment option. 
I will run the command using needleman.\n"); alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } } ~alignStruct() { delete alignment; } }; //********************************************************************************************************************** void alignDriver(alignStruct* params) { try { AlignReport report; ifstream inFASTA; params->util.openInputFile(params->inputFilename, inFASTA); inFASTA.seekg(params->filePos.start); bool done = false; long long count = 0; long long numFlipped_0 = 0; long long numFlipped_1 = 0; while (!done) { if (params->m->getControl_pressed()) { break; } Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); report.setCandidate(candidateSeq); int origNumBases = candidateSeq->getNumBases(); string originalUnaligned = candidateSeq->getUnaligned(); int numBasesNeeded = origNumBases * params->threshold; if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file if (candidateSeq->getUnaligned().length()+1 > params->alignment->getnRows()) { if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + candidateSeq->getName() + " " + toString(candidateSeq->getUnaligned().length()) + " " + toString(params->alignment->getnRows()) + " \n"); } params->alignment->resize(candidateSeq->getUnaligned().length()+2); } float searchScore; Sequence temp = params->templateDB->findClosestSequence(candidateSeq, searchScore); Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned()); Nast* nast = new Nast(params->alignment, candidateSeq, templateSeq); Sequence* copy; Nast* nast2; bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below //since nast does not make a copy of hte sequence passed, and it is used by the reporter below //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place //so this bool tells you if you need to delete it //if there is a possibility that this sequence should be reversed if (candidateSeq->getNumBases() < numBasesNeeded) { numFlipped_1++; //if the user wants you to try the reverse if (params->flip) { //get reverse compliment copy = new Sequence(candidateSeq->getName(), originalUnaligned); copy->reverseComplement(); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: flipping " + candidateSeq->getName() + " \n"); } //rerun alignment Sequence temp2 = params->templateDB->findClosestSequence(copy, searchScore); Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned()); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: closest template " + temp2.getName() + " \n"); } nast2 = new Nast(params->alignment, copy, templateSeq2); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: completed Nast2 " + candidateSeq->getName() + " flipped numBases = " + toString(copy->getNumBases()) + " old numbases = " + toString(candidateSeq->getNumBases()) +" \n"); } //check if any better if (copy->getNumBases() > candidateSeq->getNumBases()) { candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better delete templateSeq; templateSeq = templateSeq2; delete nast; nast = nast2; needToDeleteCopy = true; params->accnosWriter->write(candidateSeq->getName()+ '\n'); numFlipped_0++; }else{ delete nast2; delete templateSeq2; delete copy; } if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: done.\n"); } } } report.setTemplate(templateSeq); report.setSearchParameters(params->search, searchScore); 
report.setAlignmentParameters(params->alignMethod, params->alignment); report.setNastParameters(*nast); params->alignWriter->write('>' + candidateSeq->getName() + '\n' + candidateSeq->getAligned() + '\n'); params->reportWriter->write(report.getSeqReport()); delete nast; delete templateSeq; if (needToDeleteCopy) { delete copy; } count++; } delete candidateSeq; #if defined NON_WINDOWS unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= params->filePos.end)) { break; } #else if (count == params->filePos.end) { break; } #endif //report progress if((count) % 1000 == 0){ params->m->mothurOutJustToScreen(toString(count) + "\n"); } } //report progress if((count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(count) + "\n"); } params->numSeqs += count; params->flippedResults[0] += numFlipped_0; params->flippedResults[1] += numFlipped_1; inFASTA.close(); } catch(exception& e) { params->m->errorOut(e, "AlignCommand", "driver"); exit(1); } } /**************************************************************************************************/ //void alignDriver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename, vector& numFlipped,MothurOut* m, string align, float match, float misMatch, float gapOpen, float gapExtend, float threshold, bool flip, AlignmentDB* templateDB, string search, long long& count) { long long AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName, string filename, vector& numFlipped) { try { vector lines; vector positions; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else long long numFastaSeqs = 0; positions = util.setFilePosFasta(filename, numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; m->mothurOut("Reducing processors to " + toString(numFastaSeqs) + ".\n"); } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector workerThreads; vector data; long long num = 0; for (int i = 0; i < numFlipped.size(); i++) { numFlipped[i] = 0; } time_t start, end; time(&start); AlignReport nast; ofstream out; util.openOutputFile(reportFileName, out); nast.printHeaders(out); out.close(); auto synchronizedOutputAlignFile = std::make_shared(alignFileName); auto synchronizedOutputReportFile = std::make_shared(reportFileName, true); auto synchronizedOutputAccnosFile = std::make_shared(accnosFName); for (int i = 0; i < processors-1; i++) { OutputWriter* threadAlignWriter = new OutputWriter(synchronizedOutputAlignFile); OutputWriter* threadReportWriter = new OutputWriter(synchronizedOutputReportFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedOutputAccnosFile); alignStruct* dataBundle = new alignStruct(lines[i+1], threadAlignWriter, threadReportWriter, threadAccnosWriter, filename, templateDB, align, match, misMatch, gapOpen, gapExtend, threshold, flip, kmerSize, search); data.push_back(dataBundle); workerThreads.push_back(new std::thread(alignDriver, dataBundle)); } OutputWriter* threadAlignWriter = new OutputWriter(synchronizedOutputAlignFile); OutputWriter* 
threadReportWriter = new OutputWriter(synchronizedOutputReportFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedOutputAccnosFile); alignStruct* dataBundle = new alignStruct(lines[0], threadAlignWriter, threadReportWriter, threadAccnosWriter, filename, templateDB, align, match, misMatch, gapOpen, gapExtend, threshold, flip, kmerSize, search); alignDriver(dataBundle); numFlipped[0] = dataBundle->flippedResults[0]; numFlipped[1] = dataBundle->flippedResults[1]; num = dataBundle->numSeqs; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->numSeqs; numFlipped[0] += data[i]->flippedResults[0]; numFlipped[1] += data[i]->flippedResults[1]; delete data[i]->alignWriter; delete data[i]->reportWriter; delete data[i]->accnosWriter; delete data[i]; delete workerThreads[i]; } synchronizedOutputAlignFile->close(); synchronizedOutputReportFile->close(); synchronizedOutputAccnosFile->close(); delete threadAlignWriter; delete threadAccnosWriter; delete threadReportWriter; delete dataBundle; time(&end); m->mothurOut("It took " + toString(difftime(end, start)) + " secs to align " + toString(num) + " sequences.\n\n"); return num; } catch(exception& e) { m->errorOut(e, "AlignCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/aligncommand.h000077500000000000000000000037621424121717000210370ustar00rootroot00000000000000#ifndef ALIGNCOMMAND_H #define ALIGNCOMMAND_H /* * aligncommand.h * Mothur * * Created by Sarah Westcott on 5/15/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "searchdatabase.hpp" #include "alignment.hpp" #include "alignmentdb.h" #include "sequence.hpp" #include "gotohoverlap.hpp" #include "needlemanoverlap.hpp" #include "noalign.hpp" #include "nast.hpp" #include "alignreport.hpp" //test class AlignCommand : public Command { #ifdef UNIT_TEST friend class TestAlignSeqsIntegration; #endif public: AlignCommand(string); ~AlignCommand(); vector setParameters(); string getCommandName() { return "align.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getCommonQuestions(); string getOutputPattern(string); string getCitation() { return "DeSantis TZ, Jr., Hugenholtz P, Keller K, Brodie EL, Larsen N, Piceno YM, Phan R, Andersen GL (2006). NAST: a multiple sequence alignment server for comparative analysis of 16S rRNA genes. Nucleic Acids Res 34: W394-9.\nSchloss PD (2009). A high-throughput DNA sequence aligner for microbial ecology studies. PLoS ONE 4: e8230.\nSchloss PD (2010). The effects of alignment quality, distance calculation method, sequence filtering, and region on the analysis of 16S rRNA gene-based studies. 
PLoS Comput Biol 6: e1000844.\nhttp://www.mothur.org/wiki/Align.seqs http://www.mothur.org/wiki/Align.seqs"; } string getDescription() { return "align sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } protected: long long createProcesses(string, string, string, string, vector&); void appendReportFiles(string, string); string fastafile, templateFileName, distanceFileName, search, align; float match, misMatch, gapOpen, gapExtend, threshold; int processors, kmerSize; vector outputNames; bool abort, flip, calledHelp, save; AlignmentDB* templateDB; }; #endif mothur-1.48.0/source/commands/alignmusclecommand.cpp000066400000000000000000000377471424121717000226120ustar00rootroot00000000000000// // alignmusclecommand.cpp // Mothur // // Created by Sarah Westcott on 2/16/22. // Copyright © 2022 Schloss Lab. All rights reserved. // #include "alignmusclecommand.hpp" //********************************************************************************************************************** vector AlignMuscleCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pmusclelocation("muscle", "String", "", "", "", "", "","",false,false); parameters.push_back(pmusclelocation); CommandParameter pmethod("method", "Multiple", "align-super5", "align", "", "", "","",false,false,true); parameters.push_back(pmethod); CommandParameter pperturb("perturb", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pperturb); CommandParameter pperm("perm", "Multiple", "none-abc-acb-bca", "none", "", "", "","",false,false,true); parameters.push_back(pperm); CommandParameter pstratified("stratified", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pstratified); CommandParameter pdiversified("diversified", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdiversified); CommandParameter preplicates("replicates", "Number", "", "4", "", "", "","",false,false); parameters.push_back(preplicates); CommandParameter pconsiters("consiters", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pconsiters); CommandParameter piters("refineiters", "Number", "", "100", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "AlignMuscleCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string AlignMuscleCommand::getHelpString(){ try { string helpString = ""; helpString += "The align.muscle command creates multiple alignments of protein sequences.\n"; helpString += "This command is a wrapper for muscle written by Robert C. 
Edgar.\n"; helpString += "The align.muscle command parameters are fasta, method, perturb, perm, stratified, diversified, replicates, consiters, refineiters, processors and muscle.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The method parameter allows you to select between align and super5 methods. Default=super5.\n"; helpString += "The perturb parameter allows you to provide a random number seed for generating HMM perturbations. Default=0. https://drive5.com/muscle5/manual/hmm_perturbations.html\n"; helpString += "The perm parameter specifies the guide tree permutation. PERM can be none, abc, acb and bca, default=none. https://drive5.com/muscle5/manual/guide_tree_permutations.html.\n"; helpString += "The stratified parameter allows you to indicate you would like to generate a stratified ensemble. https://drive5.com/muscle5/manual/stratified_ensemble.html\n"; helpString += "The diversified parameter allows you to indicate you would like to generate a diversified ensemble. https://drive5.com/muscle5/manual/diversified_ensemble.html\n"; helpString += "The replicates parameter allows you to indicate the number of replicates, default 4 for -stratified and 100 for -diversified. With -stratified, one replicate is generated for each guide tree permutation, so the total number of replicates is 4×N.\n"; helpString += "The consiters parameter allows you to indicate the number of consistency iterations. Default 2.\n"; helpString += "The refineiters parameter allows you to indicate the number of refinement iterations. Default 100.\n"; helpString += "The muscle parameter allows you to specify the name and location of your muscle executable. By default mothur will look in your path and mothur's executable and mothur tools locations. You can set the muscle location as follows, muscle=/usr/bin/muscle5.1.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is all available. 
\n"; helpString += "The align.muscle command should be in the following format: \n"; helpString += "align.muscle(fasta=yourFastaFile) \n"; helpString += "Example: align.muscle(fasta=prot.fasta) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string AlignMuscleCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "AlignMuscleCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string AlignMuscleCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],[tag],fasta-[filename],fasta"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "AlignMuscleCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** AlignMuscleCommand::AlignMuscleCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); perturb = validParameter.valid(parameters, "perturb"); if (perturb == "not found"){ usePerturb = false; perturb = "0"; }else{ usePerturb = true; } method = validParameter.valid(parameters, "method"); if (method == "not found") { method = "super5";} if ((method == "align") || (method == "super5")) {} else { m->mothurOut("[WARNING]: " + method + " is not a valid method. Options are align or super5, using super5.\n"); method = "super5"; } perm = validParameter.valid(parameters, "perm"); if (perm == "not found") { perm = "none"; usePerm = false; } else { usePerm = true; } if ((perm == "none") || (perm == "abc") || (perm == "acb") || (perm == "bca")) {} else { m->mothurOut("[WARNING]: " + perm + " is not a valid perm option. 
Options are none, abc, acb or bca, using none.\n"); perm = "none"; } temp = validParameter.valid(parameters, "stratified"); if (temp == "not found") { temp = "f"; } stratified = util.isTrue(temp); if (stratified) { replicates = "4"; } temp = validParameter.valid(parameters, "diversified"); if (temp == "not found") { temp = "f"; } diversified = util.isTrue(temp); if (diversified) { replicates = "100"; } replicates = validParameter.valid(parameters, "replicates"); if (replicates == "not found") { useReplicates = false; } else{ useReplicates = true; } consiters = validParameter.valid(parameters, "consiters"); if (consiters == "not found") { useConsiters = false; consiters = "2"; } else{ useConsiters = true; } refineiters = validParameter.valid(parameters, "refineiters"); if (refineiters == "not found") { useRefineiters = false; refineiters = "100"; } else{ useRefineiters = true; } #if defined OSX m->mothurOut("\n[ERROR]: The align.muscle command is unavailable for our OSX version of mothur. You can use the command with our linux or windows versions.\n\n"); abort=true; #else vector versionOutputs; bool foundTool = false; string programName = "muscle"; programName += EXECUTABLE_EXT; muscleLocation = validParameter.validFile(parameters, "muscle"); if (muscleLocation == "not found") { muscleLocation = ""; foundTool = util.findTool(programName, muscleLocation, versionOutputs, current->getLocations()); } else { //test to make sure muscle exists muscleLocation = util.getFullPathName(muscleLocation); ifstream in; foundTool = util.openInputFile(muscleLocation, in, "no error"); in.close(); if(!foundTool) { m->mothurOut(muscleLocation + " file does not exist or cannot be opened, ignoring.\n"); muscleLocation = ""; foundTool = util.findTool(programName, muscleLocation, versionOutputs, current->getLocations()); } } if (!foundTool) { abort = true; } muscleLocation = util.getFullPathName(muscleLocation); if (m->getDebug()) { m->mothurOut("[DEBUG]: muscle location using " + muscleLocation + "\n"); } #endif } } catch(exception& e) { m->errorOut(e, "AlignMuscleCommand", "AlignMuscleCommand"); exit(1); } } //*************************************************************************************************************** int AlignMuscleCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Processing sequences from " + fastafile + " ...\n" ); long start = time(NULL); wrapperFunction(); m->mothurOut("\nIt took " + toString(time(NULL) - start) + " seconds to align your sequences.\n"); //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "AlignMuscleCommand", "execute"); exit(1); } } //********************************************************************************************************************** void AlignMuscleCommand::wrapperFunction(){ try { if (outputdir == "") { outputdir = util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = "muscle"; string outputFileName = getOutputFileName("fasta", variables); outputNames.push_back(outputFileName); 
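//NOTE: the remainder of this function assembles and runs the muscle v5 command line from the options parsed above, e.g. (illustrative paths only): "/usr/bin/muscle" -super5 seqs.fasta -output seqs.muscle.fasta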
outputTypes["fasta"].push_back(outputFileName); //to allow for spaces in the path outputFileName = "\"" + outputFileName + "\""; vector cPara; string muscleCommand = muscleLocation; muscleCommand = "\"" + muscleCommand + "\" "; cPara.push_back(util.mothurConvert(muscleCommand)); cPara.push_back(util.mothurConvert("-"+method)); cPara.push_back(util.mothurConvert(fastafile)); //output filename cPara.push_back(util.mothurConvert("-output")); cPara.push_back(util.mothurConvert(outputFileName)); if (usePerturb) { cPara.push_back(util.mothurConvert("-perturb")); cPara.push_back(util.mothurConvert(perturb)); } if (usePerm) { cPara.push_back(util.mothurConvert("-perm")); cPara.push_back(util.mothurConvert(perm)); } if (stratified) { cPara.push_back(util.mothurConvert("-stratified")); } if (diversified) { cPara.push_back(util.mothurConvert("-diversified")); } if (useReplicates) { cPara.push_back(util.mothurConvert("-replicates")); cPara.push_back(util.mothurConvert(replicates)); } if (useConsiters) { cPara.push_back(util.mothurConvert("-consiters")); cPara.push_back(util.mothurConvert(consiters)); } if (useRefineiters) { cPara.push_back(util.mothurConvert("-refineiters")); cPara.push_back(util.mothurConvert(refineiters)); } char** muscleParameters; muscleParameters = new char*[cPara.size()]; string commandString = ""; for (int i = 0; i < cPara.size(); i++) { muscleParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; } #if defined NON_WINDOWS #else commandString = "\"" + commandString + "\""; #endif if (m->getDebug()) { m->mothurOut("[DEBUG]: muscle command = " + commandString + ".\n"); } system(commandString.c_str()); //free memory for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; } delete[] muscleParameters; } catch(exception& e) { m->errorOut(e, "AlignMuscleCommand", "wrapperFunction"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/alignmusclecommand.hpp000066400000000000000000000030411424121717000225730ustar00rootroot00000000000000// // alignmusclecommand.hpp // Mothur // // Created by Sarah Westcott on 2/16/22. // Copyright © 2022 Schloss Lab. All rights reserved. // #ifndef alignmusclecommand_hpp #define alignmusclecommand_hpp #include "command.hpp" /***********************************************************/ class AlignMuscleCommand : public Command { public: AlignMuscleCommand(string); ~AlignMuscleCommand() {} vector setParameters(); string getCommandName() { return "align.muscle"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getCommonQuestions(); string getOutputPattern(string); string getCitation() { return "R.C. Edgar (2021) MUSCLE v5 enables improved estimates of phylogenetic tree confidence by ensemble bootstrapping. 
(https://www.biorxiv.org/content/10.1101/2021.06.20.449169v1.full.pdf)\nhttp://www.mothur.org/wiki/align.muscle\n"; } string getDescription() { return "align protein sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, usePerturb, stratified, diversified, useReplicates, useConsiters, useRefineiters, usePerm; string fastafile, perturb, method, perm, replicates, consiters, refineiters, muscleLocation; int processors; vector outputNames; void wrapperFunction(); }; /**************************************************************************************************/ #endif /* alignmusclecommand_hpp */ mothur-1.48.0/source/commands/amovacommand.cpp000077500000000000000000000400661424121717000214010ustar00rootroot00000000000000/* * amovacommand.cpp * mothur * * Created by westcott on 2/7/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "amovacommand.h" #include "readphylipvector.h" #include "designmap.h" //********************************************************************************************************************** vector AmovaCommand::setParameters(){ try { CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","amova",false,true,true); parameters.push_back(pdesign); CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","amova",false,true,true); parameters.push_back(pphylip); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["amova"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "AmovaCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string AmovaCommand::getHelpString(){ try { string helpString = ""; helpString += "Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.\n"; helpString += "The amova command outputs a .amova file.\n"; helpString += "The amova command parameters are " + getCommandParameters() + ". The phylip and design parameters are required, unless you have valid current files.\n"; helpString += "The design parameter allows you to assign your samples to groups when you are running amova. It is required.\n"; helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. The default is all sets in the design file.\n"; helpString += "The iters parameter allows you to set number of randomization for the P value. 
The default is 1000.\n"; helpString += "The amova command should be in the following format: amova(phylip=file.dist, design=file.design).\n"; getCommonQuestions(); return helpString; } catch(exception& e) { m->errorOut(e, "AmovaCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string AmovaCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string issue = "...XXX is not in your design file, please correct."; issues.push_back(issue); string ianswer = "\tMothur expects the design file to be 2 column with a header line. The first column should contain the names of the samples in the distance matrix. The second column should contain the treatment each sample is assigned to. \n"; ianswers.push_back(ianswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "AmovaCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string AmovaCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "amova") { pattern = "[filename],amova"; } //makes file like: amazon.align else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "AmovaCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** AmovaCommand::AmovaCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; phylipFileName = validParameter.validFile(parameters, "phylip"); if (phylipFileName == "not open") { phylipFileName = ""; abort = true; } else if (phylipFileName == "not found") { //if there is a current phylip file, use it phylipFileName = current->getPhylipFile(); if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter.\n"); } else { m->mothurOut("You have no current phylip file and the phylip parameter is required.\n"); abort = true; } }else { current->setPhylipFile(phylipFileName); } //check for required parameters designFileName = validParameter.validFile(parameters, "design"); if (designFileName == "not open") { designFileName = ""; abort = true; } else if (designFileName == "not found") { //if there is a current design file, use it designFileName = current->getDesignFile(); if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter.\n"); } else { m->mothurOut("You have no current design file and the design parameter is required.\n"); abort = true; } }else { current->setDesignFile(designFileName); } string temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "alpha"); if (temp == "not found") { temp = "0.05"; } util.mothurConvert(temp, experimentwiseAlpha); 
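//sets (optional): dash-delimited list of design-file sets to include in the analysis; when omitted, all sets in the design file are analyzed (see getHelpString above)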
string sets = validParameter.valid(parameters, "sets"); if (sets == "not found") { sets = ""; } else { util.splitAtDash(sets, Sets); } } } catch(exception& e) { m->errorOut(e, "AmovaCommand", "AmovaCommand"); exit(1); } } //********************************************************************************************************************** int AmovaCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //read design file designMap = new DesignMap(designFileName); if (m->getControl_pressed()) { delete designMap; return 0; } if (outputdir == "") { outputdir = util.hasPath(phylipFileName); } //read in distance matrix and square it ReadPhylipVector readMatrix(phylipFileName); vector sampleNames = readMatrix.read(distanceMatrix); if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets for(int i=0;igetControl_pressed()) { delete designMap; return 0; } string group = designMap->get(sampleNames[i]); if (group == "not found") { m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct.\n"); m->setControl_pressed(true); }else if (!util.inUsersGroups(group, Sets)){ //not in set we want remove it //remove from all other rows for(int j=0;j > origGroupSampleMap; for(int i=0;iget(sampleNames[i]); if (group == "not found") { m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct.\n"); m->setControl_pressed(true); }else { origGroupSampleMap[group].push_back(i); } } int numGroups = origGroupSampleMap.size(); if (m->getControl_pressed()) { delete designMap; return 0; } //create a new filename ofstream AMOVAFile; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(phylipFileName)); string AMOVAFileName = getOutputFileName("amova", variables); util.openOutputFile(AMOVAFileName, AMOVAFile); outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName); double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha); if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){ int numCombos = numGroups * (numGroups-1) / 2; double pairwiseAlpha = experimentwiseAlpha / (double) numCombos; map >::iterator itA; map >::iterator itB; for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){ itB = itA;itB++; for(;itB!=origGroupSampleMap.end();itB++){ map > pairwiseGroupSampleMap; pairwiseGroupSampleMap[itA->first] = itA->second; pairwiseGroupSampleMap[itB->first] = itB->second; runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha); } } m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n'); m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n'); } else{ m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n'); } m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n"); AMOVAFile.close(); delete designMap; m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "AmovaCommand", "execute"); exit(1); } } //********************************************************************************************************************** double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map > groupSampleMap, double alpha) { try { map >::iterator it; int numGroups = groupSampleMap.size(); int totalNumSamples = 0; for(it = 
groupSampleMap.begin();it!=groupSampleMap.end();it++){ totalNumSamples += it->second.size(); } double ssTotalOrig = calcSSTotal(groupSampleMap); double ssWithinOrig = calcSSWithin(groupSampleMap); double ssAmongOrig = ssTotalOrig - ssWithinOrig; double counter = 0; for(int i=0;i > randomizedGroup = getRandomizedGroups(groupSampleMap); double ssWithinRand = calcSSWithin(randomizedGroup); if(ssWithinRand <= ssWithinOrig){ counter++; } } double pValue = (double)counter / (double) iters; string pString = ""; if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); } else { pString = toString(pValue); } vector sampleNames; for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){ sampleNames.push_back(it->first); } string output = util.getStringFromVector(sampleNames, "-"); AMOVAFile << output << "\tAmong\tWithin\tTotal" << endl; m->mothurOut(output + "\tAmong\tWithin\tTotal\n"); AMOVAFile << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl; m->mothurOut("SS\t" + toString(ssAmongOrig) + '\t' + toString(ssWithinOrig) + '\t' + toString(ssTotalOrig) + '\n'); int dfAmong = numGroups - 1; double MSAmong = ssAmongOrig / (double) dfAmong; int dfWithin = totalNumSamples - numGroups; double MSWithin = ssWithinOrig / (double) dfWithin; int dfTotal = totalNumSamples - 1; double Fs = MSAmong / MSWithin; AMOVAFile << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl; m->mothurOut("df\t" + toString(dfAmong) + '\t' + toString(dfWithin) + '\t' + toString(dfTotal) + '\n'); AMOVAFile << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl; m->mothurOut("MS\t" + toString(MSAmong) + '\t' + toString(MSWithin) + "\n\n"); AMOVAFile << "Fs:\t" << Fs << endl; m->mothurOut("Fs:\t" + toString(Fs) + '\n'); AMOVAFile << "p-value: " << pString; m->mothurOut("p-value: " + pString); if(pValue < alpha){ AMOVAFile << "*"; m->mothurOut("*"); } AMOVAFile << endl << endl; m->mothurOutEndLine();m->mothurOutEndLine(); return pValue; } catch(exception& e) { m->errorOut(e, "AmovaCommand", "runAMOVA"); exit(1); } } //********************************************************************************************************************** map > AmovaCommand::getRandomizedGroups(map > origMapping){ try{ vector sampleIndices; vector samplesPerGroup; map >::iterator it; for(it=origMapping.begin();it!=origMapping.end();it++){ vector indices = it->second; samplesPerGroup.push_back(indices.size()); sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end()); } util.mothurRandomShuffle(sampleIndices); int index = 0; map > randomizedGroups = origMapping; for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){ for(int i=0;isecond.size();i++){ it->second[i] = sampleIndices[index++]; } } return randomizedGroups; } catch (exception& e) { m->errorOut(e, "AmovaCommand", "getRandomizedGroups"); exit(1); } } //********************************************************************************************************************** double AmovaCommand::calcSSTotal(map >& groupSampleMap) { try { vector indices; map >::iterator it; for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){ indices.insert(indices.end(), it->second.begin(), it->second.end()); } sort(indices.begin(), indices.end()); int numIndices =indices.size(); double ssTotal = 0.0; for(int i=1;ierrorOut(e, "AmovaCommand", "calcSSTotal"); exit(1); } } //********************************************************************************************************************** double 
AmovaCommand::calcSSWithin(map >& groupSampleMap) { try { double ssWithin = 0.0; map >::iterator it; for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){ double withinGroup = 0; vector samples = it->second; for(int i=0;ierrorOut(e, "AmovaCommand", "calcSSWithin"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/amovacommand.h000077500000000000000000000025131424121717000210410ustar00rootroot00000000000000#ifndef AMOVACOMMAND_H #define AMOVACOMMAND_H /* * amovacommand.h * mothur * * Created by westcott on 2/7/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" class DesignMap; class AmovaCommand : public Command { public: AmovaCommand(string); ~AmovaCommand() = default; vector setParameters(); string getCommandName() { return "amova"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getCommonQuestions(); string getOutputPattern(string); string getCitation() { return "Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.\nhttp://www.mothur.org/wiki/Amova"; } string getDescription() { return "analysis of molecular variance"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: double runAMOVA(ofstream&, map >, double); double calcSSWithin(map >&); double calcSSTotal(map >&); map > getRandomizedGroups(map >); bool abort; vector outputNames, Sets; string inputDir, designFileName, phylipFileName; DesignMap* designMap; vector< vector > distanceMatrix; int iters; double experimentwiseAlpha; }; #endif mothur-1.48.0/source/commands/anosimcommand.cpp000077500000000000000000000361141424121717000215630ustar00rootroot00000000000000/* * anosimcommand.cpp * mothur * * Created by westcott on 2/14/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "anosimcommand.h" #include "inputdata.h" #include "readphylipvector.h" #include "designmap.h" //********************************************************************************************************************** vector AnosimCommand::setParameters(){ try { CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","anosim",false,true,true); parameters.push_back(pdesign); CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","anosim",false,true,true); parameters.push_back(pphylip); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["anosim"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "AnosimCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string AnosimCommand::getHelpString(){ try { string helpString = ""; helpString += "Referenced: Clarke, K. R. (1993). Non-parametric multivariate analysis of changes in community structure. _Australian Journal of Ecology_ 18, 117-143.\n"; helpString += "The anosim command outputs a .anosim file. \n"; helpString += "The anosim command parameters are phylip, iters, and alpha. The phylip and design parameters are required, unless you have valid current files.\n"; helpString += "The design parameter allows you to assign your samples to groups when you are running anosim. It is required. \n"; helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n"; helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000. 
\n"; helpString += "The anosim command should be in the following format: anosim(phylip=file.dist, design=file.design).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "AnosimCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string AnosimCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "anosim") { pattern = "[filename],anosim"; } //makes file like: amazon.align else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "AnosimCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** AnosimCommand::AnosimCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; phylipFileName = validParameter.validFile(parameters, "phylip"); if (phylipFileName == "not open") { phylipFileName = ""; abort = true; } else if (phylipFileName == "not found") { //if there is a current phylip file, use it phylipFileName = current->getPhylipFile(); if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter.\n"); } else { m->mothurOut("You have no current phylip file and the phylip parameter is required.\n"); abort = true; } }else { current->setPhylipFile(phylipFileName); } //check for required parameters designFileName = validParameter.validFile(parameters, "design"); if (designFileName == "not open") { designFileName = ""; abort = true; } else if (designFileName == "not found") { //if there is a current design file, use it designFileName = current->getDesignFile(); if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter.\n"); } else { m->mothurOut("You have no current design file and the design parameter is required.\n"); abort = true; } }else { current->setDesignFile(designFileName); } string temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "alpha"); if (temp == "not found") { temp = "0.05"; } util.mothurConvert(temp, experimentwiseAlpha); } } catch(exception& e) { m->errorOut(e, "AnosimCommand", "AnosimCommand"); exit(1); } } //********************************************************************************************************************** int AnosimCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //read design file designMap = new DesignMap(designFileName); if (m->getControl_pressed()) { delete designMap; return 0; } if (outputdir == "") { outputdir = util.hasPath(phylipFileName); } //read in distance matrix and square it ReadPhylipVector readMatrix(phylipFileName); vector sampleNames = readMatrix.read(distanceMatrix); for(int i=0;i > origGroupSampleMap; for(int i=0;iget(sampleNames[i]); if (group == "not found") { m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct.\n"); m->setControl_pressed(true); }else { 
origGroupSampleMap[group].push_back(i); } } int numGroups = origGroupSampleMap.size(); if (m->getControl_pressed()) { delete designMap; return 0; } //create a new filename ofstream ANOSIMFile; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(phylipFileName)); string ANOSIMFileName = getOutputFileName("anosim", variables); util.openOutputFile(ANOSIMFileName, ANOSIMFile); outputNames.push_back(ANOSIMFileName); outputTypes["anosim"].push_back(ANOSIMFileName); m->mothurOut("\ncomparison\tR-value\tP-value\n"); ANOSIMFile << "comparison\tR-value\tP-value\n"; double fullANOSIMPValue = runANOSIM(ANOSIMFile, distanceMatrix, origGroupSampleMap, experimentwiseAlpha); if(fullANOSIMPValue <= experimentwiseAlpha && numGroups > 2){ int numCombos = numGroups * (numGroups-1) / 2; double pairwiseAlpha = experimentwiseAlpha / (double) numCombos; for(map >::iterator itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){ map >::iterator itB = itA; itB++; for(;itB!=origGroupSampleMap.end();itB++){ map > subGroupSampleMap; subGroupSampleMap[itA->first] = itA->second; string groupA = itA->first; subGroupSampleMap[itB->first] = itB->second; string groupB = itB->first; vector subIndices; for(map >::iterator it=subGroupSampleMap.begin();it!=subGroupSampleMap.end();it++){ subIndices.insert(subIndices.end(), it->second.begin(), it->second.end()); } int subNumSamples = subIndices.size(); sort(subIndices.begin(), subIndices.end()); vector > subDistMatrix(distanceMatrix.size()); for(int i=0;imothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n'); m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n'); } else{ m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n'); } m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n"); ANOSIMFile.close(); delete designMap; m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "AnosimCommand", "execute"); exit(1); } } //********************************************************************************************************************** double AnosimCommand::runANOSIM(ofstream& ANOSIMFile, vector > dMatrix, map > groupSampleMap, double alpha) { try { vector > rankMatrix = convertToRanks(dMatrix); double RValue = calcR(rankMatrix, groupSampleMap); int pCount = 0; for(int i=0;i > randGroupSampleMap = getRandomizedGroups(groupSampleMap); double RValueRand = calcR(rankMatrix, randGroupSampleMap); if(RValue <= RValueRand){ pCount++; } } double pValue = (double)pCount / (double) iters; string pString = ""; if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); } else { pString = toString(pValue); } //map >::iterator it=groupSampleMap.begin(); vector sampleNames; for(map >::iterator it = groupSampleMap.begin();it!=groupSampleMap.end();it++){ sampleNames.push_back(it->first); } string output = util.getStringFromVector(sampleNames, "-"); m->mothurOut(output + '\t' + toString(RValue) + '\t' + pString); ANOSIMFile << output << '\t' << RValue << '\t' << pString; if(pValue < alpha){ ANOSIMFile << "*"; m->mothurOut("*"); } ANOSIMFile << endl; m->mothurOutEndLine(); return pValue; } catch(exception& e) { m->errorOut(e, "AnosimCommand", "calcAnisom"); exit(1); } } 
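//The helpers below implement the ANOSIM statistic of Clarke (1993): distances are first converted to ranks (tied distances receive the mean rank), then calcR returns R = (mean between-group rank - mean within-group rank) / (n(n-1)/4), where n is the number of samples; runANOSIM above estimates a P value by recomputing R over random permutations of the group assignments.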
//********************************************************************************************************************** double AnosimCommand::calcR(vector > rankMatrix, map > groupSampleMap){ try { int numSamples = 0; for(map >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){ numSamples += it->second.size(); } double within = 0.0; int numWithinComps = 0; for(map >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){ vector indices = it->second; for(int i=0;i indices[j]) { within += rankMatrix[indices[i]][indices[j]]; } else { within += rankMatrix[indices[j]][indices[i]]; } numWithinComps++; } } } within /= (float) numWithinComps; double between = 0.0; int numBetweenComps = 0; map >::iterator itB; for(map >::iterator itA=groupSampleMap.begin();itA!=groupSampleMap.end();itA++){ for(int i=0;isecond.size();i++){ int A = itA->second[i]; map >::iterator itB = itA; itB++; for(;itB!=groupSampleMap.end();itB++){ for(int j=0;jsecond.size();j++){ int B = itB->second[j]; if(A>B) { between += rankMatrix[A][B]; } else { between += rankMatrix[B][A]; } numBetweenComps++; } } } } between /= (float) numBetweenComps; double Rvalue = (between - within)/(numSamples * (numSamples-1) / 4.0); return Rvalue; } catch(exception& e) { m->errorOut(e, "AnosimCommand", "calcWithinBetween"); exit(1); } } //********************************************************************************************************************** vector > AnosimCommand::convertToRanks(vector > dist) { try { vector cells; vector > ranks = dist; for (int i = 0; i < dist.size(); i++) { for (int j = 0; j < i; j++) { if(dist[i][j] != -1){ seqDist member(i, j, dist[i][j]); cells.push_back(member); } } } //sort distances sort(cells.begin(), cells.end(), compareSequenceDistance); //find ranks of distances int index = 0; int indexSum = 0; for(int i=0;ierrorOut(e, "AnosimCommand", "convertToRanks"); exit(1); } } //********************************************************************************************************************** map > AnosimCommand::getRandomizedGroups(map > origMapping){ try{ vector sampleIndices; vector samplesPerGroup; map >::iterator it; for(it=origMapping.begin();it!=origMapping.end();it++){ vector indices = it->second; samplesPerGroup.push_back(indices.size()); sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end()); } util.mothurRandomShuffle(sampleIndices); int index = 0; map > randomizedGroups = origMapping; for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){ for(int i=0;isecond.size();i++){ it->second[i] = sampleIndices[index++]; } } return randomizedGroups; } catch (exception& e) { m->errorOut(e, "AnosimCommand", "randomizeGroups"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/anosimcommand.h000077500000000000000000000026551424121717000212330ustar00rootroot00000000000000#ifndef ANOSIMCOMMAND_H #define ANOSIMCOMMAND_H /* * anosimcommand.h * mothur * * Created by westcott on 2/14/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" class DesignMap; class AnosimCommand : public Command { public: AnosimCommand(string); ~AnosimCommand(){} vector setParameters(); string getCommandName() { return "anosim"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Clarke, K. R. (1993). 
Non-parametric multivariate analysis of changes in community structure. _Australian Journal of Ecology_ 18, 117-143.\nhttp://www.mothur.org/wiki/Anosim"; } string getDescription() { return "analysis of similarity"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; DesignMap* designMap; string inputDir, designFileName, phylipFileName; vector > convertToRanks(vector >); double calcR(vector >, map >); map > getRandomizedGroups(map >); double runANOSIM(ofstream&, vector >, map >, double); vector< vector > distanceMatrix; vector outputNames; int iters; double experimentwiseAlpha; vector< vector > namesOfGroupCombos; }; #endif mothur-1.48.0/source/commands/binsequencecommand.cpp000066400000000000000000000345701424121717000225770ustar00rootroot00000000000000/* * binsequencecommand.cpp * Mothur * * Created by Sarah Westcott on 4/3/09. * Copyright 2009 Schloss Lab UMASS Amhers. All rights reserved. * */ #include "binsequencecommand.h" //********************************************************************************************************************** vector BinSeqCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(plist); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "BinSeqCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string BinSeqCommand::getHelpString(){ try { string helpString = ""; helpString += "The bin.seqs command parameters are list, fasta, name, count, label and group. 
The fasta and list are required, unless you have a valid current list and fasta file.\n"; helpString += "The label parameter allows you to select what distance levels you would like output files created for, separated by dashes.\n"; helpString += "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n"; helpString += "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, name=amazon.names).\n"; helpString += "The default value for label is all lines in your inputfile.\n"; helpString += "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name.\n"; helpString += "If you provide a groupfile, then it also appends the sequence's group to the name.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "BinSeqCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string BinSeqCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],[distance],fasta"; } //makes file like: amazon.0.03.fasta else if (type == "count") { pattern = "[filename],count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "BinSeqCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** BinSeqCommand::BinSeqCommand(string option) : Command() { try { allLines = true; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); //check for required parameters ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { //if there is a current fasta file, use it fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not found") { listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { m->mothurOut("You have no current list file and the list parameter is required.\n"); abort = true; } } else if (listfile == "not open") { listfile = ""; abort = true; } else { current->setListFile(listfile); } if (outputdir == "") { outputdir = util.hasPath(listfile); } //check for optional parameter and set defaults // ...at some point should add some additional type checking... 
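//label (optional): dash-delimited list of list-file distance labels to create output for; the default ("all") processes every label in the list file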
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } string namesfile = validParameter.validFile(parameters, "name"); if (namesfile == "not open") { namesfile = ""; abort = true; } else if (namesfile == "not found") { namesfile = ""; } else { current->setNameFile(namesfile); } string groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namesfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if (!abort) { if ((namesfile != "") || (groupfile != "")) { //convert to count string rootFileName = namesfile; if (rootFileName == "") { rootFileName = groupfile; } if (outputdir == "") { outputdir = util.hasPath(rootFileName); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(rootFileName)); string outputFileName = getOutputFileName("count", variables); CountTable ct; ct.createTable(namesfile, groupfile, nullVector); ct.printCompressedTable(outputFileName); outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName); current->setCountFile(outputFileName); countfile = outputFileName; //list file will contain redund names since name file is provided - remove dups string tempAccnos = namesfile + ".accnos.temp"; vector namesOfSeqs = ct.getNamesOfSeqs(); util.printAccnos(tempAccnos, namesOfSeqs); string inputString = "list=" + listfile + ", accnos=" + tempAccnos; m->mothurOut("/******************************************/\n"); m->mothurOut("\nRunning command: get.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* getSeqsCommand = new GetSeqsCommand(inputString); getSeqsCommand->execute(); string templistfile = getSeqsCommand->getOutputFiles()["list"][0]; string newName = util.getRootName(listfile) + "unique.list"; util.renameFile(templistfile, newName); listfile = newName; namesfile = ""; groupfile = ""; util.mothurRemove(tempAccnos); current->setListFile(listfile); delete getSeqsCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); } } } } catch(exception& e) { m->errorOut(e, "BinSeqCommand", "BinSeqCommand"); exit(1); } } //********************************************************************************************************************** BinSeqCommand::~BinSeqCommand(){} //********************************************************************************************************************** int BinSeqCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } FastaMap fasta; fasta.readFastaFile(fastafile); //if user gave a namesfile then use it if (countfile != "") { ct.readTable(countfile, true, false); } InputData input(listfile, "list", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; int error = 0; ListVector* list = util.getNextList(input, allLines, userLabels, 
processedLabels, lastLabel); while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } error = process(list, fasta); delete list; if (error == 1) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } if(m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set align file as new current fastafile string currentFasta = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; current->setFastaFile(currentFasta); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "BinSeqCommand", "execute"); exit(1); } } //********************************************************************************************************************** //return 1 if error, 0 otherwise int BinSeqCommand::process(ListVector* list, FastaMap& fasta) { try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); variables["[distance]"] = list->getLabel(); string outputFileName = getOutputFileName("fasta", variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); m->mothurOut(list->getLabel()); m->mothurOutEndLine(); //for each bin in the list vector vector binLabels = list->getLabels(); for (int i = 0; i < list->size(); i++) { if (m->getControl_pressed()) { return 1; } string binnames = list->get(i); vector names; util.splitAtComma(binnames, names); for (int j = 0; j < names.size(); j++) { string name = names[j]; //do work for that name string sequence = fasta.getSequence(name); if (countfile != "") { if (sequence != "not found") { if (ct.hasGroupInfo()) { vector groups = ct.getGroups(name); string groupInfo = ""; for (int k = 0; k < groups.size()-1; k++) { groupInfo += groups[k] + "-"; } if (groups.size() != 0) { groupInfo += groups[groups.size()-1]; } else { groupInfo = "not found"; } name = name + "\t" + groupInfo + "\t" + binLabels[i] + "\tNumRep=" + toString(ct.getNumSeqs(name)); out << ">" << name << endl; out << sequence << endl; }else { name = name + "\t" + binLabels[i] + "\tNumRep=" + toString(ct.getNumSeqs(name)); out << ">" << name << endl; out << sequence << endl; } }else { m->mothurOut(name + " is missing from your fasta. Does your list file contain all sequence names or just the uniques?\n"); return 1; } }else { if (sequence != "not found") { name = name + "\t" + binLabels[i]; out << ">" << name << endl; out << sequence << endl; }else { m->mothurOut(name + " is missing from your fasta or count file. Please correct. \n"); return 1; } } } } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "BinSeqCommand", "process"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/binsequencecommand.h000077500000000000000000000023321424121717000222360ustar00rootroot00000000000000#ifndef BINSEQCOMMAND_H #define BINSEQCOMMAND_H /* * binsequencecommand.h * Mothur * * Created by Sarah Westcott on 4/3/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ /* The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name. */ #include "command.hpp" #include "inputdata.h" #include "listvector.hpp" #include "fastamap.h" #include "groupmap.h" #include "counttable.h" #include "getseqscommand.h" class BinSeqCommand : public Command { public: BinSeqCommand(string); ~BinSeqCommand(); vector setParameters(); string getCommandName() { return "bin.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Bin.seqs"; } string getDescription() { return "maps sequences to otus"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: CountTable ct; bool abort, allLines; set labels; //holds labels to be used string filename, fastafile, listfile, countfile, label; vector outputNames; int process(ListVector*, FastaMap&); }; #endif mothur-1.48.0/source/commands/biominfocommand.cpp000066400000000000000000000433551424121717000221010ustar00rootroot00000000000000// // biominfocommand.cpp // Mothur // // Created by Sarah Westcott on 8/5/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #include "biominfocommand.h" //********************************************************************************************************************** vector BiomInfoCommand::setParameters(){ try { CommandParameter pbiom("biom", "InputTypes", "", "", "", "", "","",false,true, true); parameters.push_back(pbiom); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter prelabund("relabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prelabund); CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "","",false,false); parameters.push_back(pbasis); CommandParameter pformat("format", "Multiple", "hdf5-simple", "hdf5", "", "", "","",false,false, true); parameters.push_back(pformat); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter poutput("output", "Multiple", "simple-detail", "detail", "", "", "","",false,false, true); parameters.push_back(poutput); CommandParameter pprintlevel("printlevel", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pprintlevel); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; maxLevel = 0; vector tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["constaxonomy"] = tempOutNames; outputTypes["taxsummary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "BiomInfoCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string BiomInfoCommand::getHelpString(){ try { string helpString = ""; helpString += "The biom.info command reads a biom file creates a shared file. 
If your biom file contains metadata mothur will also create taxonomy or constaxonomy along with tax.summary files.\n"; helpString += "The biom.info command parameters are " + getCommandParameters() + ". The biom parameter is required.\n"; helpString += "The format parameter allows you to indicate the type of biom file you have. Options are hdf5 or simple. Default is hdf5, unless you are running a version without HDF5 libraries.\n"; helpString += "The label parameter allows you to enter a distance label to be used in the shared file created from your biom file.\n"; helpString += "The relabund parameter allows you to indicate you want the tax.summary file values to be relative abundances rather than raw abundances. Default=F. \n"; helpString += "The basis parameter allows you to indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n"; helpString += "The output parameter allows you to specify the format of your summary file. Options are simple and detail. The default is detail.\n"; helpString += "The printlevel parameter allows you to specify the taxlevel of your summary file to print to. Options are 1 to the max level in the file. The default is -1, meaning max level. If you select a level greater than the level your sequences classify to, mothur will print to your max level. \n"; helpString += "For example, consider the following: basis=sequence could give Clostridiales 3 105, where 105 is the total number of sequences whose OTU classified to Clostridiales. "; helpString += "Now basis=otu could give Clostridiales 3 7, where 7 is the number of OTUs that classified to Clostridiales.\n"; helpString += "The biom.info command should be in the following format: biom.info(biom=test.biom, label=0.03).\n"; getCommonQuestions(); return helpString; } catch(exception& e) { m->errorOut(e, "BiomInfoCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string BiomInfoCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string issue = "Cannot convert error. What do I do?"; issues.push_back(issue); string ianswer = "\tThis issue is caused by a matrix_element_type mismatch. The biom file contains a field called 'matrix_element_type'. This field tells mothur what form your observation data is in: int or float. Mothur expects 'int' (an integer value) because the shared file contains integer value abundance counts. If your file contains float values mothur will round down to the nearest integer value. But if your matrix_element_type=int and yet the file contains integer counts in float form, (i.e. 31.0 instead of 31) you will get this error. You can resolve this issue by setting matrix_element_type=float in the biom file.\n"; ianswers.push_back(ianswer); issue = "Mothur can't read my biom file. What does this mean?"; issues.push_back(issue); ianswer = "\tMothur allows for 2 formats: classic (http://biom-format.org/documentation/format_versions/biom-1.0.html) and hdf5 (http://biom-format.org/documentation/format_versions/biom-2.0.html). 
NOTE: you can only process hdf5 files if you are using our pre-built version or have built your version of mothur with USEHDF5=yes.\n"; ianswers.push_back(ianswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "BiomInfoCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string BiomInfoCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "shared") { pattern = "[filename],[tag],shared"; } else if (type == "constaxonomy") { pattern = "[filename],[tag],cons.taxonomy"; } else if (type == "taxonomy") { pattern = "[filename],[tag],taxonomy"; } else if (type == "taxsummary") { pattern = "[filename],[tag],[tag2],tax.summary"; } //tag2 = "" for taxonomy tag2 = cons for constaxonomy else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "BiomInfoCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** BiomInfoCommand::BiomInfoCommand(string option) : Command() { try { maxLevel = 0; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters biomfile = validParameter.validFile(parameters, "biom"); if (biomfile == "not open") { biomfile = ""; abort = true; } else if (biomfile == "not found") { biomfile = ""; m->mothurOut("[ERROR]: You must provide a biom file, please correct.\n"); abort = true;} else { current->setBiomFile(biomfile); } label = validParameter.valid(parameters, "label"); if (label == "not found") { label = "userLabel"; } output = validParameter.valid(parameters, "output"); if(output == "not found"){ output = "detail"; } if ((output != "simple") && (output != "detail")) { m->mothurOut(output + " is not a valid output form. Options are simple and detail. I will use detail.\n"); output = "detail"; } string temp = validParameter.valid(parameters, "relabund"); if (temp == "not found"){ temp = "false"; } else { temp = util.getSimpleName(temp); } relabund = util.isTrue(temp); temp = validParameter.valid(parameters, "printlevel"); if (temp == "not found"){ temp = "-1"; } util.mothurConvert(temp, printlevel); basis = validParameter.valid(parameters, "basis"); if (basis == "not found") { basis = "otu"; } if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu.\n"); } format = validParameter.valid(parameters, "format"); if (format == "not found") { #ifdef USE_HDF5 if (!abort) { if (util.isHDF5(biomfile)) { format = "hdf5"; } else { format = "simple"; } } #else format = "simple"; #endif } if ((format != "hdf5") && (format != "simple")) { m->mothurOut("Invalid option for format. 
format options are hdf5 and simple, quitting.\n"); abort = true; } if (format == "hdf5") { #ifdef USE_HDF5 //do nothing we have the api #else m->mothurOut("[ERROR]: To read HDF5 biom files, you must have the API installed, quitting.\n"); abort=true; #endif } } } catch(exception& e) { m->errorOut(e, "BiomInfoCommand", "BiomInfoCommand"); exit(1); } } //********************************************************************************************************************** int BiomInfoCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); Biom* biom; if (format == "hdf5") { biom = new BiomHDF5(biomfile, label); } else { biom = new BiomSimple(biomfile, label); } //getting output filename string filename = biomfile; if (outputdir == "") { outputdir += util.hasPath(filename); } fileroot = outputdir + util.getRootName(util.getSimpleName(biomfile)); SharedRAbundVectors* shared = biom->getSharedRAbundVectors(); if (format == "hdf5") { label = shared->getLabel(); } if (label == "") { label = "userLabel"; shared->setLabels(label); } CountTable ct; vector< map > otuContainsGroups; if (shared != nullptr) { map variables; variables["[filename]"] = fileroot; variables["[tag]"] = label; string sharedFilename = getOutputFileName("shared",variables); outputNames.push_back(sharedFilename); outputTypes["shared"].push_back(sharedFilename); bool printHeaders = true; ofstream out; util.openOutputFile(sharedFilename, out); shared->print(out, printHeaders); out.close(); vector groupNames = shared->getNamesGroups(); for (int j = 0; j < groupNames.size(); j++) { ct.addGroup(groupNames[j]); } int numBins = shared->getNumBins(); for (int i = 0; i < numBins; i++) { int total = 0; map containsGroup; vector abunds; for (int j = 0; j < shared->size(); j++) { if (m->getControl_pressed()) { break; } int abund = shared->get(i, groupNames[j]); total += abund; containsGroup[groupNames[j]] = abund; if (basis == "otu") { if (abund > 0) { abund = 1; } } //count presence in otu abunds.push_back(abund); } ct.push_back(shared->getOTUName(i), abunds); otuContainsGroups.push_back(containsGroup); } } //print group taxonomies if given map groupTaxonomies = biom->getGroupTaxonomies(); if (groupTaxonomies.size() != 0) { //write taxonomy file map variables; variables["[filename]"] = fileroot; variables["[tag]"] = label; string taxFilename = getOutputFileName("taxonomy",variables); outputNames.push_back(taxFilename); outputTypes["taxonomy"].push_back(taxFilename); ofstream outTax; util.openOutputFile(taxFilename, outTax); GroupMap* g = nullptr; PhyloSummary taxSum(g, relabund, printlevel); //print group taxonomy if given for (map::iterator it = groupTaxonomies.begin(); it!= groupTaxonomies.end(); it++) { outTax << it->first << '\t' << it->second << endl; taxSum.addSeqToTree(it->first, it->second); } outTax.close(); //write taxonomy file variables["[tag2]"] = ""; string taxSumFilename = getOutputFileName("taxsummary",variables); outputNames.push_back(taxSumFilename); outputTypes["taxsummary"].push_back(taxSumFilename); ofstream outTaxSum; util.openOutputFile(taxSumFilename, outTaxSum); //write tax.summary if (relabund) { taxSum.print(outTaxSum, relabund); } else { taxSum.print(outTaxSum, output); } outTaxSum.close(); } //print consTaxonomy if given vector consTax = biom->getConsTaxonomies(); if (consTax.size() != 0) { //write taxonomy file map variables; variables["[filename]"] = fileroot; variables["[tag]"] = label; string taxFilename = getOutputFileName("constaxonomy",variables); 
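//The block that follows writes the constaxonomy file (header: OTU\tSize\tTaxonomy) and feeds each OTU's
//consensus taxonomy into a PhyloSummary built from the CountTable assembled above; the basis parameter
//controls whether that cons tax.summary is tallied per sequence or per OTU, matching the Clostridiales
//example given in getHelpString().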
outputNames.push_back(taxFilename); outputTypes["constaxonomy"].push_back(taxFilename); ofstream outTax; util.openOutputFile(taxFilename, outTax); outTax << "OTU\tSize\tTaxonomy\n"; PhyloSummary consTaxSum(&ct, relabund, printlevel); for (int i = 0; i < consTax.size(); i++) { consTax[i].printConsTax(outTax); if (basis == "sequence") { consTaxSum.addSeqToTree(consTax[i].getName(), consTax[i].getConsTaxString()); } else { consTaxSum.addSeqToTree(consTax[i].getConsTaxString(), otuContainsGroups[i]); } //add otu } outTax.close(); variables["[tag2]"] = "cons"; string taxSumFilename = getOutputFileName("taxsummary",variables); outputNames.push_back(taxSumFilename); outputTypes["taxsummary"].push_back(taxSumFilename); ofstream outTaxSum; util.openOutputFile(taxSumFilename, outTaxSum); //write tax.summary if (relabund) { consTaxSum.print(outTaxSum, relabund); } else { consTaxSum.print(outTaxSum, output); } outTaxSum.close(); } delete biom; m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " create mothur files from your biom file.\n\n"); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } } string currentName = ""; itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } //set taxonomy file as new current taxonomyfile itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } //set constaxonomy file as new current constaxonomyfile itTypes = outputTypes.find("constaxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setConsTaxonomyFile(currentName); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "BiomInfoCommand", "execute"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/biominfocommand.h000077500000000000000000000031301424121717000215340ustar00rootroot00000000000000// // biominfocommand.h // Mothur // // Created by Sarah Westcott on 8/5/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #ifndef __Mothur__biominfocommand__ #define __Mothur__biominfocommand__ #include "command.hpp" #include "inputdata.h" #include "phylosummary.h" #include "biomsimple.hpp" #include "biomhdf5.hpp" #define MAX_NAME 1024 class BiomInfoCommand : public Command { #ifdef UNIT_TEST friend class TestBiomInfoCommand; #endif public: BiomInfoCommand(string); ~BiomInfoCommand() = default; vector setParameters(); string getCommandName() { return "biom.info"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getCommonQuestions(); string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Biom.info"; } string getDescription() { return "create 'mothur' files from a biom file. 
ie: shared, taxonomy, constaxonomy"; } int execute(); void help() { m->mothurOut(getHelpString()); } protected: void createFilesFromBiomSimple(); int extractFilesFromHDF5(); vector outputNames, otuNames, sampleNames, taxonomy; vector indices, indptr, otudata; string fileroot, biomfile, label, basis, output, format; bool firsttime, abort, relabund; int maxLevel, printlevel, nnz; #ifdef USE_HDF5 void processAttributes(H5::Group&, set&); void checkGroups(H5::H5File&, map >&); #endif }; #endif /* defined(__Mothur__biominfocommand__) */ mothur-1.48.0/source/commands/chimerabellerophoncommand.cpp000077500000000000000000000247471424121717000241500ustar00rootroot00000000000000/* * chimerabellerophoncommand.cpp * Mothur * * Created by westcott on 4/1/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "chimerabellerophoncommand.h" #include "bellerophon.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector ChimeraBellerophonCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none","none","none","chimera-accnos",false,true,true); parameters.push_back(pfasta); CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter); CommandParameter pcorrection("correction", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcorrection); CommandParameter pwindow("window", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pwindow); CommandParameter pincrement("increment", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pincrement); CommandParameter premovechimeras("removechimeras", "Boolean", "", "t", "", "", "","fasta",false,false); parameters.push_back(premovechimeras); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["fasta"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ChimeraBellerophonCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ChimeraBellerophonCommand::getHelpString(){ try { string helpString = ""; helpString += "The chimera.bellerophon command reads a fastafile and creates list of potentially chimeric sequences.\n"; helpString += "The chimera.bellerophon command parameters are fasta, filter, correction, processors, window, increment. The fasta parameter is required, unless you have a valid current file.\n"; helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter, default=false. 
\n"; helpString += "The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs, default=true.\n"; helpString += "The window parameter allows you to specify the window size for searching for chimeras, default is 1/4 sequence length. \n"; helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default is 25.\n"; helpString += "The removechimeras parameter allows you to indicate you would like to automatically remove the sequences that are flagged as chimeric. Default=t.\n"; helpString += "chimera.bellerophon(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors) \n"; helpString += "Example: chimera.bellerophon(fasta=AD.align, filter=True, correction=true, window=200) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChimeraBellerophonCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ChimeraBellerophonCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "chimera") { pattern = "[filename],bellerophon.chimeras"; } else if (type == "accnos") { pattern = "[filename],bellerophon.accnos"; } else if (type == "fasta") { pattern = "[filename],bellerophon.fasta"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ChimeraBellerophonCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ChimeraBellerophonCommand::ChimeraBellerophonCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } string temp; temp = validParameter.valid(parameters, "filter"); if (temp == "not found") { temp = "F"; } filter = util.isTrue(temp); temp = validParameter.valid(parameters, "correction"); if (temp == "not found") { temp = "T"; } correction = util.isTrue(temp); temp = validParameter.valid(parameters, "window"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, window); temp = validParameter.valid(parameters, "increment"); if (temp == "not found") { temp = "25"; } util.mothurConvert(temp, increment); temp = validParameter.valid(parameters, "removechimeras"); if (temp == "not found") { temp = "t"; } removeChimeras = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "ChimeraBellerophonCommand", "ChimeraBellerophonCommand"); exit(1); } } 
//*************************************************************************************************************** int ChimeraBellerophonCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Checking sequences from " + fastafile + " ...\n" ); long start = time(nullptr); MothurChimera* chimera = new Bellerophon(fastafile, filter, correction, window, increment, outputdir); chimera->getChimeras(); if (m->getControl_pressed()) { delete chimera; return 0; } if (outputdir == "") { outputdir = util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outputFileName = getOutputFileName("chimera", variables); string accnosFileName = getOutputFileName("accnos", variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); ofstream out2; util.openOutputFile(accnosFileName, out2); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); //print results numSeqs = chimera->print(out, out2, ""); out.close(); out2.close(); delete chimera; if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences.\n\n"); if (removeChimeras) { if (!util.isBlank(accnosFileName)) { m->mothurOut("\nRemoving chimeras from your input files:\n"); string inputString = "fasta=" + fastafile + ", accnos=" + accnosFileName; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string currentName = getOutputFileName("fasta", variables); util.renameFile(filenames["fasta"][0], currentName); util.mothurRemove(filenames["fasta"][0]); outputNames.push_back(currentName); outputTypes["fasta"].push_back(currentName); }else { m->mothurOut("\nNo chimeras found, skipping remove.seqs.\n"); } } //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraBellerophonCommand", "execute"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/chimerabellerophoncommand.h000077500000000000000000000024411424121717000236000ustar00rootroot00000000000000#ifndef CHIMERABELLEROPHONCOMMAND_H #define CHIMERABELLEROPHONCOMMAND_H /* * chimerabellerophoncommand.h * Mothur * * 
Created by westcott on 4/1/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "command.hpp" #include "mothurchimera.h" /***********************************************************/ class ChimeraBellerophonCommand : public Command { public: ChimeraBellerophonCommand(string); ~ChimeraBellerophonCommand(){} vector setParameters(); string getCommandName() { return "chimera.bellerophon"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Huber T, Faulkner G, Hugenholtz P (2004). Bellerophon: a program to detect chimeric sequences in multiple sequence alignments. Bioinformatics 20: 2317-9. \nhttp://www.mothur.org/wiki/Chimera.bellerophon"; } string getDescription() { return "detect chimeric sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, filter, correction, removeChimeras; string fastafile; int processors, window, increment, numSeqs; vector outputNames; }; /***********************************************************/ #endif mothur-1.48.0/source/commands/chimeraccodecommand.cpp000077500000000000000000000337531424121717000227110ustar00rootroot00000000000000/* * chimeraccodecommand.cpp * Mothur * * Created by westcott on 3/30/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "chimeraccodecommand.h" #include "ccode.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector ChimeraCcodeCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-mapinfo-accnos",false,true,true); parameters.push_back(pfasta); CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter); CommandParameter pwindow("window", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pwindow); CommandParameter pnumwanted("numwanted", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pnumwanted); CommandParameter pmask("mask", "String", "", "", "", "", "","",false,false); parameters.push_back(pmask); CommandParameter premovechimeras("removechimeras", "Boolean", "", "t", "", "", "","fasta",false,false); parameters.push_back(premovechimeras); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["mapinfo"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["fasta"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ChimeraCcodeCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ChimeraCcodeCommand::getHelpString(){ try { string helpString = ""; helpString += "The chimera.ccode command reads a fastafile and 
referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command was created using the algorithms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n"; helpString += "The chimera.ccode command parameters are fasta, reference, filter, mask, processors, window and numwanted.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n"; helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n"; helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n"; helpString += "The window parameter allows you to specify the window size for searching for chimeras. \n"; helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n"; helpString += "The removechimeras parameter allows you to indicate you would like to automatically remove the sequences that are flagged as chimeric. Default=t.\n"; helpString += "The chimera.ccode command should be in the following format: \n"; helpString += "chimera.ccode(fasta=yourFastaFile, reference=yourTemplate) \n"; helpString += "Example: chimera.ccode(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChimeraCcodeCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ChimeraCcodeCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "chimera") { pattern = "[filename],[tag],ccode.chimeras-[filename],ccode.chimeras"; } else if (type == "accnos") { pattern = "[filename],[tag],ccode.accnos-[filename],ccode.accnos"; } else if (type == "fasta") { pattern = "[filename],ccode.fasta-[filename],[tag],ccode.fasta"; } else if (type == "mapinfo") { pattern = "[filename],mapinfo"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ChimeraCcodeCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ChimeraCcodeCommand::ChimeraCcodeCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { 
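//store the validated fasta as the session's current fasta so later commands can default to it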
current->setFastaFile(fastafile); } maskfile = validParameter.validPath(parameters, "mask"); if (maskfile == "not found") { maskfile = ""; } else if (maskfile != "default") { ifstream in; bool ableToOpen = util.openInputFile(maskfile, in); if (!ableToOpen) { abort = true; } in.close(); }else if (maskfile == "default") { m->mothurOut("[NOTE]: Using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013.\n"); } string temp; temp = validParameter.valid(parameters, "filter"); if (temp == "not found") { temp = "F"; } filter = util.isTrue(temp); temp = validParameter.valid(parameters, "window"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, window); temp = validParameter.valid(parameters, "numwanted"); if (temp == "not found") { temp = "20"; } util.mothurConvert(temp, numwanted); temp = validParameter.valid(parameters, "removechimeras"); if (temp == "not found") { temp = "t"; } removeChimeras = util.isTrue(temp); //this has to go after save so that if the user sets save=t and provides no reference we abort templatefile = validParameter.validFile(parameters, "reference"); if (templatefile == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true; }else if (templatefile == "not open") { abort = true; } } } catch(exception& e) { m->errorOut(e, "ChimeraCcodeCommand", "ChimeraCcodeCommand"); exit(1); } } //*************************************************************************************************************** int ChimeraCcodeCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Checking sequences from " + fastafile + " ...\n" ); long start = time(nullptr); if (outputdir == "") { outputdir = util.hasPath(fastafile); } string outputFileName, accnosFileName; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string mapInfo = getOutputFileName("mapinfo", variables); if (maskfile != "") { variables["[tag]"] = maskfile; } outputFileName = getOutputFileName("chimera", variables); accnosFileName = getOutputFileName("accnos", variables); if (m->getControl_pressed()) { return 0; } numSeqs = driver(outputFileName, fastafile, accnosFileName); if (m->getControl_pressed()) { util.mothurRemove(outputFileName); util.mothurRemove(accnosFileName); return 0; } ofstream outHeader; string tempHeader = outputdir + util.getRootName(util.getSimpleName(fastafile)) + maskfile + "ccode.chimeras.tempHeader"; util.openOutputFile(tempHeader, outHeader); outHeader << "For full window mapping info refer to " << mapInfo << endl << endl; outHeader.close(); util.appendFiles(outputFileName, tempHeader); util.mothurRemove(outputFileName); rename(tempHeader.c_str(), outputFileName.c_str()); outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(mapInfo); outputTypes["mapinfo"].push_back(mapInfo); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences.\n"); if (removeChimeras) { if (!util.isBlank(accnosFileName)) { m->mothurOut("\nRemoving chimeras from your input files:\n"); string inputString = "fasta=" + fastafile + ", accnos=" + accnosFileName; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new 
RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string currentName = getOutputFileName("fasta", variables); util.renameFile(filenames["fasta"][0], currentName); util.mothurRemove(filenames["fasta"][0]); outputNames.push_back(currentName); outputTypes["fasta"].push_back(currentName); }else { m->mothurOut("\nNo chimeras found, skipping remove.seqs.\n"); } } //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraCcodeCommand", "execute"); exit(1); } } //********************************************************************************************************************** int ChimeraCcodeCommand::driver(string outputFName, string filename, string accnos){ try { MothurChimera* chimera = new Ccode(fastafile, templatefile, filter, maskfile, window, numwanted, outputdir); //is your template aligned? if (chimera->getUnaligned()) { m->mothurOut("[ERROR]: Your reference sequences are unaligned, please correct.\n"); delete chimera; return 0; } templateSeqsLength = chimera->getLength(); ofstream out; util.openOutputFile(outputFName, out); ofstream out2; util.openOutputFile(accnos, out2); ifstream inFASTA; util.openInputFile(filename, inFASTA); int count = 0; while (!inFASTA.eof()) { if (m->getControl_pressed()) { count = 1; break; } Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file if (candidateSeq->getAligned().length() != templateSeqsLength) { m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping.\n"); }else{ //find chimeras chimera->getChimeras(candidateSeq); if (m->getControl_pressed()) { delete candidateSeq; return 1; } //print results chimera->print(out, out2); } count++; } delete candidateSeq; //report progress if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); } } //report progress if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); } out.close(); out2.close(); inFASTA.close(); delete chimera; return count; } catch(exception& e) { m->errorOut(e, "ChimeraCcodeCommand", "driver"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/chimeraccodecommand.h000077500000000000000000000024551424121717000223510ustar00rootroot00000000000000#ifndef CHIMERACCODECOMMAND_H #define CHIMERACCODECOMMAND_H /* * chimeraccodecommand.h * Mothur * * Created by westcott on 3/30/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "mothur.h" #include "command.hpp" #include "mothurchimera.h" /***********************************************************/ class ChimeraCcodeCommand : public Command { public: ChimeraCcodeCommand(string); ~ChimeraCcodeCommand(){} vector setParameters(); string getCommandName() { return "chimera.ccode"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Gonzalez JM, Zimmermann J, Saiz-Jimenez C (2005). Evaluating putative chimeric sequences from PCR-amplified products. Bioinformatics 21: 333-7. \nhttp://www.mothur.org/wiki/Chimera.ccode"; } string getDescription() { return "detect chimeric sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, filter, save, removeChimeras; string fastafile, templatefile, maskfile; int window, numwanted, numSeqs, templateSeqsLength; vector outputNames; int driver(string, string, string); }; /***********************************************************/ #endif mothur-1.48.0/source/commands/chimeracheckcommand.cpp000077500000000000000000000230441424121717000227010ustar00rootroot00000000000000/* * chimeracheckcommand.cpp * Mothur * * Created by westcott on 3/31/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "chimeracheckcommand.h" //********************************************************************************************************************** vector ChimeraCheckCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter psvg("svg", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psvg); CommandParameter pincrement("increment", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pincrement); CommandParameter pksize("ksize", "Number", "", "7", "", "", "","",false,false); parameters.push_back(pksize); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["chimera"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ChimeraCheckCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ChimeraCheckCommand::getHelpString(){ try { string helpString = ""; helpString += "The chimera.check command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command was created using the algorithms described in CHIMERA_CHECK version 2.7 written by Niels Larsen. 
\n"; helpString += "The chimera.check command parameters are fasta, reference, processors, ksize, increment, svg and name.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n"; helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default is 10.\n"; helpString += "The ksize parameter allows you to input kmersize, default is 7. \n"; helpString += "The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence, default is False.\n"; helpString += "The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n"; helpString += "The chimera.check command should be in the following format: \n"; helpString += "chimera.check(fasta=yourFastaFile, reference=yourTemplateFile, processors=yourProcessors, ksize=yourKmerSize) \n"; helpString += "Example: chimera.check(fasta=AD.fasta, reference=core_set_aligned,imputed.fasta, processors=4, ksize=8) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChimeraCheckCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ChimeraCheckCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "chimera") { pattern = "[filename],chimeracheck.chimeras"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ChimeraCheckCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ChimeraCheckCommand::ChimeraCheckCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } //this has to go after save so that if the user sets save=t and provides no reference we abort templatefile = validParameter.validFile(parameters, "reference"); if (templatefile == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true; }else if (templatefile == "not open") { abort = true; } string temp = validParameter.valid(parameters, "ksize"); if (temp == "not found") { 
temp = "7"; } util.mothurConvert(temp, ksize); temp = validParameter.valid(parameters, "svg"); if (temp == "not found") { temp = "F"; } svg = util.isTrue(temp); if (namefile != "") { svg = true; } temp = validParameter.valid(parameters, "increment"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, increment); } } catch(exception& e) { m->errorOut(e, "ChimeraCheckCommand", "ChimeraCheckCommand"); exit(1); } } //*************************************************************************************************************** int ChimeraCheckCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Checking sequences from " + fastafile + " ...\n" ); long start = time(nullptr); numSeqs = checkChimeras(); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } outputTypes.clear(); return 0; } m->mothurOut("\nThis method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values.\n"); m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences.\n\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraCheckCommand", "execute"); exit(1); } } //********************************************************************************************************************** int ChimeraCheckCommand::checkChimeras(){ try { if (outputdir == "") { outputdir = util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outputFileName = getOutputFileName("chimera", variables); outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); MothurChimera* chimera = new ChimeraCheckRDP(fastafile, templatefile, namefile, svg, increment, ksize, outputdir); ofstream out; util.openOutputFile(outputFileName, out); ofstream out2; ifstream inFASTA; util.openInputFile(fastafile, inFASTA); int count = 0; while (!inFASTA.eof()) { if (m->getControl_pressed()) { break; } Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file //find chimeras chimera->getChimeras(candidateSeq); if (m->getControl_pressed()) { delete candidateSeq; return 1; } //print results chimera->print(out, out2); count++; } delete candidateSeq; //report progress if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); } } //report progress if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); } out.close(); inFASTA.close(); delete chimera; return count; } catch(exception& e) { m->errorOut(e, "ChimeraCheckCommand", "checkChimeras"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/chimeracheckcommand.h000077500000000000000000000023641424121717000223500ustar00rootroot00000000000000#ifndef CHIMERACHECKCOMMAND_H #define CHIMERACHECKCOMMAND_H /* * chimeracheckcommand.h * Mothur * * Created by westcott on 3/31/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "mothur.h" #include "command.hpp" #include "mothurchimera.h" #include "chimeracheckrdp.h" /***********************************************************/ class ChimeraCheckCommand : public Command { public: ChimeraCheckCommand(string); ~ChimeraCheckCommand(){} vector setParameters(); string getCommandName() { return "chimera.check"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "CHIMERA_CHECK version 2.7 written by Niels Larsen (http://wdcm.nig.ac.jp/RDP/docs/chimera_doc.html) \nhttp://www.mothur.org/wiki/Chimera.check"; } string getDescription() { return "detect chimeric sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: int checkChimeras(); bool abort, svg, save; string fastafile, templatefile, namefile; int increment, ksize, numSeqs, templateSeqsLength; vector outputNames; }; /***********************************************************/ #endif mothur-1.48.0/source/commands/chimeraperseuscommand.cpp000066400000000000000000001323361424121717000233140ustar00rootroot00000000000000/* * chimeraperseuscommand.cpp * Mothur * * Created by westcott on 10/26/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "chimeraperseuscommand.h" #include "uniqueseqscommand.h" #include "sequence.hpp" #include "counttable.h" #include "sequencecountparser.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector ChimeraPerseusCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups); CommandParameter premovechimeras("removechimeras", "Boolean", "", "t", "", "", "","fasta",false,false); parameters.push_back(premovechimeras); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff); CommandParameter palpha("alpha", "Number", "", "-5.54", "", "", "","",false,false); parameters.push_back(palpha); CommandParameter pbeta("beta", "Number", "", "0.33", "", "", "","",false,false); parameters.push_back(pbeta); abort = false; calledHelp = false; vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["fasta"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { 
myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ChimeraPerseusCommand::getHelpString(){ try { string helpString = ""; helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n"; helpString += "The chimera.perseus command parameters are fasta, name, count, group, cutoff, processors, dereplicate, removechimeras, alpha and beta.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n"; helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing sequences after chimeras are removed.\n"; helpString += "The group parameter allows you to provide a group file. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "If the dereplicate parameter is false, then if one group finds the sequence to be chimeric, all groups find it to be chimeric, default=f.\n"; helpString += "The removechimeras parameter allows you to indicate you would like to automatically remove the sequences that are flagged as chimeric. Default=t.\n"; helpString += "The alpha parameter allows you to set the alpha coefficient of the logistic model used to score each sequence's probability of being chimeric. The default is -5.54. \n"; helpString += "The beta parameter allows you to set the beta coefficient of that logistic model. The default is 0.33. \n"; helpString += "The cutoff parameter sets the probability above which a sequence is flagged as chimeric. The default is 0.50. 
\n"; helpString += "The chimera.perseus command should be in the following format: \n"; helpString += "chimera.perseus(fasta=yourFastaFile, name=yourNameFile) \n"; helpString += "Example: chimera.perseus(fasta=AD.align, name=AD.names) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ChimeraPerseusCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "chimera") { pattern = "[filename],perseus.chimeras"; } else if (type == "accnos") { pattern = "[filename],perseus.accnos"; } else if (type == "fasta") { pattern = "[filename],perseus.fasta"; } else if (type == "count") { pattern = "[filename],perseus.count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ChimeraPerseusCommand::ChimeraPerseusCommand(string option) : Command() { try { hasCount = false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); //check for required parameters ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } bool hasName = false; string namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } if (namefile != "") { hasName = true; } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if (countfile != "") { hasCount = true; } //make sure there is at least one valid file left if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name.\n"); abort = true; } if (!hasName && !hasCount) { //if there is a current name file, use it, else look for current count file string filename = current->getNameFile(); if (filename != "") { hasName = true; namefile = filename; m->mothurOut("Using " + filename + " as input file for the name parameter.\n"); } else { filename = current->getCountFile(); if (filename != "") { hasCount = true; countfile = filename; m->mothurOut("Using " + filename + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You must provide a count or name file.\n"); abort = true; } } } bool hasGroup = false; string groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = 
true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); hasGroup = true; } if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group.\n"); abort = true; } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found"){ temp = "0.50"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "alpha"); if (temp == "not found"){ temp = "-5.54"; } util.mothurConvert(temp, alpha); temp = validParameter.valid(parameters, "beta"); if (temp == "not found"){ temp = "0.33"; } util.mothurConvert(temp, beta); temp = validParameter.valid(parameters, "dereplicate"); if (temp == "not found") { temp = "false"; } dups = util.isTrue(temp); temp = validParameter.valid(parameters, "removechimeras"); if (temp == "not found") { temp = "t"; } removeChimeras = util.isTrue(temp); if (!abort) { if ((namefile != "") || (groupfile != "")) { //convert to count string rootFileName = namefile; if (rootFileName == "") { rootFileName = groupfile; } if (outputdir == "") { outputdir = util.hasPath(rootFileName); } string outputFileName = outputdir + util.getRootName(util.getSimpleName(rootFileName)) + "count_table"; CountTable ct; ct.createTable(namefile, groupfile, nullVector); ct.printCompressedTable(outputFileName); outputNames.push_back(outputFileName); current->setCountFile(outputFileName); countfile = outputFileName; hasCount = true; } } } } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "ChimeraPerseusCommand"); exit(1); } } /**************************************************************************************************/ struct perseusData { Utils util; MothurOut* m; vector sequences; string group; int count, numChimeras; string chimeraFileName; string accnosFileName; double alpha, beta, cutoff; perseusData(string cf, string ac, double a, double b, double c){ m = MothurOut::getInstance(); count = 0; numChimeras = 0; accnosFileName = ac; chimeraFileName = cf; alpha = a; beta = b; cutoff = c; } }; //********************************************************************************************************************** //void driver(string chimeraFileName, vector& sequences, string accnosFileName, int& numChimeras){ void driver(perseusData* params){ try { vector > correctModel(4); //could be an option in the future to input own model matrix for(int i=0;i<4;i++){ correctModel[i].resize(4); } correctModel[0][0] = 0.000000; //AA correctModel[1][0] = 11.619259; //CA correctModel[2][0] = 11.694004; //TA correctModel[3][0] = 7.748623; //GA correctModel[1][1] = 0.000000; //CC correctModel[2][1] = 7.619657; //TC correctModel[3][1] = 12.852562; //GC correctModel[2][2] = 0.000000; //TT correctModel[3][2] = 10.964048; //TG correctModel[3][3] = 0.000000; //GG for(int i=0;i<4;i++){ for(int j=0;jsequences.size(); int alignLength = params->sequences[0].sequence.size(); ofstream chimeraFile; ofstream accnosFile; params->util.openOutputFile(params->chimeraFileName, chimeraFile); params->util.openOutputFile(params->accnosFileName, accnosFile); Perseus myPerseus; vector > binMatrix = myPerseus.binomial(alignLength); chimeraFile << "SequenceIndex\tName\tDiffsToBestMatch\tBestMatchIndex\tBestMatchName\tDiffstToChimera\tIndexofLeftParent\tIndexOfRightParent\tNameOfLeftParent\tNameOfRightParent\tDistanceToBestMatch\tcIndex\t(cIndex - 
singleDist)\tloonIndex\tMismatchesToChimera\tMismatchToTrimera\tChimeraBreakPoint\tLogisticProbability\tTypeOfSequence\n"; vector chimeras(numSeqs, 0); for(int i=0;im->getControl_pressed()) { chimeraFile.close(); accnosFile.close(); break; } vector restricted = chimeras; vector > leftDiffs(numSeqs); vector > leftMaps(numSeqs); vector > rightDiffs(numSeqs); vector > rightMaps(numSeqs); vector singleLeft, bestLeft; vector singleRight, bestRight; int bestSingleIndex, bestSingleDiff; vector alignments(numSeqs); int comparisons = myPerseus.getAlignments(i, params->sequences, alignments, leftDiffs, leftMaps, rightDiffs, rightMaps, bestSingleIndex, bestSingleDiff, restricted); if (params->m->getControl_pressed()) { chimeraFile.close(); accnosFile.close(); break; } int minMismatchToChimera, leftParentBi, rightParentBi, breakPointBi; string dummyA, dummyB; if (params->sequences[i].sequence.size() < 3) { chimeraFile << i << '\t' << params->sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl; }else if(comparisons >= 2){ minMismatchToChimera = myPerseus.getChimera(params->sequences, leftDiffs, rightDiffs, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight, restricted); if (params->m->getControl_pressed()) { chimeraFile.close(); accnosFile.close(); break; } int minMismatchToTrimera = numeric_limits::max(); int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB; if(minMismatchToChimera >= 3 && comparisons >= 3){ minMismatchToTrimera = myPerseus.getTrimera(params->sequences, leftDiffs, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight, restricted); if (params->m->getControl_pressed()) { chimeraFile.close(); accnosFile.close(); break; } } double singleDist = myPerseus.modeledPairwiseAlignSeqs(params->sequences[i].sequence, params->sequences[bestSingleIndex].sequence, dummyA, dummyB, correctModel); if (params->m->getControl_pressed()) { chimeraFile.close(); accnosFile.close(); break; } string type; string chimeraRefSeq; if(minMismatchToChimera - minMismatchToTrimera >= 3){ type = "trimera"; chimeraRefSeq = myPerseus.stitchTrimera(alignments, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, leftMaps, rightMaps); } else{ type = "chimera"; chimeraRefSeq = myPerseus.stitchBimera(alignments, leftParentBi, rightParentBi, breakPointBi, leftMaps, rightMaps); } if (params->m->getControl_pressed()) { chimeraFile.close(); accnosFile.close(); break; } double chimeraDist = myPerseus.modeledPairwiseAlignSeqs(params->sequences[i].sequence, chimeraRefSeq, dummyA, dummyB, correctModel); if (params->m->getControl_pressed()) { chimeraFile.close(); accnosFile.close(); break; } double cIndex = chimeraDist;//modeledPairwiseAlignSeqs(sequences[i].sequence, chimeraRefSeq); double loonIndex = myPerseus.calcLoonIndex(params->sequences[i].sequence, params->sequences[leftParentBi].sequence, params->sequences[rightParentBi].sequence, breakPointBi, binMatrix); if (params->m->getControl_pressed()) { chimeraFile.close(); accnosFile.close(); break; } chimeraFile << i << '\t' << params->sequences[i].seqName << '\t' << bestSingleDiff << '\t' << bestSingleIndex << '\t' << params->sequences[bestSingleIndex].seqName << '\t'; chimeraFile << minMismatchToChimera << '\t' << leftParentBi << '\t' << rightParentBi << '\t' << params->sequences[leftParentBi].seqName << '\t' << params->sequences[rightParentBi].seqName << '\t'; 
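//remaining columns: the single-parent and chimera distances, the loon index, mismatch counts, the break point,
//and the logistic probability returned by Perseus::classifyChimera(); sequences scoring above params->cutoff
//are written to the accnos file and labeled chimera/trimera, everything else is reported as "good"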
chimeraFile << singleDist << '\t' << cIndex << '\t' << (cIndex - singleDist) << '\t' << loonIndex << '\t'; chimeraFile << minMismatchToChimera << '\t' << minMismatchToTrimera << '\t' << breakPointBi << '\t'; double probability = myPerseus.classifyChimera(singleDist, cIndex, loonIndex, params->alpha, params->beta); chimeraFile << probability << '\t'; if(probability > params->cutoff){ chimeraFile << type << endl; accnosFile << params->sequences[i].seqName << endl; chimeras[i] = 1; params->numChimeras++; } else{ chimeraFile << "good" << endl; } } else{ chimeraFile << i << '\t' << params->sequences[i].seqName << "\t0\t0\tNull\t0\t0\t0\tNull\tNull\t0.0\t0.0\t0.0\t0\t0\t0\t0.0\t0.0\tgood" << endl; } //report progress if((i+1) % 100 == 0){ params->m->mothurOutJustToScreen("Processing sequence: " + toString(i+1) + "\n"); } params->count++; //# of sequences completed. Used by calling function to check for failure } if((numSeqs) % 100 != 0){ params->m->mothurOutJustToScreen("Processing sequence: " + toString(numSeqs) + "\n"); } if (!params->m->getControl_pressed()) { chimeraFile.close(); accnosFile.close(); } } catch(exception& e) { params->m->errorOut(e, "ChimeraPerseusCommand", "driver"); exit(1); } } //*************************************************************************************************************** int ChimeraPerseusCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Checking sequences from " + fastafile + " ...\n" ); long start = time(nullptr); if (outputdir == "") { outputdir = util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outputFileName = getOutputFileName("chimera", variables); string accnosFileName = getOutputFileName("accnos", variables); string newCountFile = ""; if (countfile == "") { countfile = getCountFile(fastafile); hasCount=true; } if (m->getControl_pressed()) { return 0; } int numSeqs = 0; int numChimeras = 0; if (hasCount) { CountTable ct; vector groups; if (ct.testGroups(countfile, groups)) { //fills groups if count file has them variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); newCountFile = getOutputFileName("count", variables); vector groups; map > group2Files; if (hasCount) { current->setMothurCalling(true); SequenceCountParser cparser(countfile, fastafile, nullVector); current->setMothurCalling(false); groups = cparser.getNamesOfGroups(); group2Files = cparser.getFiles(); } if (m->getControl_pressed()) { return 0; } //clears files ofstream out, out1, out2; util.openOutputFile(outputFileName, out); out.close(); util.openOutputFile(accnosFileName, out1); out1.close(); string countlist = accnosFileName+".byCount"; numSeqs = createProcessesGroups(group2Files, outputFileName, countlist, accnosFileName, newCountFile, groups, fastafile, countfile, numChimeras); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } if (!dups) { numChimeras = deconvoluteResults(outputFileName, accnosFileName); }else { CountTable newCount; newCount.readTable(countfile, true, false); if (!util.isBlank(countlist)) { ifstream in2; util.openInputFile(countlist, in2); string name, group; while (!in2.eof()) { in2 >> name; gobble(in2); in2 >> group; gobble(in2); newCount.setAbund(name, group, 0); } in2.close(); } util.mothurRemove(countlist); //print new *.pick.count_table vector namesInTable = newCount.printTable(newCountFile); //returns non zeroed names 
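// namesInTable holds the reads that still have a non-zero count in at least one sample after
// the per-sample zeroing above; the accnos rewrite below keeps a name only if it is absent
// from that table, i.e. the read was flagged chimeric in every sample it appeared in.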
outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); unordered_set doNotRemove = util.mothurConvert(namesInTable); //remove names we want to keep from accnos file. unordered_set accnosNames = util.readAccnos(accnosFileName); ofstream out2; util.openOutputFile(accnosFileName, out2); for (auto it = accnosNames.begin(); it != accnosNames.end(); it++) { if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; } } out2.close(); } util.mothurRemove(countlist); m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples.\n"); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } }else { if (processors != 1) { m->mothurOut("Your count file does not contain group information, mothur can only use 1 processor, continuing.\n"); processors = 1; } //read sequences and store sorted by frequency ct.readTable(countfile, false, false); vector sequences = readFiles(fastafile, ct.getNameMap()); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } perseusData* dataBundle = new perseusData(outputFileName, accnosFileName, alpha, beta, cutoff); dataBundle->sequences = sequences; driver(dataBundle); numSeqs = dataBundle->count; numChimeras = dataBundle->numChimeras; delete dataBundle; } } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found.\n"); outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); if (removeChimeras) { if (!util.isBlank(accnosFileName)) { m->mothurOut("\nRemoving chimeras from your input files:\n"); string inputString = "fasta=" + fastafile + ", accnos=" + accnosFileName; if ((countfile != "") && (!dups)) { inputString += ", count=" + countfile; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); if (countfile != "") { if (!dups) { //dereplicate=f, so remove sequences where any sample found the reads to be chimeric map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); string currentName = getOutputFileName("count", variables); util.renameFile(filenames["count"][0], currentName); util.mothurRemove(filenames["count"][0]); outputNames.push_back(currentName); outputTypes["count"].push_back(currentName); }//else, mothur created a modified count file removing chimeras by sample. No need to include count file on remove.seqs command. 
Deconvolute function created modified count table already } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string currentName = getOutputFileName("fasta", variables); util.renameFile(filenames["fasta"][0], currentName); util.mothurRemove(filenames["fasta"][0]); outputNames.push_back(currentName); outputTypes["fasta"].push_back(currentName); }else { m->mothurOut("\nNo chimeras found, skipping remove.seqs.\n"); } } //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "execute"); exit(1); } } //********************************************************************************************************************** string ChimeraPerseusCommand::getCountFile(string& inputFile){ try { string countFile = ""; m->mothurOut("\nNo count file given, running unique.seqs command to generate one.\n\n"); //use unique.seqs to create new name and fastafile string inputString = "format=count, fasta=" + inputFile; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: unique.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* uniqueCommand = new UniqueSeqsCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); countFile = filenames["count"][0]; inputFile = filenames["fasta"][0]; return countFile; } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "getNamesFile"); exit(1); } } /**************************************************************************************************/ struct perseusGroupsData { string fastafile; string dupsfile; string chimeraFileName; string accnosFileName; string countlist; map > parsedFiles; bool hasCount, dups; int threadID, count, numChimeras; double alpha, beta, cutoff; vector groups; Utils util; MothurOut* m; perseusGroupsData(){} perseusGroupsData(map >& g2f,bool dps, bool hc, double a, double b, double c, string o, string f, string n, string ac, string ctlist, vector gr, int tid) { alpha = a; beta = b; cutoff = c; fastafile = f; dupsfile = n; chimeraFileName = o; countlist = ctlist; accnosFileName = ac; m = MothurOut::getInstance(); threadID = tid; groups = gr; hasCount = hc; dups = dps; count = 0; numChimeras = 0; parsedFiles = g2f; } }; //********************************************************************************************************************** vector loadSequences(map& nameMap, string thisGroupsFastaFile, perseusGroupsData* params){ try { bool error = false; vector sequences; ifstream in; params->util.openInputFile(thisGroupsFastaFile, in); vector nameVector; map::iterator 
itNameMap; while (!in.eof()) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); itNameMap = nameMap.find(seq.getName()); if (itNameMap == nameMap.end()){ error = true; params->m->mothurOut("[ERROR]: " + seq.getName() + " is in your fastafile, but is not in your name or count file, please correct.\n"); }else { int num = itNameMap->second; seq.setAligned(params->util.removeNs(seq.getUnaligned())); sequences.push_back(seqData(seq.getName(), seq.getUnaligned(), num)); } } in.close(); if (error) { params->m->setControl_pressed(true); } //sort by frequency sort(sequences.rbegin(), sequences.rend()); return sequences; } catch(exception& e) { params->m->errorOut(e, "ChimeraPerseusCommand", "loadSequences"); exit(1); } } //********************************************************************************************************************** //string outputFName, string accnos, string countlist, int start, int end, vector groups void driverGroups(perseusGroupsData* params){ try { //clears files ofstream out, out1, out2; params->util.openOutputFile(params->chimeraFileName, out); out.close(); params->util.openOutputFile(params->accnosFileName, out1); out1.close(); int totalSeqs = 0; ofstream outCountList; if (params->hasCount && params->dups) { params->util.openOutputFile(params->countlist, outCountList); } for (map >::iterator it = params->parsedFiles.begin(); it != params->parsedFiles.end(); it++) { long start = time(nullptr); if (params->m->getControl_pressed()) { break; } string thisGroup = it->first; map nameMap; if (params->hasCount) { CountTable ct; ct.readTable(it->second[1], false, true); nameMap = ct.getNameMap(); } params->m->mothurOut("\nChecking sequences from group " + thisGroup + "...\n"); perseusData* driverParams = new perseusData((params->chimeraFileName+thisGroup), (params->accnosFileName+thisGroup), params->alpha, params->beta, params->cutoff); driverParams->sequences = loadSequences(nameMap, it->second[0], params); if (params->m->getControl_pressed()) { break; } driver(driverParams); totalSeqs += driverParams->count; params->numChimeras += driverParams->numChimeras; if (params->m->getControl_pressed()) { break; } if (params->dups) { if (!params->util.isBlank(driverParams->accnosFileName)) { ifstream in; params->util.openInputFile(driverParams->accnosFileName, in); string name; if (params->hasCount) { while (!in.eof()) { in >> name; gobble(in); outCountList << name << '\t' << thisGroup << endl; } in.close(); } } } //append files params->util.appendFiles(driverParams->chimeraFileName, params->chimeraFileName); params->util.mothurRemove(driverParams->chimeraFileName); params->util.appendFiles(driverParams->accnosFileName, params->accnosFileName); params->util.mothurRemove(driverParams->accnosFileName); params->m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(driverParams->count) + " sequences from group " + thisGroup + ".\n"); delete driverParams; } if (params->hasCount && params->dups) { outCountList.close(); } params->count = totalSeqs; } catch(exception& e) { params->m->errorOut(e, "ChimeraPerseusCommand", "driverGroups"); exit(1); } } //********************************************************************************************************************** vector ChimeraPerseusCommand::readFiles(string inputFile, map nameMap){ try { map::iterator it; //read fasta file and create sequenceData structure - checking for file mismatches vector sequences; bool error = false; ifstream in; util.openInputFile(inputFile, 
in); alignLength = 0; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return sequences; } Sequence temp(in); gobble(in); it = nameMap.find(temp.getName()); if (it == nameMap.end()) { error = true; m->mothurOut("[ERROR]: " + temp.getName() + " is in your fasta file and not in your namefile, please correct.\n"); } else { temp.setAligned(util.removeNs(temp.getUnaligned())); sequences.push_back(seqData(temp.getName(), temp.getUnaligned(), it->second)); if (temp.getUnaligned().length() > alignLength) { alignLength = temp.getUnaligned().length(); } } } in.close(); if (error) { m->setControl_pressed(true); } //sort by frequency sort(sequences.rbegin(), sequences.rend()); return sequences; } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "readFiles"); exit(1); } } /**************************************************************************************************/ //perseusData(vector& s, double a, double b, double c, string o, string ac, MothurOut* mout) //numSeqs = createProcessesGroups(outputFileName, countlist, accnosFileName, newCountFile, groups, fastafile, countfile, numChimeras); int ChimeraPerseusCommand::createProcessesGroups(map >& parsedFiles, string outputFName, string countlisttemp, string accnos, string newCountFile, vector groups, string fasta, string dupsFile, int& numChimeras) { try { numChimeras = 0; //sanity check if (groups.size() < processors) { processors = groups.size(); m->mothurOut("Reducing processors to " + toString(groups.size()) + ".\n"); } //divide the groups between the processors vector lines; int remainingPairs = groups.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } //create array of worker threads vector workerThreads; vector data; long long num = 0; time_t start, end; time(&start); //Lauch worker threads for (int i = 0; i < processors-1; i++) { string extension = toString(i+1) + ".temp"; vector thisGroups; map > thisGroupsParsedFiles; for (int j = lines[i+1].start; j < lines[i+1].end; j++) { map >::iterator it = parsedFiles.find(groups[j]); if (it != parsedFiles.end()) { thisGroupsParsedFiles[groups[j]] = (it->second); thisGroups.push_back(groups[j]); } else { m->mothurOut("[ERROR]: missing files for group " + groups[j] + ", skipping\n"); } } perseusGroupsData* dataBundle = new perseusGroupsData(thisGroupsParsedFiles, dups, hasCount, alpha, beta, cutoff, (outputFName+extension), fasta, dupsFile, (accnos+extension), (countlisttemp+extension), thisGroups, (i+1)); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverGroups, dataBundle)); } vector thisGroups; map > thisGroupsParsedFiles; for (int j = lines[0].start; j < lines[0].end; j++) { map >::iterator it = parsedFiles.find(groups[j]); if (it != parsedFiles.end()) { thisGroupsParsedFiles[groups[j]] = (it->second); thisGroups.push_back(groups[j]); } else { m->mothurOut("[ERROR]: missing files for group " + groups[j] + ", skipping\n"); } } perseusGroupsData* dataBundle = new perseusGroupsData(thisGroupsParsedFiles, dups, hasCount, alpha, beta, cutoff, outputFName, fasta, dupsFile, accnos, countlisttemp, thisGroups, 0); driverGroups(dataBundle); num = dataBundle->count; numChimeras = 
dataBundle->numChimeras; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; numChimeras += data[i]->numChimeras; string extension = toString(i+1) + ".temp"; util.appendFiles((outputFName+extension), outputFName); util.mothurRemove((outputFName+extension)); util.appendFiles((accnos+extension), accnos); util.mothurRemove((accnos+extension)); util.appendFiles((countlisttemp+extension), countlisttemp); util.mothurRemove((countlisttemp+extension)); delete data[i]; delete workerThreads[i]; } delete dataBundle; time(&end); m->mothurOut("It took " + toString(difftime(end, start)) + " secs to check " + toString(num) + " sequences.\n\n"); return num; } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "createProcessesGroups"); exit(1); } } //********************************************************************************************************************** int ChimeraPerseusCommand::deconvoluteResults(string outputFileName, string accnosFileName){ try { int total = 0; unordered_set chimerasInFile = util.readAccnos(accnosFileName);//this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once util.printAccnos(accnosFileName, chimerasInFile); //edit chimera file ifstream in; util.openInputFile(outputFileName, in); ofstream out; util.openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); int DiffsToBestMatch, BestMatchIndex, DiffstToChimera, IndexofLeftParent, IndexOfRightParent; float temp1,temp2, temp3, temp4, temp5, temp6, temp7, temp8; string index, BestMatchName, parent1, parent2, flag; string name = ""; set namesInFile; //assumptions - in file each read will always look like /* SequenceIndex Name DiffsToBestMatch BestMatchIndex BestMatchName DiffstToChimera IndexofLeftParent IndexOfRightParent NameOfLeftParent NameOfRightParent DistanceToBestMatch cIndex (cIndex - singleDist) loonIndex MismatchesToChimera MismatchToTrimera ChimeraBreakPoint LogisticProbability TypeOfSequence 0 F01QG4L02JVBQY 0 0 Null 0 0 0 Null Null 0.0 0.0 0.0 0.0 0 0 0 0.0 0.0 good 1 F01QG4L02ICTC6 0 0 Null 0 0 0 Null Null 0.0 0.0 0.0 0.0 0 0 0 0.0 0.0 good 2 F01QG4L02JZOEC 48 0 F01QG4L02JVBQY 47 0 0 F01QG4L02JVBQY F01QG4L02JVBQY 2.0449 2.03545 -0.00944493 0 47 2147483647 138 0 good 3 F01QG4L02G7JEC 42 0 F01QG4L02JVBQY 40 1 0 F01QG4L02ICTC6 F01QG4L02JVBQY 1.87477 1.81113 -0.0636404 5.80145 40 2147483647 25 0 good */ //get and print headers BestMatchName = util.getline(in); gobble(in); out << BestMatchName << endl; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove((outputFileName+".temp")); return 0; } bool print = false; in >> index; gobble(in); if (index != "SequenceIndex") { //if you are not a header line, there will be a header line for each group if group file is given in >> name; gobble(in); in >> DiffsToBestMatch; gobble(in); in >> BestMatchIndex; gobble(in); in >> BestMatchName; gobble(in); in >> DiffstToChimera; gobble(in); in >> IndexofLeftParent; gobble(in); in >> IndexOfRightParent; gobble(in); in >> parent1; gobble(in); in >> parent2; gobble(in); in >> temp1 >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> flag; gobble(in); //is this name already in the file auto itNames = namesInFile.find((name)); if (itNames == namesInFile.end()) { //no not in file if (flag == "good") { //are you really a no?? //is this sequence really not chimeric?? 
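// A read reported as "good" here may still have been flagged chimeric by another sample's
// run; it is only printed to the merged report if its name is absent from the combined
// accnos set (chimerasInFile) built above.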
auto itChimeras = chimerasInFile.find(name); //then you really are a no so print, otherwise skip if (itChimeras == chimerasInFile.end()) { print = true; } }else{ print = true; } } if (print) { namesInFile.insert(name); out << index << '\t' << name << '\t' << DiffsToBestMatch << '\t' << BestMatchIndex << '\t'; out << BestMatchName << '\t' << DiffstToChimera << '\t' << IndexofLeftParent << '\t' << IndexOfRightParent << '\t' << parent1 << '\t' << parent2 << '\t'; out << temp1 << '\t' << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << flag << endl; } }else { index = util.getline(in); gobble(in); } } in.close(); out.close(); util.mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str()); return total; } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "deconvoluteResults"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/chimeraperseuscommand.h000077500000000000000000000033671424121717000227650ustar00rootroot00000000000000#ifndef CHIMERAPERSEUSCOMMAND_H #define CHIMERAPERSEUSCOMMAND_H /* * chimeraperseuscommand.h * Mothur * * Created by westcott on 10/26/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "command.hpp" #include "sequenceparser.h" #include "sequencecountparser.h" #include "myPerseus.h" #include "counttable.h" /***********************************************************/ class ChimeraPerseusCommand : public Command { public: ChimeraPerseusCommand(string); ~ChimeraPerseusCommand() = default; vector setParameters(); string getCommandName() { return "chimera.perseus"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Quince C, Lanzen A, Davenport RJ, Turnbaugh PJ (2011). Removing noise from pyrosequenced amplicons. BMC Bioinformatics 12:38.\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection. Bioinformatics 27:2194.\nhttp://www.mothur.org/wiki/Chimera.perseus\n"; } string getDescription() { return "detect chimeric sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, hasCount, dups, removeChimeras; string fastafile, countfile; int processors, alignLength; double cutoff, alpha, beta; vector outputNames; string getCountFile(string&); vector readFiles(string, map); int deconvoluteResults(string, string); int createProcessesGroups(map >&, string, string, string, string, vector, string, string, int&); }; /***********************************************************/ #endif mothur-1.48.0/source/commands/chimerapintailcommand.cpp000077500000000000000000000455721424121717000232760ustar00rootroot00000000000000/* * chimerapintailcommand.cpp * Mothur * * Created by westcott on 4/1/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "chimerapintailcommand.h" #include "pintail.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector ChimeraPintailCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta); CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pconservation); CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pquantile); CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter); CommandParameter pwindow("window", "Number", "", "0", "", "", "","","",false,false); parameters.push_back(pwindow); CommandParameter pincrement("increment", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pincrement); CommandParameter premovechimeras("removechimeras", "Boolean", "", "t", "", "", "","alns",false,false); parameters.push_back(premovechimeras); CommandParameter pmask("mask", "String", "", "", "", "", "","",false,false); parameters.push_back(pmask); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["accnos"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ChimeraPintailCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ChimeraPintailCommand::getHelpString(){ try { string helpString = ""; helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command was created using the algorithms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n"; helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, removechimeras, conservation and quantile.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n"; helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. 
\n"; helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n"; helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n"; helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n"; helpString += "The removechimeras parameter allows you to indicate you would like to automatically remove the sequences that are flagged as chimeric. Default=t.\n"; helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n"; helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n"; helpString += "The chimera.pintail command should be in the following format: \n"; helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n"; helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChimeraPintailCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ChimeraPintailCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "chimera") { pattern = "[filename],[tag],pintail.chimeras-[filename],pintail.chimeras"; } else if (type == "accnos") { pattern = "[filename],[tag],pintail.accnos-[filename],pintail.accnos"; } else if (type == "fasta") { pattern = "[filename],[tag],pintail.fasta-[filename],pintail.fasta"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ChimeraPintailCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ChimeraPintailCommand::ChimeraPintailCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); //check for required parameters ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } string temp; temp = validParameter.valid(parameters, "filter"); if (temp == "not found") { temp = "F"; } filter = util.isTrue(temp); temp = validParameter.valid(parameters, "window"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, window); temp = validParameter.valid(parameters, "increment"); if (temp == "not found") { temp = "25"; } 
util.mothurConvert(temp, increment); //this has to go after save so that if the user sets save=t and provides no reference we abort templatefile = validParameter.validFile(parameters, "reference"); if (templatefile == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true; }else if (templatefile == "not open") { abort = true; } maskfile = validParameter.validPath(parameters, "mask"); if (maskfile == "not found") { maskfile = ""; } else if (maskfile != "default") { if (util.checkLocations(maskfile, current->getLocations())) { } else { m->mothurOut("Unable to open " + maskfile + ".\n"); abort = true; } //erase from file list }else if (maskfile == "default") { m->mothurOut("Using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013.\n"); } if (outputdir == "") { outputdir = util.hasPath(fastafile); } consfile = validParameter.validFile(parameters, "conservation"); if (consfile == "not open") { abort = true; } else if (consfile == "not found") { consfile = ""; //check for consfile string tempConsFile = util.getRootName(inputDir + util.getSimpleName(templatefile)) + "freq"; ifstream FileTest(tempConsFile.c_str()); if(FileTest){ string line = util.getline(FileTest); bool GoodFile = util.checkReleaseVersion(line, current->getVersion()); if (GoodFile) { m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time.\n"); consfile = tempConsFile; FileTest.close(); } }else { string tempConsFile = current->getDefaultPath()[0] + util.getRootName(util.getSimpleName(templatefile)) + "freq"; ifstream FileTest2(tempConsFile.c_str()); if(FileTest2){ string line = util.getline(FileTest2); bool GoodFile = util.checkReleaseVersion(line, current->getVersion()); if (GoodFile) { m->mothurOut("I found " + tempConsFile + " in your input file directory. 
I will use it to save time.\n"); consfile = tempConsFile; FileTest2.close(); } } } } quanfile = validParameter.validFile(parameters, "quantile"); if (quanfile == "not open") { abort = true; } else if (quanfile == "not found") { quanfile = ""; } temp = validParameter.valid(parameters, "removechimeras"); if (temp == "not found") { temp = "t"; } removeChimeras = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand"); exit(1); } } //*************************************************************************************************************** int ChimeraPintailCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Checking sequences from " + fastafile + " ...\n" ); long start = time(nullptr); //check for quantile to save the time lookForShortcutFiles(templatefile); numSeqs = checkChimeras(); if (m->getControl_pressed()) { outputTypes.clear(); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } m->mothurOut("\n\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences.\n"); if (removeChimeras) { string accnosFileName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { accnosFileName = (itTypes->second)[0]; } } if (!util.isBlank(accnosFileName)) { m->mothurOut("\nRemoving chimeras from your input files:\n"); string inputString = "fasta=" + fastafile + ", accnos=" + accnosFileName; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string currentName = getOutputFileName("fasta", variables); util.renameFile(filenames["fasta"][0], currentName); util.mothurRemove(filenames["fasta"][0]); outputNames.push_back(currentName); outputTypes["fasta"].push_back(currentName); }else { m->mothurOut("\nNo chimeras found, skipping remove.seqs.\n"); } } //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraPintailCommand", "execute"); exit(1); } } //********************************************************************************************************************** int ChimeraPintailCommand::checkChimeras(){ try { MothurChimera* chimera = new Pintail(fastafile, templatefile, filter, maskfile, consfile, quanfile, window, increment, outputdir, current->getVersion()); if (m->getControl_pressed()) { delete chimera; return 0; } if (chimera->getUnaligned()) { m->mothurOut("[ERROR]: Your reference sequences are 
unaligned, please correct.\n"); delete chimera; return 0; } templateSeqsLength = chimera->getLength(); string outputFileName, accnosFileName; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); if (maskfile != "") { variables["[tag]"] = util.getSimpleName(util.getRootName(maskfile)); } outputFileName = getOutputFileName("chimera", variables); accnosFileName = getOutputFileName("accnos", variables); outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); ofstream out; util.openOutputFile(outputFileName, out); ofstream out2; util.openOutputFile(accnosFileName, out2); ifstream inFASTA; util.openInputFile(fastafile, inFASTA); int count = 0; while (!inFASTA.eof()) { if (m->getControl_pressed()) { break; } Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned m->mothurOut("[WARNING]: " + candidateSeq->getName() + " is not the same length as the template sequences. Skipping.\n"); }else{ //find chimeras chimera->getChimeras(candidateSeq); if (m->getControl_pressed()) { delete candidateSeq; return 1; } //print results chimera->print(out, out2); } count++; } delete candidateSeq; //report progress if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); } } //report progress if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); } out.close(); out2.close(); inFASTA.close(); delete chimera; return count; } catch(exception& e) { m->errorOut(e, "ChimeraPintailCommand", "checkChiemras"); exit(1); } } //********************************************************************************************************************** int ChimeraPintailCommand::lookForShortcutFiles(string baseName){ try { string tempQuan = ""; if ((!filter) && (maskfile == "")) { tempQuan = inputDir + util.getRootName(util.getSimpleName(baseName)) + "pintail.quan"; }else if ((!filter) && (maskfile != "")) { tempQuan = inputDir + util.getRootName(util.getSimpleName(baseName)) + "pintail.masked.quan"; }else if ((filter) && (maskfile != "")) { tempQuan = inputDir + util.getRootName(util.getSimpleName(baseName)) + "pintail.filtered." + util.getSimpleName(util.getRootName(fastafile)) + "masked.quan"; }else if ((filter) && (maskfile == "")) { tempQuan = inputDir + util.getRootName(util.getSimpleName(baseName)) + "pintail.filtered." + util.getSimpleName(util.getRootName(fastafile)) + "quan"; } ifstream FileTest(tempQuan.c_str()); if(FileTest){ string line = util.getline(FileTest); bool GoodFile = util.checkReleaseVersion(line, current->getVersion()); if (GoodFile) { m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time.\n"); quanfile = tempQuan; FileTest.close(); } }else { string tryPath = current->getDefaultPath()[0]; string tempQuan = ""; if ((!filter) && (maskfile == "")) { tempQuan = tryPath + util.getRootName(util.getSimpleName(baseName)) + "pintail.quan"; }else if ((!filter) && (maskfile != "")) { tempQuan = tryPath + util.getRootName(util.getSimpleName(baseName)) + "pintail.masked.quan"; }else if ((filter) && (maskfile != "")) { tempQuan = tryPath + util.getRootName(util.getSimpleName(baseName)) + "pintail.filtered." 
+ util.getSimpleName(util.getRootName(fastafile)) + "masked.quan"; }else if ((filter) && (maskfile == "")) { tempQuan = tryPath + util.getRootName(util.getSimpleName(baseName)) + "pintail.filtered." + util.getSimpleName(util.getRootName(fastafile)) + "quan"; } ifstream FileTest2(tempQuan.c_str()); if(FileTest2){ string line = util.getline(FileTest2); bool GoodFile = util.checkReleaseVersion(line, current->getVersion()); if (GoodFile) { m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time.\n"); quanfile = tempQuan; FileTest2.close(); } } } return 0; } catch(exception& e) { m->errorOut(e, "ChimeraPintailCommand", "lookForShortcutFiles"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/chimerapintailcommand.h000077500000000000000000000032351424121717000227310ustar00rootroot00000000000000#ifndef CHIMERAPINTAILCOMMAND_H #define CHIMERAPINTAILCOMMAND_H /* * chimerapintailcommand.h * Mothur * * Created by westcott on 4/1/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "command.hpp" #include "mothurchimera.h" /***********************************************************/ class ChimeraPintailCommand : public Command { public: ChimeraPintailCommand(string); ~ChimeraPintailCommand(){} vector setParameters(); string getCommandName() { return "chimera.pintail"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Ashelford KE, Chuzhanova NA, Fry JC, Jones AJ, Weightman AJ (2005). At least 1 in 20 16S rRNA sequence records currently held in public repositories is estimated to contain substantial anomalies. Appl Environ Microbiol 71: 7724-36. \nAshelford KE, Chuzhanova NA, Fry JC, Jones AJ, Weightman AJ (2006). New screening software shows that most recent large 16S rRNA gene clone libraries contain chimeras. Appl Environ Microbiol 72: 5734-41. \nhttp://www.mothur.org/wiki/Chimera.pintail"; } string getDescription() { return "detect chimeric sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: int checkChimeras(); int lookForShortcutFiles(string baseName); bool abort, filter, save, removeChimeras; string fastafile, templatefile, consfile, quanfile, maskfile, inputDir; int window, increment, numSeqs, templateSeqsLength; vector outputNames; }; /***********************************************************/ #endif mothur-1.48.0/source/commands/chimeraslayercommand.cpp000077500000000000000000001340621424121717000231260ustar00rootroot00000000000000/* * chimeraslayercommand.cpp * Mothur * * Created by westcott on 3/31/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "chimeraslayercommand.h" #include "uniqueseqscommand.h" #include "sequenceparser.h" #include "counttable.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector ChimeraSlayerCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pwindow("window", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pwindow); CommandParameter pksize("ksize", "Number", "", "7", "", "", "","",false,false); parameters.push_back(pksize); CommandParameter pmatch("match", "Number", "", "5.0", "", "", "","",false,false); parameters.push_back(pmatch); CommandParameter pmismatch("mismatch", "Number", "", "-4.0", "", "", "","",false,false); parameters.push_back(pmismatch); CommandParameter pminsim("minsim", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pminsim); CommandParameter pmincov("mincov", "Number", "", "70", "", "", "","",false,false); parameters.push_back(pmincov); CommandParameter pminsnp("minsnp", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminsnp); CommandParameter pminbs("minbs", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pminbs); CommandParameter prealign("realign", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(prealign); CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "","fasta",false,false); parameters.push_back(ptrim); CommandParameter psplit("split", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psplit); CommandParameter pnumwanted("numwanted", "Number", "", "15", "", "", "","",false,false); parameters.push_back(pnumwanted); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pdivergence("divergence", "Number", "", "1.007", "", "", "","",false,false); parameters.push_back(pdivergence); CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups); CommandParameter premovechimeras("removechimeras", "Boolean", "", "t", "", "", "","alns",false,false); parameters.push_back(premovechimeras); CommandParameter pparents("parents", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pparents); CommandParameter pincrement("increment", "Number", "", "5", "", "", "","",false,false); parameters.push_back(pincrement); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; 
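// Pre-register each output type with an empty file list; execute() appends the files it
// creates, and its tail promotes the first entry of each type to the current accnos, fasta,
// and count files for downstream commands.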
outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ChimeraSlayerCommand::getHelpString(){ try { string helpString = ""; helpString += "The chimera.slayer command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n"; helpString += "The chimera.slayer command parameters are fasta, name, group, template, processors, dereplicate, removechimeras, trim, ksize, window, match, mismatch, divergence, minsim, mincov, minbs, minsnp, parents, iters, increment, numwanted and realign.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n"; helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; helpString += "The count parameter allows you to provide a count file. The count file reference=self. If your count file contains group information, when checking sequences, only sequences from the same group as the query sequence will be used as the reference. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n"; helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n"; helpString += "The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest piece, default=F. \n"; helpString += "The split parameter allows you to check both pieces of non-chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n"; helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=50. \n"; helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n"; helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n"; helpString += "The ksize parameter allows you to input kmersize, default is 7, used if search is kmer. \n"; helpString += "The match parameter allows you to reward matched bases, default is 5. \n"; helpString += "The parents parameter allows you to select the number of potential parents to investigate from the numwanted best matches after rating them, default is 3. 
\n"; helpString += "The mismatch parameter allows you to penalize mismatched bases, default is -4. \n"; helpString += "The divergence parameter allows you to set a cutoff for chimera determination, default is 1.007. \n"; helpString += "The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method, default=1000.\n"; helpString += "The minsim parameter allows you to specify a minimum similarity with the parent fragments, default=90. \n"; helpString += "The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n"; helpString += "The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n"; helpString += "The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n"; helpString += "The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default true. \n"; helpString += "The removechimeras parameter allows you to indicate you would like to automatically remove the sequences that are flagged as chimeric. Default=t.\n"; helpString += "The chimera.slayer command should be in the following format: \n"; helpString += "chimera.slayer(fasta=yourFastaFile, reference=yourTemplate) \n"; helpString += "Example: chimera.slayer(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ChimeraSlayerCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "chimera") { pattern = "[filename],slayer.chimeras"; } else if (type == "accnos") { pattern = "[filename],slayer.accnos"; } else if (type == "fasta") { pattern = "[filename],slayer.fasta"; } else if (type == "count") { pattern = "[filename],slayer.count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ChimeraSlayerCommand::ChimeraSlayerCommand(string option) : Command() { try { hasCount = false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } bool hasName = false; string namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { 
namefile = ""; } else { current->setNameFile(namefile); } if (namefile != "") { hasName = true; } //check for required parameters countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if (countfile != "") { hasCount = true; } //make sure there is at least one valid file left if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name.\n"); abort = true; } bool hasGroup = false; string groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); hasGroup = true; } if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group.\n"); abort = true; } string path; map::iterator it = parameters.find("reference"); //user has given a template file if(it != parameters.end()){ if (it->second == "self") { templatefile = "self"; } else { templatefile = validParameter.validFile(parameters, "reference"); if (templatefile == "not open") { abort = true; } else if (templatefile == "not found") { //check for saved reference sequences m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true; } } }else if ((hasName) || (hasCount)) { templatefile = "self"; } else { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); templatefile = ""; abort = true; } string temp = validParameter.valid(parameters, "ksize"); if (temp == "not found") { temp = "7"; } util.mothurConvert(temp, ksize); temp = validParameter.valid(parameters, "window"); if (temp == "not found") { temp = "50"; } util.mothurConvert(temp, window); temp = validParameter.valid(parameters, "match"); if (temp == "not found") { temp = "5"; } util.mothurConvert(temp, match); temp = validParameter.valid(parameters, "mismatch"); if (temp == "not found") { temp = "-4"; } util.mothurConvert(temp, mismatch); temp = validParameter.valid(parameters, "divergence"); if (temp == "not found") { temp = "1.007"; } util.mothurConvert(temp, divR); temp = validParameter.valid(parameters, "minsim"); if (temp == "not found") { temp = "90"; } util.mothurConvert(temp, minSimilarity); temp = validParameter.valid(parameters, "mincov"); if (temp == "not found") { temp = "70"; } util.mothurConvert(temp, minCoverage); temp = validParameter.valid(parameters, "minbs"); if (temp == "not found") { temp = "90"; } util.mothurConvert(temp, minBS); temp = validParameter.valid(parameters, "minsnp"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, minSNP); temp = validParameter.valid(parameters, "parents"); if (temp == "not found") { temp = "3"; } util.mothurConvert(temp, parents); temp = validParameter.valid(parameters, "realign"); if (temp == "not found") { temp = "t"; } realign = util.isTrue(temp); temp = validParameter.valid(parameters, "trim"); if (temp == "not found") { temp = "f"; } trim = util.isTrue(temp); temp = validParameter.valid(parameters, "split"); if (temp == "not found") { temp = "f"; } trimera = util.isTrue(temp); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "increment"); if (temp == "not found") { temp = "5"; } util.mothurConvert(temp, increment); temp = validParameter.valid(parameters, 
"numwanted"); if (temp == "not found") { temp = "15"; } util.mothurConvert(temp, numwanted); temp = validParameter.valid(parameters, "dereplicate"); if (temp == "not found") { temp = "false"; } dups = util.isTrue(temp); temp = validParameter.valid(parameters, "removechimeras"); if (temp == "not found") { temp = "t"; } removeChimeras = util.isTrue(temp); if ((hasName || hasCount) && (templatefile != "self")) { m->mothurOut("You have provided a namefile or countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting.\n"); abort=true; } if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting.\n"); abort=true; } if ((namefile != "") || (groupfile != "")) { //convert to count string rootFileName = namefile; if (rootFileName == "") { rootFileName = groupfile; } if (outputdir == "") { outputdir = util.hasPath(rootFileName); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(rootFileName)); string outputFileName = getOutputFileName("count", variables); CountTable ct; ct.createTable(namefile, groupfile, nullVector); ct.printCompressedTable(outputFileName); outputNames.push_back(outputFileName); current->setCountFile(outputFileName); countfile = outputFileName; } } } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand"); exit(1); } } //*************************************************************************************************************** int ChimeraSlayerCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Checking sequences from " + fastafile + " ...\n" ); long start = time(nullptr); if (outputdir == "") { outputdir = util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outputFileName = getOutputFileName("chimera", variables); string accnosFileName = getOutputFileName("accnos", variables); string trimFastaFileName = getOutputFileName("fasta", variables); string newCountFile = ""; //clears files ofstream out, out1, out2; util.openOutputFile(outputFileName, out); out.close(); util.openOutputFile(accnosFileName, out1); out1.close(); if (trim) { util.openOutputFile(trimFastaFileName, out2); out2.close(); } outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); if (trim) { outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); } //maps a filename to priority map. 
//if no groupfile this is fastafileNames[s] -> prioirity //if groupfile then this is each groups seqs -> priority map > fileToPriority; map >::iterator itFile; map fileGroup; fileToPriority[fastafile] = priority; //default fileGroup[fastafile] = "noGroup"; int totalChimeras = 0; int numSeqs = 0; if (templatefile == "self") { setUpForSelfReference(fileGroup, fileToPriority); } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } if (fileToPriority.size() == 1) { //you running without a groupfile itFile = fileToPriority.begin(); string thisFastaName = itFile->first; map thisPriority = itFile->second; numSeqs = driver(outputFileName, thisFastaName, accnosFileName, trimFastaFileName, thisPriority); if (m->getControl_pressed()) { outputTypes.clear(); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } }else { //you have provided a groupfile if (hasCount) { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); newCountFile = getOutputFileName("count", variables); } numSeqs = driverGroups(outputFileName, accnosFileName, trimFastaFileName, fileToPriority, fileGroup, newCountFile); if (hasCount && dups) { CountTable c; c.readTable(countfile, true, false); if (!util.isBlank(newCountFile)) { ifstream in2; util.openInputFile(newCountFile, in2); string name, group; while (!in2.eof()) { in2 >> name >> group; gobble(in2); c.setAbund(name, group, 0); } in2.close(); } util.mothurRemove(newCountFile); c.printTable(newCountFile); } if (!dups) { totalChimeras = deconvoluteResults(outputFileName, accnosFileName, trimFastaFileName); m->mothurOut("\n" + toString(totalChimeras) + " chimera found.\n"); }else { if (hasCount) { unordered_set doNotRemove; CountTable c; c.readTable(newCountFile, true, true); //returns non zeroed names vector namesInTable = c.printTable(newCountFile); outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); for (int i = 0; i < namesInTable.size(); i++) { doNotRemove.insert(namesInTable[i]); } //remove names we want to keep from accnos file. 
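//with dereplicate=t, a read is only treated as chimeric overall if every sample that contained it flagged it, i.e. its abundance in the updated count table is zero in all groups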
unordered_set accnosNames = util.readAccnos(accnosFileName); ofstream out2; util.openOutputFile(accnosFileName, out2); for (auto it = accnosNames.begin(); it != accnosNames.end(); it++) { if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; } } out2.close(); } } } m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences.\n"); if (removeChimeras) { if (!util.isBlank(accnosFileName)) { m->mothurOut("\nRemoving chimeras from your input files:\n"); string inputString = "fasta=" + fastafile + ", accnos=" + accnosFileName; if ((countfile != "") && (!dups)) { inputString += ", count=" + countfile; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); if (countfile != "") { if (!dups) { //dereplicate=f, so remove sequences where any sample found the reads to be chimeric map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); string currentName = getOutputFileName("count", variables); util.renameFile(filenames["count"][0], currentName); util.mothurRemove(filenames["count"][0]); outputNames.push_back(currentName); outputTypes["count"].push_back(currentName); }//else, mothur created a modified count file removing chimeras by sample. No need to include count file on remove.seqs command. Deconvolute function created modified count table already } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string currentName = getOutputFileName("fasta", variables); util.renameFile(filenames["fasta"][0], currentName); util.mothurRemove(filenames["fasta"][0]); outputNames.push_back(currentName); outputTypes["fasta"].push_back(currentName); }else { m->mothurOut("\nNo chimeras found, skipping remove.seqs.\n"); } } //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "execute"); exit(1); } } //********************************************************************************************************************** int ChimeraSlayerCommand::deconvoluteResults(string outputFileName, string accnosFileName, string trimFileName){ try { unordered_set chimerasInFile = util.readAccnos(accnosFileName);//this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once unordered_set::iterator itChimeras; unordered_set::iterator itUnique; int total = 0; if 
(trimera) { //add in more potential uniqueNames unordered_set newUniqueNames = chimerasInFile; for (itUnique = chimerasInFile.begin(); itUnique != chimerasInFile.end(); itUnique++) { newUniqueNames.insert(*itUnique+"_LEFT"); newUniqueNames.insert(*itUnique+"_RIGHT"); } chimerasInFile = newUniqueNames; } ofstream out2; util.openOutputFile(accnosFileName, out2); for (itUnique = chimerasInFile.begin(); itUnique != chimerasInFile.end(); itUnique++) { out2 << *itUnique << endl; total++; } out2.close(); //edit chimera file ifstream in; util.openInputFile(outputFileName, in); ofstream out; util.openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); string rest, parent1, parent2, line; set namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once set::iterator itNames; //assumptions - in file each read will always look like... /* F11Fcsw_92754 no F11Fcsw_63104 F11Fcsw_33372 F11Fcsw_37007 0.89441 80.4469 0.2 1.03727 93.2961 52.2 no 0-241 243-369 */ //get header line if (!in.eof()) { line = util.getline(in); gobble(in); out << line << endl; } //for the chimera file, we want to make sure if any group finds a sequence to be chimeric then all groups do, //so if this is a report that did not find it to be chimeric, but it appears in the accnos file, //then ignore this report and continue until we find the report that found it to be chimeric while (!in.eof()) { if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove((outputFileName+".temp")); return 0; } string name = ""; in >> name; gobble(in); in >> parent1; gobble(in); if (name == "Name") { //name = "Name" because we append the header line each time we add results from the groups line = util.getline(in); gobble(in); }else { if (parent1 == "no") { //is this sequence really not chimeric?? itChimeras = chimerasInFile.find(name); if (itChimeras == chimerasInFile.end()) { //is this sequence not already in the file itNames = namesInFile.find(name); if (itNames == namesInFile.end()) { out << name << '\t' << "no" << endl; namesInFile.insert(name); } } }else { //read the rest of the line double DivQLAQRB,PerIDQLAQRB,BootStrapA,DivQLBQRA,PerIDQLBQRA,BootStrapB; string flag, range1, range2; bool print = false; in >> parent2 >> DivQLAQRB >> PerIDQLAQRB >> BootStrapA >> DivQLBQRA >> PerIDQLBQRA >> BootStrapB >> flag >> range1 >> range2; gobble(in); //is this name already in the file itNames = namesInFile.find((name)); if (itNames == namesInFile.end()) { //no not in file if (flag == "no") { //are you really a no?? //is this sequence really not chimeric?? 
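//chimerasInFile holds every read flagged chimeric by any sample; a "no" report is only written if the read is absent from that set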
itChimeras = chimerasInFile.find(name); //then you really are a no so print, otherwise skip if (itChimeras == chimerasInFile.end()) { print = true; } }else{ print = true; } } if (print) { namesInFile.insert(name); out << name << '\t' << parent1 << '\t' << parent2 << '\t' << DivQLAQRB << '\t' << PerIDQLAQRB << '\t' << BootStrapA << '\t' << DivQLBQRA << '\t' << PerIDQLBQRA << '\t' << BootStrapB << '\t' << flag << '\t' << range1 << '\t' << range2 << endl; } } } in.close(); out.close(); util.mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str()); //edit fasta file if (trim) { ifstream in3; util.openInputFile(trimFileName, in3); ofstream out3; util.openOutputFile(trimFileName+".temp", out3); namesInFile.clear(); while (!in3.eof()) { if (m->getControl_pressed()) { in3.close(); out3.close(); util.mothurRemove(outputFileName); util.mothurRemove(accnosFileName); util.mothurRemove((trimFileName+".temp")); return 0; } Sequence seq(in3); gobble(in3); if (seq.getName() != "") { itNames = namesInFile.find(seq.getName()); if (itNames == namesInFile.end()) { seq.printSequence(out3); } } } in3.close(); out3.close(); util.mothurRemove(trimFileName); rename((trimFileName+".temp").c_str(), trimFileName.c_str()); } return total; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "deconvoluteResults"); exit(1); } } //********************************************************************************************************************** int ChimeraSlayerCommand::setUpForSelfReference(map& fileGroup, map >& fileToPriority){ try { fileGroup.clear(); fileToPriority.clear(); if (!hasCount) { countfile = getCountFile(fastafile); } string newFastaFile = outputdir + util.getRootName(util.getSimpleName(fastafile)) + "-sortedTemp.fasta"; if (countfile == "") { //no groups //sort fastafile by abundance, returns new sorted fastafile name m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); priority = sortFastaFile(fastafile, countfile, newFastaFile); m->mothurOut("Done.\n"); fileToPriority[fastafile] = priority; fileGroup[fastafile] = "noGroup"; }else if (countfile != "") { CountTable ct; if (!ct.testGroups(countfile)) { //sort fastafile by abundance, returns new sorted fastafile name m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); priority = sortFastaFile(fastafile, countfile, newFastaFile); m->mothurOut("Done.\n"); fileToPriority[fastafile] = priority; fileGroup[fastafile] = "noGroup"; }else { //using count file with groups //Parse sequences by group current->setMothurCalling(true); SequenceCountParser parser(countfile, fastafile, nullVector); vector groups = parser.getNamesOfGroups(); current->setMothurCalling(false); for (int i = 0; i < groups.size(); i++) { vector thisGroupsFiles = parser.getFiles(groups[i]); string thisGroupsFastaFile = thisGroupsFiles[0]; string thisGroupsCountFile = thisGroupsFiles[1]; newFastaFile = outputdir + util.getRootName(util.getSimpleName(fastafile)) + groups[i] + "-sortedTemp.fasta"; priority = sortFastaFile(thisGroupsFastaFile, thisGroupsCountFile, newFastaFile); fileToPriority[newFastaFile] = priority; fileGroup[newFastaFile] = groups[i]; } } } return 0; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "setUpForSelfReference"); exit(1); } } //********************************************************************************************************************** string ChimeraSlayerCommand::getCountFile(string& inputFile){ try { string countFile = ""; m->mothurOut("\nNo
namesfile given, running unique.seqs command to generate one.\n\n"); //use unique.seqs to create new name and fastafile string inputString = "format=count, fasta=" + inputFile; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: unique.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* uniqueCommand = new UniqueSeqsCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); countFile = filenames["count"][0]; inputFile = filenames["fasta"][0]; return countFile; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "getCountFile"); exit(1); } } //********************************************************************************************************************** int ChimeraSlayerCommand::driverGroups(string outputFName, string accnos, string fasta, map >& fileToPriority, map& fileGroup, string countlist){ try { int totalSeqs = 0; ofstream outCountList; if (hasCount && dups) { util.openOutputFile(countlist, outCountList); } for (map >::iterator itFile = fileToPriority.begin(); itFile != fileToPriority.end(); itFile++) { if (m->getControl_pressed()) { return 0; } long start = time(nullptr); string thisFastaName = itFile->first; map thisPriority = itFile->second; string thisoutputFileName = outputdir + util.getRootName(util.getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.chimera"; string thisaccnosFileName = outputdir + util.getRootName(util.getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.accnos"; string thistrimFastaFileName = outputdir + util.getRootName(util.getSimpleName(thisFastaName)) + fileGroup[thisFastaName] + "slayer.fasta"; m->mothurOut("\nChecking sequences from group: " + fileGroup[thisFastaName] + ".\n"); lines.clear(); int numSeqs = driver(thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority); //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table //This table will zero out group counts for seqs determined to be chimeric by that group. 
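//with a count file the flagged name/group pairs are written to the shared count list; with a name file each flagged name is first expanded to all of its redundant read names for this group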
if (dups) { if (!util.isBlank(thisaccnosFileName)) { ifstream in; util.openInputFile(thisaccnosFileName, in); string name; if (hasCount) { while (!in.eof()) { in >> name; gobble(in); outCountList << name << '\t' << fileGroup[thisFastaName] << endl; } in.close(); }else { map::iterator itGroupNameMap = group2NameFile.find(fileGroup[thisFastaName]); if (itGroupNameMap != group2NameFile.end()) { map thisnamemap; util.readNames(itGroupNameMap->second, thisnamemap); map::iterator itN; ofstream out; util.openOutputFile(thisaccnosFileName+".temp", out); while (!in.eof()) { in >> name; gobble(in); itN = thisnamemap.find(name); if (itN != thisnamemap.end()) { vector tempNames; util.splitAtComma(itN->second, tempNames); for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; } }else { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->setControl_pressed(true); } } out.close(); in.close(); util.renameFile(thisaccnosFileName+".temp", thisaccnosFileName); }else { m->mothurOut("[ERROR]: parsing cannot find " + fileGroup[thisFastaName] + ".\n"); m->setControl_pressed(true); } } } } //append files util.appendFiles(thisoutputFileName, outputFName); util.mothurRemove(thisoutputFileName); util.appendFiles(thisaccnosFileName, accnos); util.mothurRemove(thisaccnosFileName); if (trim) { util.appendFiles(thistrimFastaFileName, fasta); util.mothurRemove(thistrimFastaFileName); } util.mothurRemove(thisFastaName); totalSeqs += numSeqs; m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + fileGroup[thisFastaName] + ".\n"); } if (hasCount && dups) { outCountList.close(); } return totalSeqs; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "driverGroups"); exit(1); } } //********************************************************************************************************************** int ChimeraSlayerCommand::driver(string outputFName, string filename, string accnos, string fasta, map& priority){ try { if (m->getDebug()) { m->mothurOut("[DEBUG]: filename = " + filename + "\n"); } MothurChimera* chimera; if (templatefile != "self") { //you want to run slayer with a reference template chimera = new ChimeraSlayer(filename, templatefile, trim, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, util.getRandomNumber()); }else { chimera = new ChimeraSlayer(filename, templatefile, trim, priority, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, util.getRandomNumber()); } if (m->getControl_pressed()) { delete chimera; return 0; } if (chimera->getUnaligned()) { delete chimera; m->mothurOut("Your template sequences are different lengths, please correct.\n"); m->setControl_pressed(true); return 0; } templateSeqsLength = chimera->getLength(); ofstream out; util.openOutputFile(outputFName, out); ofstream out2; util.openOutputFile(accnos, out2); ofstream out3; if (trim) { util.openOutputFile(fasta, out3); } ifstream inFASTA; util.openInputFile(filename, inFASTA); chimera->printHeader(out); int count = 0; while (!inFASTA.eof()) { if (m->getControl_pressed()) { delete chimera; out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1; } Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); string candidateAligned = candidateSeq->getAligned(); if (candidateSeq->getName() != "") { //incase there is a commented sequence at the 
end of a file if (candidateSeq->getAligned().length() != templateSeqsLength) { m->mothurOut("[WARNING]: " + candidateSeq->getName() + " is not the same length as the template sequences. Skipping.\n"); }else{ //find chimeras chimera->getChimeras(candidateSeq); if (m->getControl_pressed()) { delete chimera; delete candidateSeq; return 1; } //if you are not chimeric, then check each half data_results wholeResults = chimera->getResults(); //determine if we need to split bool isChimeric = false; if (wholeResults.flag == "yes") { string chimeraFlag = "no"; if( (wholeResults.results[0].bsa >= minBS && wholeResults.results[0].divr_qla_qrb >= divR) || (wholeResults.results[0].bsb >= minBS && wholeResults.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; } if (chimeraFlag == "yes") { if ((wholeResults.results[0].bsa >= minBS) || (wholeResults.results[0].bsb >= minBS)) { isChimeric = true; } } } if ((!isChimeric) && trimera) { //split sequence in half by bases string leftQuery, rightQuery; Sequence tempSeq(candidateSeq->getName(), candidateAligned); divideInHalf(tempSeq, leftQuery, rightQuery); //run chimeraSlayer on each piece Sequence* left = new Sequence(candidateSeq->getName(), leftQuery); Sequence* right = new Sequence(candidateSeq->getName(), rightQuery); //find chimeras chimera->getChimeras(left); data_results leftResults = chimera->getResults(); chimera->getChimeras(right); data_results rightResults = chimera->getResults(); //if either piece is chimeric then report Sequence trimmed = chimera->print(out, out2, leftResults, rightResults); if (trim) { trimmed.printSequence(out3); } delete left; delete right; }else { //already chimeric //print results Sequence trimmed = chimera->print(out, out2); if (trim) { trimmed.printSequence(out3); } } } count++; } delete candidateSeq; //report progress if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); } } //report progress if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+ "\n"); } out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); delete chimera; return count; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "driver"); exit(1); } } /**************************************************************************************************/ int ChimeraSlayerCommand::divideInHalf(Sequence querySeq, string& leftQuery, string& rightQuery) { try { string queryUnAligned = querySeq.getUnaligned(); int numBases = int(queryUnAligned.length() * 0.5); string queryAligned = querySeq.getAligned(); leftQuery = querySeq.getAligned(); rightQuery = querySeq.getAligned(); int baseCount = 0; int leftSpot = 0; for (int i = 0; i < queryAligned.length(); i++) { //if you are a base if (isalpha(queryAligned[i])) { baseCount++; } //if you have half if (baseCount >= numBases) { leftSpot = i; break; } //first half } //blank out right side for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; } //blank out left side for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; } return 0; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "divideInHalf"); exit(1); } } /**************************************************************************************************/ //sort by abundance, no groups provided map ChimeraSlayerCommand::sortFastaFile(string thisfastafile, string thisdupsfile, string newFile) { try { map nameAbund; vector nameVector; if (hasCount) { CountTable ct; ct.readTable(thisdupsfile, false, false); nameAbund = 
ct.getNameMap(); } ifstream in; util.openInputFile(thisfastafile, in); while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return nameAbund; } Sequence seq(in); gobble(in); map::iterator itNameMap = nameAbund.find(seq.getName()); if (itNameMap == nameAbund.end()){ m->setControl_pressed(true); m->mothurOut("[ERROR]: " + seq.getName() + " is in your fastafile, but is not in your countfile, please correct.\n"); }else { int num = itNameMap->second; seqPriorityNode temp(num, seq.getAligned(), seq.getName()); nameVector.push_back(temp); } } in.close(); //sort by num represented sort(nameVector.begin(), nameVector.end(), compareSeqPriorityNodes); if (m->getControl_pressed()) { return nameAbund; } ofstream out; util.openOutputFile(newFile, out); //print new file in order of for (int i = 0; i < nameVector.size(); i++) { out << ">" << nameVector[i].name << endl << nameVector[i].seq << endl; } out.close(); return nameAbund; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/chimeraslayercommand.h000077500000000000000000000043531424121717000225720ustar00rootroot00000000000000#ifndef CHIMERASLAYERCOMMAND_H #define CHIMERASLAYERCOMMAND_H /* * chimeraslayercommand.h * Mothur * * Created by westcott on 3/31/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "command.hpp" #include "mothurchimera.h" #include "chimeraslayer.h" #include "sequenceparser.h" #include "sequencecountparser.h" /***********************************************************/ class ChimeraSlayerCommand : public Command { public: ChimeraSlayerCommand(string); ~ChimeraSlayerCommand() = default; vector setParameters(); string getCommandName() { return "chimera.slayer"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Haas BJ, Gevers D, Earl A, Feldgarden M, Ward DV, Giannokous G, Ciulla D, Tabbaa D, Highlander SK, Sodergren E, Methe B, Desantis TZ, Petrosino JF, Knight R, Birren BW (2011). Chimeric 16S rRNA sequence formation and detection in Sanger and 454-pyrosequenced PCR amplicons. Genome Res 21:494.\nhttp://www.mothur.org/wiki/Chimera.slayer"; } string getDescription() { return "detect chimeric sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector processIDS; //processid vector lines; int driver(string, string, string, string, map&); int divideInHalf(Sequence, string&, string&); map sortFastaFile(string fasta, string dups, string newFile); string getCountFile(string&); int deconvoluteResults(string, string, string); int setUpForSelfReference(map&, map >&); int driverGroups(string, string, string, map >&, map&, string); bool abort, realign, trim, trimera, hasCount, dups, removeChimeras; string fastafile, templatefile, countfile; int window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, templateSeqsLength; long long numSeqs; float divR; map priority; map group2NameFile; vector outputNames; }; /***********************************************************/ #endif mothur-1.48.0/source/commands/chimerauchimecommand.cpp000066400000000000000000002107201424121717000230720ustar00rootroot00000000000000/* * chimerauchimecommand.cpp * Mothur * * Created by westcott on 5/13/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "chimerauchimecommand.h" #include "uniqueseqscommand.h" #include "sequence.hpp" #include "systemcommand.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector ChimeraUchimeCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter puchimelocation("uchime", "String", "", "", "", "", "","",false,false); parameters.push_back(puchimelocation); CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew); CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns); CommandParameter premovechimeras("removechimeras", "Boolean", "", "t", "", "", "","alns",false,false); parameters.push_back(premovechimeras); CommandParameter pminh("minh", "Number", "", "0.3", "", "", "","",false,false); parameters.push_back(pminh); CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pmindiv); CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn); CommandParameter pdn("dn", "Number", "", "1.4", "", "", "","",false,false); parameters.push_back(pdn); CommandParameter pxa("xa", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pxa); CommandParameter pchunks("chunks", "Number", "", "4", "", "", "","",false,false); parameters.push_back(pchunks); CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "","",false,false); parameters.push_back(pminchunk); CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "","",false,false); parameters.push_back(pidsmoothwindow); CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups); CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxp); CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps); CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pskipgaps2); CommandParameter pminlen("minlen", "Number", "", "10", "", "", 
"","",false,false); parameters.push_back(pminlen); CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pmaxlen); CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pucl); CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pqueryfract); abort = false; calledHelp = false; vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["alns"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["fasta"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ChimeraUchimeCommand::getHelpString(){ try { string helpString = ""; helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n"; helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, removechimeras abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n"; helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n"; helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; helpString += "If the dereplicate parameter is false, then if one group finds the sequence to be chimeric, then all groups find it to be chimeric, default=f.\n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The removechimeras parameter allows you to indicate you would like to automatically remove the sequences that are flagged as chimeric. Default=t.\n"; helpString += "The uchime parameter allows you to specify the name and location of your uchime executable. By default mothur will look in your path and mothur's executable and mothur tools locations. You can set the uchime location as follows, uchime=/usr/bin/uchime.\n"; helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. 
Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n"; helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n"; helpString += "The minh parameter - minimum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n"; helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n"; helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n"; helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n"; helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n"; helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n"; helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n"; helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n"; //helpString += "The minsmoothid parameter - minimum fractional identity over smoothed window of candidate parent. Default 0.95.\n"; helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensitivity but tends to increase the error rate quite a bit.\n"; helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps are not counted as diffs. Default = T.\n"; helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if a column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n"; helpString += "The minlen parameter is the minimum unaligned sequence length. Default 10. Applies to both query and reference sequences.\n"; helpString += "The maxlen parameter is the maximum unaligned sequence length. Default 10000. Applies to both query and reference sequences.\n"; helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n"; helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5.
Applies only when ucl is true.\n"; helpString += "The chimera.uchime command should be in the following format: \n"; helpString += "chimera.uchime(fasta=yourFastaFile, reference=yourTemplate) \n"; helpString += "Example: chimera.uchime(fasta=AD.align, reference=silva.gold.align) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ChimeraUchimeCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string issue = "... uchime file does not exist. mothur requires the uchime executable."; issues.push_back(issue); string ianswer = "\tThe chimera.uchime command is a wrapper for the uchime program, http://drive5.com/usearch/manual/uchime_algo.html. We distribute the uchime executable with the executable versions of mothur. By default, mothur will look for uchime in the same location mothur's executable is as well as looking in your $PATH variable.\n"; ianswers.push_back(ianswer); string howto = "How do I use the dereplicate parameter?"; howtos.push_back(howto); string hanswer = "\tThe dereplicate parameter can be used when checking for chimeras by group. If the dereplicate parameter is false, then if one group finds the sequence to be chimeric, then all groups find it to be chimeric, default=f. If you set dereplicate=t, and then when a sequence is found to be chimeric it is removed from it’s group, not the entire dataset.\n\nNote: When you set dereplicate=t, mothur generates a new count table with the chimeras removed and counts adjusted by sample. It is important to note if you set dereplicate=true, do NOT include the count file with the remove.seqs command. 
For a detailed example, please reference https://mothur.org/wiki/chimera_dereplicate_example/\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string ChimeraUchimeCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "chimera") { pattern = "[filename],[tag],uchime.chimeras"; } else if (type == "accnos") { pattern = "[filename],[tag],uchime.accnos"; } else if (type == "alns") { pattern = "[filename],[tag],uchime.alns"; } else if (type == "fasta") { pattern = "[filename],[tag],uchime.fasta"; } else if (type == "count") { pattern = "[filename],[tag],uchime.count_table-[filename],count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ChimeraUchimeCommand::ChimeraUchimeCommand(string option) : Command() { try { hasCount=false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } bool hasName = false; string namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } if (namefile != "") { hasName = true; } //check for required parameters countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if (countfile != "") { hasCount = true; } //make sure there is at least one valid file left if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name.\n"); abort = true; } bool hasGroup = false; string groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); hasGroup = true; } if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group.\n"); abort = true; } map::iterator it = parameters.find("reference"); //user has given a template file if(it != parameters.end()){ if (it->second == "self") { templatefile = "self"; } else { templatefile = 
validParameter.validFile(parameters, "reference"); if (templatefile == "not open") { abort = true; } else if (templatefile == "not found") { //check for saved reference sequences m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true; } } }else if ((hasName) || (hasCount) || (hasGroup)) { templatefile = "self"; } else { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); templatefile = ""; abort = true; } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); abskew = validParameter.valid(parameters, "abskew"); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; } if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring.\n"); useAbskew = false; } temp = validParameter.valid(parameters, "chimealns"); if (temp == "not found") { temp = "f"; } chimealns = util.isTrue(temp); minh = validParameter.valid(parameters, "minh"); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; } mindiv = validParameter.valid(parameters, "mindiv"); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; } xn = validParameter.valid(parameters, "xn"); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; } dn = validParameter.valid(parameters, "dn"); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; } xa = validParameter.valid(parameters, "xa"); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; } chunks = validParameter.valid(parameters, "chunks"); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; } minchunk = validParameter.valid(parameters, "minchunk"); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; } idsmoothwindow = validParameter.valid(parameters, "idsmoothwindow"); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; } maxp = validParameter.valid(parameters, "maxp"); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; } minlen = validParameter.valid(parameters, "minlen"); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; } maxlen = validParameter.valid(parameters, "maxlen"); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; } strand = validParameter.valid(parameters, "strand"); if (strand == "not found") { strand = ""; } temp = validParameter.valid(parameters, "ucl"); if (temp == "not found") { temp = "f"; } ucl = util.isTrue(temp); queryfract = validParameter.valid(parameters, "queryfract"); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; } if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring.\n"); useQueryfract = false; } temp = validParameter.valid(parameters, "skipgaps"); if (temp == "not found") { temp = "t"; } skipgaps = util.isTrue(temp); temp = validParameter.valid(parameters, "skipgaps2"); if (temp == "not found") { temp = "t"; } skipgaps2 = util.isTrue(temp); temp = validParameter.valid(parameters, "dereplicate"); if (temp == "not found") { temp = "false"; } dups = util.isTrue(temp); temp = validParameter.valid(parameters, "removechimeras"); if (temp == 
"not found") { temp = "t"; } removeChimeras = util.isTrue(temp); if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting.\n"); abort=true; } if (hasCount && (templatefile != "self")) { m->mothurOut("You have provided a countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting.\n"); abort=true; } if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting.\n"); abort=true; } vector versionOutputs; bool foundTool = false; string programName = "uchime"; programName += EXECUTABLE_EXT; uchimeLocation = validParameter.validFile(parameters, "uchime"); if (uchimeLocation == "not found") { uchimeLocation = ""; foundTool = util.findTool(programName, uchimeLocation, versionOutputs, current->getLocations()); } else { //test to make sure uchime exists ifstream in; uchimeLocation = util.getFullPathName(uchimeLocation); foundTool = util.openInputFile(uchimeLocation, in, "no error"); in.close(); if(!foundTool) { m->mothurOut(uchimeLocation + " file does not exist or cannot be opened, ignoring.\n"); uchimeLocation = ""; foundTool = util.findTool(programName, uchimeLocation, versionOutputs, current->getLocations()); } } if (!foundTool) { abort = true; } uchimeLocation = util.getFullPathName(uchimeLocation); if (m->getDebug()) { m->mothurOut("[DEBUG]: uchime location using " + uchimeLocation + "\n"); } if (!abort) { if ((namefile != "") || (groupfile != "")) { //convert to count string rootFileName = namefile; if (rootFileName == "") { rootFileName = groupfile; } if (outputdir == "") { outputdir = util.hasPath(rootFileName); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(rootFileName)); string outputFileName = getOutputFileName("count", variables); CountTable ct; ct.createTable(namefile, groupfile, nullVector); ct.printCompressedTable(outputFileName); outputNames.push_back(outputFileName); current->setCountFile(outputFileName); countfile = outputFileName; hasCount = true; } } } } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand"); exit(1); } } /**************************************************************************************************/ struct uchimeData { string fastafile; string dupsfile; string outputFName; string accnos, alns, formattedFastaFilename, templatefile, uchimeLocation; string driverAccnos, driverAlns, driverOutputFName; map > parsedFiles; map > seqs2RemoveByGroup; int count, numChimeras; vector groups; uchimeVariables* vars; MothurOut* m; Utils util; uchimeData(){} uchimeData(map > g2f, string o, string uloc, string t, string file, string f, string n, string ac, string al, vector gr, uchimeVariables* vs) { fastafile = f; dupsfile = n; formattedFastaFilename = file; outputFName = o; templatefile = t; accnos = ac; alns = al; m = MothurOut::getInstance(); groups = gr; count = 0; numChimeras = 0; uchimeLocation = uloc; vars = vs; driverAccnos = ac; driverAlns = al; driverOutputFName = o; parsedFiles = g2f; } void setDriverNames(string o, string al, string ac) { driverAccnos = ac; driverAlns = al; driverOutputFName = o; } }; //********************************************************************************************************************** int driver(uchimeData* params){ try { 
params->driverOutputFName = params->util.getFullPathName(params->driverOutputFName); params->formattedFastaFilename = params->util.getFullPathName(params->formattedFastaFilename); params->driverAlns = params->util.getFullPathName(params->driverAlns); //to allow for spaces in the path params->driverOutputFName = "\"" + params->driverOutputFName + "\""; params->formattedFastaFilename = "\"" + params->formattedFastaFilename + "\""; params->driverAlns = "\"" + params->driverAlns + "\""; if (params->formattedFastaFilename.length() > 257) { params->m->mothurOut("[ERROR]: " + params->formattedFastaFilename + " filename is " + toString(params->formattedFastaFilename.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct.\n"); params->m->setControl_pressed(true); return 0; }else if ((params->driverAlns.length() > 257) && (params->vars->chimealns)) { params->m->mothurOut("[ERROR]: " + params->driverAlns + " filename is " + toString(params->driverAlns.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct.\n"); params->m->setControl_pressed(true); return 0; }else if (params->driverOutputFName.length() > 257) { params->m->mothurOut("[ERROR]: " + params->driverOutputFName + " filename is " + toString(params->driverOutputFName.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct input file name.\n"); params->m->setControl_pressed(true); return 0; } vector cPara; string uchimeCommand = params->uchimeLocation; uchimeCommand = "\"" + uchimeCommand + "\" "; cPara.push_back(params->util.mothurConvert(uchimeCommand)); //are you using a reference file if (params->templatefile != "self") { string outputFileName = params->formattedFastaFilename.substr(1, params->formattedFastaFilename.length()-2) + ".uchime_formatted"; ifstream in; params->util.openInputFile(params->formattedFastaFilename.substr(1, params->formattedFastaFilename.length()-2), in); ofstream out; params->util.openOutputFile(outputFileName, out); while (!in.eof()) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { seq.printSequence(out); } } in.close(); out.close(); params->formattedFastaFilename = outputFileName; params->formattedFastaFilename = "\"" + params->formattedFastaFilename + "\""; cPara.push_back(params->util.mothurConvert("--db")); cPara.push_back(params->util.mothurConvert(params->templatefile)); } //input filename cPara.push_back(params->util.mothurConvert("--input")); cPara.push_back(params->util.mothurConvert(params->formattedFastaFilename)); //output filename cPara.push_back(params->util.mothurConvert("--uchimeout")); cPara.push_back(params->util.mothurConvert(params->driverOutputFName)); if (params->vars->chimealns) { //output alns filename cPara.push_back(params->util.mothurConvert("--uchimealns")); cPara.push_back(params->util.mothurConvert(params->driverAlns)); } //strand if (params->vars->strand != "") { cPara.push_back(params->util.mothurConvert("--strand")); cPara.push_back(params->util.mothurConvert(params->vars->strand)); } if (params->vars->useAbskew) { cPara.push_back(params->util.mothurConvert("--abskew")); cPara.push_back(params->util.mothurConvert(params->vars->abskew)); } if (params->vars->useMinH) { cPara.push_back(params->util.mothurConvert("--minh")); cPara.push_back(params->util.mothurConvert(params->vars->minh)); } if (params->vars->useMindiv) { 
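//forward the user-supplied mindiv cutoff to uchime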
cPara.push_back(params->util.mothurConvert("--mindiv")); cPara.push_back(params->util.mothurConvert(params->vars->mindiv)); } if (params->vars->useXn) { cPara.push_back(params->util.mothurConvert("--xn")); cPara.push_back(params->util.mothurConvert(params->vars->xn)); } if (params->vars->useDn) { cPara.push_back(params->util.mothurConvert("--dn")); cPara.push_back(params->util.mothurConvert(params->vars->dn)); } if (params->vars->useXa) { cPara.push_back(params->util.mothurConvert("--xa")); cPara.push_back(params->util.mothurConvert(params->vars->xa)); } if (params->vars->useChunks) { cPara.push_back(params->util.mothurConvert("--chunks")); cPara.push_back(params->util.mothurConvert(params->vars->chunks)); } if (params->vars->useMinchunk) { cPara.push_back(params->util.mothurConvert("--minchunk")); cPara.push_back(params->util.mothurConvert(params->vars->minchunk)); } if (params->vars->useIdsmoothwindow) { cPara.push_back(params->util.mothurConvert("--idsmoothwindow")); cPara.push_back(params->util.mothurConvert(params->vars->idsmoothwindow)); } if (params->vars->useMaxp) { cPara.push_back(params->util.mothurConvert("--maxp")); cPara.push_back(params->util.mothurConvert(params->vars->maxp)); } if (!params->vars->skipgaps) { cPara.push_back(params->util.mothurConvert("--noskipgaps")); } if (!params->vars->skipgaps2) { cPara.push_back(params->util.mothurConvert("--noskipgaps2")); } if (params->vars->useMinlen) { cPara.push_back(params->util.mothurConvert("--minlen")); cPara.push_back(params->util.mothurConvert(params->vars->minlen)); } if (params->vars->useMaxlen) { cPara.push_back(params->util.mothurConvert("--maxlen")); cPara.push_back(params->util.mothurConvert(params->vars->maxlen)); } if (params->vars->ucl) { cPara.push_back(params->util.mothurConvert("--ucl")); } if (params->vars->useQueryfract) { cPara.push_back(params->util.mothurConvert("--queryfract")); cPara.push_back(params->util.mothurConvert(params->vars->queryfract)); } char** uchimeParameters; uchimeParameters = new char*[cPara.size()]; string commandString = ""; for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; } //int numArgs = cPara.size(); //uchime_main(numArgs, uchimeParameters); #if defined NON_WINDOWS #else commandString = "\"" + commandString + "\""; #endif if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); } system(commandString.c_str()); //free memory for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; } delete[] uchimeParameters; //remove "" from filenames params->driverOutputFName = params->driverOutputFName.substr(1, params->driverOutputFName.length()-2); params->formattedFastaFilename = params->formattedFastaFilename.substr(1, params->formattedFastaFilename.length()-2); params->driverAlns = params->driverAlns.substr(1, params->driverAlns.length()-2); if (params->m->getControl_pressed()) { return 0; } //create accnos file from uchime results ifstream in; params->util.openInputFile(params->driverOutputFName, in); ofstream out; params->util.openOutputFile(params->driverAccnos, out); int num = 0; params->numChimeras = 0; while(!in.eof()) { if (params->m->getControl_pressed()) { break; } string name = ""; string chimeraFlag = ""; string line = params->util.getline(in); gobble(in); vector pieces = params->util.splitWhiteSpace(line); if (pieces.size() > 2) { name = pieces[1]; //fix name if needed if (params->templatefile == "self") { name = name.substr(0, name.length()-1); //rip off last / name = 
name.substr(0, name.find_last_of('/')); } chimeraFlag = pieces[pieces.size()-1]; } if (chimeraFlag == "Y") { out << name << endl; params->numChimeras++; } num++; } in.close(); out.close(); return num; } catch(exception& e) { params->m->errorOut(e, "ChimeraUchimeCommand", "driver"); exit(1); } } //*************************************************************************************************************** int ChimeraUchimeCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n"); vars = new uchimeVariables(); vars->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); vars->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand); m->mothurOut("Checking sequences from " + fastafile + " ...\n" ); long start = time(nullptr); if (outputdir == "") { outputdir = util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = "denovo"; if (templatefile != "self") { variables["[tag]"] = "ref"; } string outputFileName = getOutputFileName("chimera", variables); string accnosFileName = getOutputFileName("accnos", variables); string alnsFileName = getOutputFileName("alns", variables); string newFasta = util.getRootName(fastafile) + "temp"; string newCountFile = ""; //you provided a groupfile bool hasGroups = false; vector groups; if (hasCount) { CountTable ct; if (ct.testGroups(countfile, groups)) { hasGroups = true; } variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); newCountFile = getOutputFileName("count", variables); } if ((templatefile == "self") && (!hasGroups)) { //you want to run uchime with a template=self and no groups if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing.\n"); processors = 1; } if (hasCount) { } else { countfile = getCountFile(fastafile); hasCount = true; } map seqs; readFasta(fastafile, seqs); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } //read namefile vector nameMapCount; int error = 0; if (hasCount) { CountTable ct; ct.readTable(countfile, true, false); for(map::iterator it = seqs.begin(); it != seqs.end(); it++) { int num = ct.getNumSeqs(it->first); if (num == 0) { error = 1; } else { seqPriorityNode temp(num, it->second, it->first); nameMapCount.push_back(temp); } } } if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your countfile, aborting.\n"); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } util.printVsearchFile(nameMapCount, newFasta, "/ab=", "/"); fastafile = newFasta; } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } delete vars; return 0; } if (hasGroups) { if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } delete vars; return 0; } vector groups; 
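//parse the fasta and count data by sample so chimera checking can be run on each group separately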
map > group2Files; if (hasCount) { current->setMothurCalling(true); SequenceCountParser cparser(countfile, fastafile, nullVector); current->setMothurCalling(false); groups = cparser.getNamesOfGroups(); group2Files = cparser.getFiles(); } //clears files ofstream out, out1, out2; util.openOutputFile(outputFileName, out); out.close(); util.openOutputFile(accnosFileName, out1); out1.close(); if (chimealns) { util.openOutputFile(alnsFileName, out2); out2.close(); } map > seqs2RemoveByGroup; int totalSeqs = createProcessesGroups(group2Files, outputFileName, newFasta, accnosFileName, alnsFileName, groups, seqs2RemoveByGroup); if (hasCount && dups) { CountTable newCount; newCount.readTable(countfile, true, false); for (map >::iterator it = seqs2RemoveByGroup.begin(); it != seqs2RemoveByGroup.end(); it++) { string group = it->first; for (int k = 0; k < it->second.size(); k++) { newCount.setAbund(it->second[k], group, 0); } } newCount.printTable(newCountFile); } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } delete vars; return 0; } if (!dups) { int totalChimeras = deconvoluteResults(outputFileName, accnosFileName, alnsFileName); m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.\n"); m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples.\n"); }else { if (hasCount) { unordered_set doNotRemove; CountTable c; c.readTable(newCountFile, true, true); //returns non zeroed names vector namesInTable = c.printTable(newCountFile); outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); for (int i = 0; i < namesInTable.size(); i++) { doNotRemove.insert(namesInTable[i]); } //remove names we want to keep from accnos file. unordered_set accnosNames = util.readAccnos(accnosFileName); ofstream out2; util.openOutputFile(accnosFileName, out2); for (auto it = accnosNames.begin(); it != accnosNames.end(); it++) { if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; } } out2.close(); } } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } delete vars; return 0; } }else{ if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } delete vars; return 0; } int numSeqs = 0; int numChimeras = 0; map > dummy; uchimeData* dataBundle = new uchimeData(dummy, outputFileName, uchimeLocation, templatefile, fastafile, fastafile, countfile, accnosFileName, alnsFileName, nullVector, vars); numSeqs = driver(dataBundle); numChimeras = dataBundle->numChimeras; delete dataBundle; //add headings ofstream out; util.openOutputFile(outputFileName+".temp", out); out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; out.close(); util.appendFiles(outputFileName, outputFileName+".temp"); util.mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str()); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } delete vars; return 0; } //remove file made for uchime if (templatefile == "self") { util.mothurRemove(fastafile); } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences. 
" + toString(numChimeras) + " chimeras were found.\n"); } outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); } delete vars; if (removeChimeras) { if (!util.isBlank(accnosFileName)) { m->mothurOut("\nRemoving chimeras from your input files:\n"); string inputString = "fasta=" + fastafile + ", accnos=" + accnosFileName; if ((countfile != "") && (!dups)) { inputString += ", count=" + countfile; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); if (countfile != "") { if (!dups) { //dereplicate=f, so remove sequences where any sample found the reads to be chimeric map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); variables["[tag]"] = "denovo"; if (templatefile != "self") { variables["[tag]"] = "ref"; } string currentName = getOutputFileName("count", variables); util.renameFile(filenames["count"][0], currentName); util.mothurRemove(filenames["count"][0]); outputNames.push_back(currentName); outputTypes["count"].push_back(currentName); }//else, mothur created a modified count file removing chimeras by sample. No need to include count file on remove.seqs command. Deconvolute function created modified count table already } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = "denovo"; if (templatefile != "self") { variables["[tag]"] = "ref"; } string currentName = getOutputFileName("fasta", variables); util.renameFile(filenames["fasta"][0], currentName); util.mothurRemove(filenames["fasta"][0]); outputNames.push_back(currentName); outputTypes["fasta"].push_back(currentName); }else { m->mothurOut("\nNo chimeras found, skipping remove.seqs.\n"); } } //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "execute"); exit(1); } } //********************************************************************************************************************** int ChimeraUchimeCommand::deconvoluteResults(string outputFileName, string accnosFileName, string alnsFileName){ try { int total = 0; ofstream out2; util.openOutputFile(accnosFileName+".temp", out2); string name; set namesInFile; //this is so if a 
sequence is found to be chimera in several samples we dont write it to the results file more than once set::iterator itNames; set chimerasInFile; set::iterator itChimeras; if (!util.isBlank(accnosFileName)) { //edit accnos file ifstream in2; util.openInputFile(accnosFileName, in2); while (!in2.eof()) { if (m->getControl_pressed()) { in2.close(); out2.close(); util.mothurRemove(outputFileName); util.mothurRemove((accnosFileName+".temp")); return 0; } in2 >> name; gobble(in2); itChimeras = chimerasInFile.find(name); if (itChimeras == chimerasInFile.end()) { out2 << name << endl; chimerasInFile.insert(name); total++; } } in2.close(); } out2.close(); util.mothurRemove(accnosFileName); rename((accnosFileName+".temp").c_str(), accnosFileName.c_str()); //edit chimera file ifstream in; util.openInputFile(outputFileName, in); ofstream out; util.openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; float temp1; string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag; name = ""; namesInFile.clear(); //assumptions - in file each read will always look like - if uchime source is updated, revisit this code. /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N */ while (!in.eof()) { if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove((outputFileName+".temp")); return 0; } bool print = false; in >> temp1; gobble(in); in >> name; gobble(in); in >> parent1; gobble(in); in >> parent2; gobble(in); in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag; gobble(in); //parse name - name will look like U68590/ab=1/ string restOfName = ""; int pos = name.find_first_of('/'); if (pos != string::npos) { restOfName = name.substr(pos); name = name.substr(0, pos); } //is this name already in the file itNames = namesInFile.find((name)); if (itNames == namesInFile.end()) { //no not in file if (flag == "N") { //are you really a no?? //is this sequence really not chimeric?? 
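//a read can be called chimeric ("Y") in one sample and clean ("N") in another; this "N" line is
//only kept if no sample flagged the read in the accnos file, so the merged report lists it once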
itChimeras = chimerasInFile.find(name); //then you really are a no so print, otherwise skip if (itChimeras == chimerasInFile.end()) { print = true; } }else{ print = true; } } if (print) { out << temp1 << '\t' << name << restOfName << '\t'; namesInFile.insert(name); //parse parent1 names if (parent1 != "*") { restOfName = ""; pos = parent1.find_first_of('/'); if (pos != string::npos) { restOfName = parent1.substr(pos); parent1 = parent1.substr(0, pos); } out << parent1 << restOfName << '\t'; }else { out << parent1 << '\t'; } //parse parent2 names if (parent2 != "*") { restOfName = ""; pos = parent2.find_first_of('/'); if (pos != string::npos) { restOfName = parent2.substr(pos); parent2 = parent2.substr(0, pos); } out << parent2 << restOfName << '\t'; }else { out << parent2 << '\t'; } out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << '\t' << temp13 << '\t' << flag << endl; } } in.close(); out.close(); util.mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str()); //edit anls file //assumptions - in file each read will always look like - if uchime source is updated, revisit this code. /* ------------------------------------------------------------------------ Query ( 179 nt) F21Fcsw_11639/ab=591/ ParentA ( 179 nt) F11Fcsw_6529/ab=1625/ ParentB ( 181 nt) F21Fcsw_12128/ab=1827/ A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160 Diffs NNN N N N N N BB NNN Votes 000 0 0 0 0 0 ++ 000 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB A 159 TGGAACTGAGACACGGTCCAA 179 Q 159 TGGAACTGAGACACGGTCCAA 179 B 161 TGGAACTGAGACACGGTCCAA 181 Diffs Votes Model BBBBBBBBBBBBBBBBBBBBB Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. 
+1.5% Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047 */ if (chimealns) { ifstream in3; util.openInputFile(alnsFileName, in3); ofstream out3; util.openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint); name = ""; namesInFile.clear(); string line = ""; while (!in3.eof()) { if (m->getControl_pressed()) { in3.close(); out3.close(); util.mothurRemove(outputFileName); util.mothurRemove((accnosFileName)); util.mothurRemove((alnsFileName+".temp")); return 0; } line = ""; line = util.getline(in3); string temp = ""; if (line != "") { istringstream iss(line); iss >> temp; //are you a name line if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) { int spot = 0; for (int i = 0; i < line.length(); i++) { spot = i; if (line[i] == ')') { break; } else { out3 << line[i]; } } if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + ".\n"); m->setControl_pressed(true); } else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + ".\n"); m->setControl_pressed(true); } else { out << line[spot] << line[spot+1]; name = line.substr(spot+2); //parse name - name will either look like U68590/ab=1/ or U68590 string restOfName = ""; int pos = name.find_first_of('/'); if (pos != string::npos) { restOfName = name.substr(pos); name = name.substr(0, pos); } //only limit repeats on query names if (temp == "Query") { itNames = namesInFile.find(name); if (itNames == namesInFile.end()) { out << name << restOfName << endl; namesInFile.insert(name); } }else { out << name << restOfName << endl; } } }else { //not need to alter line out3 << line << endl; } }else { out3 << endl; } } in3.close(); out3.close(); util.mothurRemove(alnsFileName); rename((alnsFileName+".temp").c_str(), alnsFileName.c_str()); } return total; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults"); exit(1); } } //********************************************************************************************************************** int ChimeraUchimeCommand::readFasta(string filename, map& seqs){ try { //create input file for uchime //read through fastafile and store info ifstream in; util.openInputFile(filename, in); while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return 0; } Sequence seq(in); gobble(in); seqs[seq.getName()] = seq.getAligned(); } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** string ChimeraUchimeCommand::getCountFile(string& inputFile){ try { string countFile = ""; m->mothurOut("\nNo count file given, running unique.seqs command to generate one.\n\n"); //use unique.seqs to create new name and fastafile string inputString = "format=count, fasta=" + inputFile; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: unique.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* uniqueCommand = new UniqueSeqsCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); countFile = filenames["count"][0]; inputFile = filenames["fasta"][0]; return countFile; } catch(exception& e) { m->errorOut(e, 
"ChimeraUchimeCommand", "getCountFile"); exit(1); } } //********************************************************************************************************************** int getSeqs(map& nameMap, string thisGroupsFormattedOutputFilename, string tag, string tag2, long long& numSeqs, string thisGroupsFastaFile, MothurOut* m){ try { int error = 0; ifstream in; Utils util; util.openInputFile(thisGroupsFastaFile, in); vector nameVector; map::iterator itNameMap; while (!in.eof()) { if (m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); itNameMap = nameMap.find(seq.getName()); if (itNameMap == nameMap.end()){ error = 1; m->mothurOut("[ERROR]: " + seq.getName() + " is in your fastafile, but is not in your name or count file, please correct.\n"); }else { int num = itNameMap->second; seqPriorityNode temp(num, seq.getUnaligned(), seq.getName()); nameVector.push_back(temp); } } in.close(); if (error == 1) { return 1; } numSeqs = nameVector.size(); util.printVsearchFile(nameVector, thisGroupsFormattedOutputFilename, tag, tag2); return error; } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "getSeqs"); exit(1); } } //********************************************************************************************************************** void driverGroups(uchimeData* params){ try { int totalSeqs = 0; for (map >::iterator it = params->parsedFiles.begin(); it != params->parsedFiles.end(); it++) { long start = time(nullptr); if (params->m->getControl_pressed()) { break; } int error; long long numSeqs = 0; string thisGroup = it->first; map nameMap; if (params->vars->hasCount) { CountTable ct; ct.readTable(it->second[1], false, true); nameMap = ct.getNameMap(); } else { nameMap = params->util.readNames(it->second[1]); } error = getSeqs(nameMap, params->formattedFastaFilename, "/ab=", "/", numSeqs, it->second[0], params->m); if ((error == 1) || params->m->getControl_pressed()) { break; } totalSeqs += numSeqs; params->setDriverNames((params->outputFName + thisGroup), (params->alns+thisGroup), (params->accnos+thisGroup)); driver(params); if (params->m->getControl_pressed()) { break; } //remove file made for uchime if (!params->m->getDebug()) { params->util.mothurRemove(params->formattedFastaFilename); } else { params->m->mothurOut("[DEBUG]: saving file: " + params->formattedFastaFilename + ".\n"); } //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table //This table will zero out group counts for seqs determined to be chimeric by that group. 
if (params->vars->dups) { if (!params->util.isBlank(params->accnos+thisGroup)) { ifstream in; params->util.openInputFile(params->accnos+thisGroup, in); string name; if (params->vars->hasCount) { vector namesOfChimeras; while (!in.eof()) { in >> name; gobble(in); namesOfChimeras.push_back(name); } in.close(); params->seqs2RemoveByGroup[thisGroup] = namesOfChimeras; }else { map thisnamemap; params->util.readNames(it->second[1], thisnamemap); map::iterator itN; ofstream out; params->util.openOutputFile(params->accnos+thisGroup+".temp", out); while (!in.eof()) { in >> name; gobble(in); itN = thisnamemap.find(name); if (itN != thisnamemap.end()) { vector tempNames; params->util.splitAtComma(itN->second, tempNames); for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; } }else { params->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); params->m->setControl_pressed(true); } } out.close(); in.close(); params->util.renameFile(params->accnos+thisGroup+".temp", params->accnos+thisGroup); } } } //append files params->util.appendFiles((params->outputFName+thisGroup), params->outputFName); params->util.mothurRemove((params->outputFName+thisGroup)); params->util.appendFiles((params->accnos+thisGroup), params->accnos); params->util.mothurRemove((params->accnos+thisGroup)); if (params->vars->chimealns) { params->util.appendFiles((params->alns+thisGroup), params->alns); params->util.mothurRemove((params->alns+thisGroup)); } params->m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + thisGroup + ".\n"); } params->count = totalSeqs; } catch(exception& e) { params->m->errorOut(e, "ChimeraUchimeCommand", "driverGroups"); exit(1); } } /**************************************************************************************************/ int ChimeraUchimeCommand::createProcessesGroups(map >& groups2Files, string outputFName, string filename, string accnos, string alns, vector groups, map >& seqs2RemoveByGroup) { try { //sanity check if (groups.size() < processors) { processors = groups.size(); m->mothurOut("Reducing processors to " + toString(groups.size()) + ".\n"); } //divide the groups between the processors vector lines; int remainingPairs = groups.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } //create array of worker threads vector workerThreads; vector data; long long num = 0; time_t start, end; time(&start); //Lauch worker threads for (int i = 0; i < processors-1; i++) { string extension = toString(i+1) + ".temp"; vector thisGroups; map > thisGroupsParsedFiles; for (int j = lines[i+1].start; j < lines[i+1].end; j++) { map >::iterator it = groups2Files.find(groups[j]); if (it != groups2Files.end()) { thisGroupsParsedFiles[groups[j]] = (it->second); thisGroups.push_back(groups[j]); } else { m->mothurOut("[ERROR]: missing files for group " + groups[j] + ", skipping\n"); } } uchimeData* dataBundle = new uchimeData(thisGroupsParsedFiles, outputFName+extension, uchimeLocation, templatefile, filename+extension, fastafile, countfile, accnos+extension, alns+extension, thisGroups, vars); data.push_back(dataBundle); 
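//each worker gets its own uchimeData bundle writing to ".temp"-suffixed files; the main thread
//processes lines[0] itself below, then joins the workers and appends their temp chimera and
//accnos files onto the shared outputs before deleting the bundles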
workerThreads.push_back(new std::thread(driverGroups, dataBundle)); } vector thisGroups; map > thisGroupsParsedFiles; for (int j = lines[0].start; j < lines[0].end; j++) { map >::iterator it = groups2Files.find(groups[j]); if (it != groups2Files.end()) { thisGroupsParsedFiles[groups[j]] = (it->second); thisGroups.push_back(groups[j]); } else { m->mothurOut("[ERROR]: missing files for group " + groups[j] + ", skipping\n"); } } uchimeData* dataBundle = new uchimeData(thisGroupsParsedFiles, outputFName, uchimeLocation, templatefile, filename, fastafile, countfile, accnos, alns, thisGroups, vars); driverGroups(dataBundle); num = dataBundle->count; int numChimeras = dataBundle->numChimeras; seqs2RemoveByGroup = dataBundle->seqs2RemoveByGroup; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; numChimeras += data[i]->numChimeras; for (map >::iterator it = data[i]->seqs2RemoveByGroup.begin(); it != data[i]->seqs2RemoveByGroup.end(); it++) { map >::iterator itSanity = seqs2RemoveByGroup.find(it->first); if (itSanity == seqs2RemoveByGroup.end()) { //we haven't seen this group, should always be true seqs2RemoveByGroup[it->first] = it->second; } } string extension = toString(i+1) + ".temp"; util.appendFiles((outputFName+extension), outputFName); util.mothurRemove((outputFName+extension)); util.appendFiles((accnos+extension), accnos); util.mothurRemove((accnos+extension)); delete data[i]; delete workerThreads[i]; } delete dataBundle; time(&end); m->mothurOut("It took " + toString(difftime(end, start)) + " secs to check " + toString(num) + " sequences.\n\n"); return num; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/chimerauchimecommand.h000077500000000000000000000077311424121717000225500ustar00rootroot00000000000000#ifndef CHIMERAUCHIMECOMMAND_H #define CHIMERAUCHIMECOMMAND_H /* * chimerauchimecommand.h * Mothur * * Created by westcott on 5/13/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" #include "sequenceparser.h" #include "counttable.h" #include "sequencecountparser.h" /***********************************************************/ struct uchimeVariables { bool dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount; string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand; uchimeVariables() = default; void setBooleans(bool dps, bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) { useAbskew = Abskew; chimealns = calns; useMinH = MinH; useMindiv = Mindiv; useXn = Xn; useDn = Dn; useXa = Xa; useChunks = Chunks; useMinchunk = Minchunk; useIdsmoothwindow = Idsmoothwindow; useMinsmoothid = Minsmoothid; useMaxp = Maxp; skipgaps = skipgap; skipgaps2 = skipgap2; useMinlen = Minlen; useMaxlen = Maxlen; ucl = uc; useQueryfract = Queryfract; hasCount = hc; dups = dps; } void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) { abskew = abske; minh = min; mindiv = mindi; strand = stra; xn = x; dn = d; xa = xa2; chunks = chunk; minchunk = minchun; idsmoothwindow = idsmoothwindo; minsmoothid = minsmoothi; maxp = max; minlen = minle; maxlen = maxle; queryfract = queryfrac; } }; /***********************************************************/ class ChimeraUchimeCommand : public Command { public: ChimeraUchimeCommand(string); ~ChimeraUchimeCommand() = default; vector setParameters(); string getCommandName() { return "chimera.uchime"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getCommonQuestions(); string getOutputPattern(string); string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/usearch/manual/uchime_algo.html\nThis code was donated to the public domain.\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection. 
Bioinformatics 27:2194.\nhttp://www.mothur.org/wiki/Chimera.uchime\n"; } string getDescription() { return "detect chimeric sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, dups, removeChimeras; string fastafile, templatefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation, strand; int processors; vector outputNames; uchimeVariables* vars; string getCountFile(string&); int readFasta(string, map&); int deconvoluteResults(string, string, string); int createProcessesGroups(map >&, string, string, string, string, vector, map >&); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/chimeravsearchcommand.cpp000066400000000000000000001742071424121717000232640ustar00rootroot00000000000000// // chimeravsearchcommand.cpp // Mothur // // Created by Sarah Westcott on 6/16/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #include "chimeravsearchcommand.h" #include "uniqueseqscommand.h" #include "sequence.hpp" #include "systemcommand.h" #include "degapseqscommand.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector ChimeraVsearchCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew); CommandParameter pchimealns("uchimealns", "Boolean", "", "F", "", "", "","alns",false,false); parameters.push_back(pchimealns); CommandParameter premovechimeras("removechimeras", "Boolean", "", "t", "", "", "","alns",false,false); parameters.push_back(premovechimeras); CommandParameter pminh("minh", "Number", "", "0.28", "", "", "","",false,false); parameters.push_back(pminh); CommandParameter pmindiv("mindiv", "Number", "", "0.8", "", "", "","",false,false); parameters.push_back(pmindiv); CommandParameter pxn("xn", "Number", "", "8.0", "", "", "","",false,false); parameters.push_back(pxn); CommandParameter pdn("dn", "Number", "", "1.4", 
"", "", "","",false,false); parameters.push_back(pdn); CommandParameter pmindiffs("mindiffs", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pmindiffs); CommandParameter pvsearchlocation("vsearch", "String", "", "", "", "", "","",false,false); parameters.push_back(pvsearchlocation); CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups); abort = false; calledHelp = false; vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["alns"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ChimeraVsearchCommand::getHelpString(){ try { string helpString = ""; helpString += "The chimera.vsearch command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command is a wrapper for vsearch https://github.com/torognes/vsearch.\n"; helpString += "The chimera.vsearch command parameters are fasta, name, count, reference, processors, dereplicate, removechimeras, abskew, uchimealns, minh, mindiv, xn, dn, mindiffs.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n"; helpString += "The count parameter allows you to provide a count file, if you are using template=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing seqeunces after chimeras are removed. \n"; helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; helpString += "If the dereplicate parameter is false, then if one group finds the sequence to be chimeric, then all groups find it to be chimeric, default=f.\n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n"; helpString += "The uchimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n"; helpString += "The removechimeras parameter allows you to indicate you would like to automatically remove the sequences that are flagged as chimeric. Default=t.\n"; helpString += "The minh parameter - mininum score to report chimera. Default 0.3. 
Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n"; helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n"; helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n"; helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n"; helpString += "The mindiffs parameter - minimum number of differences in segment Default = (3).\n"; helpString += "The vsearch parameter allows you to specify the name and location of your vsearch executable. By default mothur will look in your path and mothur's executable and mothur tools locations. You can set the vsearch location as follows, vsearch=/usr/bin/vsearch.\n"; helpString += "The chimera.vsearch command should be in the following format: \n"; helpString += "chimera.vsearch(fasta=yourFastaFile, reference=yourTemplate) \n"; helpString += "Example: chimera.vsearch(fasta=AD.align, reference=silva.gold.align) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ChimeraVsearchCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string issue = "... vsearch file does not exist. mothur requires the vsearch executable."; issues.push_back(issue); string ianswer = "\tThe chimera.vsearch command is a wrapper for the vsearch program, https://github.com/torognes/vsearch. We distribute the vsearch executable with the executable versions of mothur. By default, mothur will look for vsearch in the same location mothur's executable is as well as looking in your $PATH variable.\n"; ianswers.push_back(ianswer); string howto = "How do I use the dereplicate parameter?"; howtos.push_back(howto); string hanswer = "\tThe dereplicate parameter can be used when checking for chimeras by group. If the dereplicate parameter is false, then if one group finds the sequence to be chimeric, then all groups find it to be chimeric, default=f. If you set dereplicate=t, and then when a sequence is found to be chimeric it is removed from it’s group, not the entire dataset.\n\nNote: When you set dereplicate=t, mothur generates a new count table with the chimeras removed and counts adjusted by sample. It is important to note if you set dereplicate=true, do NOT include the count file with the remove.seqs command. 
For a detailed example, please reference https://mothur.org/wiki/chimera_dereplicate_example/\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string ChimeraVsearchCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "chimera") { pattern = "[filename],[tag],vsearch.chimeras"; } else if (type == "accnos") { pattern = "[filename],[tag],vsearch.accnos"; } else if (type == "fasta") { pattern = "[filename],[tag],vsearch.fasta"; } else if (type == "alns") { pattern = "[filename],[tag],vsearch.alns"; } else if (type == "count") { pattern = "[filename],[tag],vsearch.count_table-[filename],count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ChimeraVsearchCommand::ChimeraVsearchCommand(string option) : Command() { try { hasCount=false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } bool hasName = false; string namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } if (namefile != "") { hasName = true; } //check for required parameters countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if (countfile != "") { hasCount = true; } //make sure there is at least one valid file left if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name.\n"); abort = true; } bool hasGroup = false; string groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); hasGroup = true; } if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group.\n"); abort = true; } string path; map::iterator it = parameters.find("reference"); //user has given a template file if(it != parameters.end()){ if (it->second == "self") { templatefile = "self"; } else { templatefile = 
validParameter.validFile(parameters, "reference"); if (templatefile == "not open") { abort = true; } else if (templatefile == "not found") { //check for saved reference sequences m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true; } } }else if ((hasName) || (hasCount) || (hasGroup)) { templatefile = "self"; } else { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); templatefile = ""; abort = true; } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); abskew = validParameter.valid(parameters, "abskew"); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; } if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring.\n"); useAbskew = false; } temp = validParameter.valid(parameters, "chimealns"); if (temp == "not found") { temp = "f"; } chimealns = util.isTrue(temp); temp = validParameter.valid(parameters, "removechimeras"); if (temp == "not found") { temp = "t"; } removeChimeras = util.isTrue(temp); minh = validParameter.valid(parameters, "minh"); if (minh == "not found") { useMinH = false; minh = "0.28"; } else{ useMinH = true; } mindiv = validParameter.valid(parameters, "mindiv"); if (mindiv == "not found") { useMindiv = false; mindiv = "0.8"; } else{ useMindiv = true; } xn = validParameter.valid(parameters, "xn"); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; } dn = validParameter.valid(parameters, "dn"); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; } mindiffs = validParameter.valid(parameters, "mindiffs"); if (mindiffs == "not found") { useMindiffs = false; mindiffs = "3"; } else{ useMindiffs = true; } temp = validParameter.valid(parameters, "dereplicate"); if (temp == "not found") { temp = "false"; } dups = util.isTrue(temp); vector versionOutputs; bool foundTool = false; string programName = "vsearch"; programName += EXECUTABLE_EXT; vsearchLocation = validParameter.validFile(parameters, "vsearch"); if (vsearchLocation == "not found") { vsearchLocation = ""; foundTool = util.findTool(programName, vsearchLocation, versionOutputs, current->getLocations()); } else { //test to make sure vsearch exists ifstream in; vsearchLocation = util.getFullPathName(vsearchLocation); bool ableToOpen = util.openInputFile(vsearchLocation, in, "no error"); in.close(); if(!ableToOpen) { m->mothurOut(vsearchLocation + " file does not exist or cannot be opened, ignoring.\n"); vsearchLocation = ""; foundTool = util.findTool(programName, vsearchLocation, versionOutputs, current->getLocations()); }else { foundTool = true; } } if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting.\n"); abort=true; } if (hasCount && (templatefile != "self")) { m->mothurOut("You have provided a countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting.\n"); abort=true; } if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. 
I am not sure what reference you are trying to use, aborting.\n"); abort=true; } //look for vsearch exe path = current->getProgramPath(); if (!foundTool) { abort = true; } if (foundTool && !abort) { if (versionOutputs.size() != 0) { if (versionOutputs[0] == "vsearch") { if (versionOutputs.size() >= 2) { string version = versionOutputs[1]; int pos = version.find_first_of('_'); if (pos != string::npos) { version = version.substr(0, pos); } if (!util.isVsearchVersionValid(version, "v2.13.5")) { m->mothurOut("[ERROR]: vsearch version found = " + version + ". Mothur requires minimum version v2.13.5. Vsearch is distributed with mothur's executable or available on github https://github.com/torognes/vsearch/releases/, please correct.\n"); abort = true; }else { m->mothurOut("Using vsearch version " + version + ".\n"); } } } } } if (!abort) { if ((namefile != "") || (groupfile != "")) { //convert to count string rootFileName = namefile; if (rootFileName == "") { rootFileName = groupfile; } if (outputdir == "") { outputdir = util.hasPath(rootFileName); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(rootFileName)); string outputFileName = getOutputFileName("count", variables); CountTable ct; ct.createTable(namefile, groupfile, nullVector); ct.printCompressedTable(outputFileName); outputNames.push_back(outputFileName); current->setCountFile(outputFileName); countfile = outputFileName; hasCount = true; } } if (m->getDebug()) { m->mothurOut("[DEBUG]: vsearch location using " + vsearchLocation + "\n"); } } } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "ChimeraVsearchCommand"); exit(1); } } //********************************************************************************************************************** //string outputFName, string filename, string accnos, string alns, int& numChimeras void driver(vsearchData* params){ try { params->driverOutputFName = params->util.getFullPathName(params->driverOutputFName); params->formattedFastaFilename = params->util.getFullPathName(params->formattedFastaFilename); params->driverAlns = params->util.getFullPathName(params->driverAlns); string outputFNamec = params->driverOutputFName+"vsearch_out"; //to allow for spaces in the path params->driverOutputFName = "\"" + params->driverOutputFName + "\""; params->formattedFastaFilename = "\"" + params->formattedFastaFilename + "\""; params->driverAlns = "\"" + params->driverAlns + "\""; outputFNamec = "\"" + outputFNamec + "\""; vector cPara; string vsearchCommand = params->vsearchLocation; vsearchCommand = "\"" + vsearchCommand + "\" "; cPara.push_back(params->util.mothurConvert(vsearchCommand)); string fileToRemove = ""; string numProcessors = toString(params->processors); //are you using a reference file if (params->templatefile != "self") { string rootFileName = params->formattedFastaFilename.substr(1, params->formattedFastaFilename.length()-2); string outputFileName = rootFileName + ".vsearch_formatted"; fileToRemove = outputFileName; //vsearch cant handle some of the things allowed in mothurs fasta files so we remove them ifstream in; params->util.openInputFile(rootFileName, in); ofstream out; params->util.openOutputFile(outputFileName, out); while (!in.eof()) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { seq.printUnAlignedSequence(out); } } in.close(); out.close(); params->formattedFastaFilename = outputFileName; params->formattedFastaFilename = "\"" + params->formattedFastaFilename + "\""; 
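//the reformatted copy holds unaligned sequences only, since vsearch cannot parse everything
//allowed in mothur's fasta files; the quoted ".vsearch_formatted" path is what gets handed to
//vsearch below, and the temporary copy is removed again after the run via fileToRemove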
//add reference file cPara.push_back(params->util.mothurConvert("--db")); cPara.push_back(params->util.mothurConvert(params->formattedFastaFilename)); //add reference file cPara.push_back(params->util.mothurConvert("--uchime_ref")); cPara.push_back(params->util.mothurConvert(params->formattedFastaFilename)); }else { //denovo numProcessors = toString(1); cPara.push_back(params->util.mothurConvert("--uchime_denovo")); cPara.push_back(params->util.mothurConvert(params->formattedFastaFilename)); } //output filename cPara.push_back(params->util.mothurConvert("--chimeras")); cPara.push_back(params->util.mothurConvert(outputFNamec)); cPara.push_back(params->util.mothurConvert("--uchimeout")); cPara.push_back(params->util.mothurConvert(params->driverOutputFName)); cPara.push_back(params->util.mothurConvert("--xsize")); if (params->vars->chimealns) { cPara.push_back(params->util.mothurConvert("--uchimealns")); cPara.push_back(params->util.mothurConvert(params->driverAlns)); } if (params->vars->useAbskew) { cPara.push_back(params->util.mothurConvert("--abskew")); cPara.push_back(params->util.mothurConvert(params->vars->abskew)); } if (params->vars->useMinH) { cPara.push_back(params->util.mothurConvert("--minh")); cPara.push_back(params->util.mothurConvert(params->vars->minh)); } if (params->vars->useMindiv) { cPara.push_back(params->util.mothurConvert("--mindiv")); cPara.push_back(params->util.mothurConvert(params->vars->mindiv)); } if (params->vars->useMindiffs) { cPara.push_back(params->util.mothurConvert("--mindiffs")); cPara.push_back(params->util.mothurConvert(params->vars->mindiffs)); } if (params->vars->useXn) { cPara.push_back(params->util.mothurConvert("--xn")); cPara.push_back(params->util.mothurConvert(params->vars->xn)); } if (params->vars->useDn) { cPara.push_back(params->util.mothurConvert("--dn")); cPara.push_back(params->util.mothurConvert(params->vars->dn)); } //--threads cPara.push_back(params->util.mothurConvert("--threads")); cPara.push_back(params->util.mothurConvert(numProcessors)); char** vsearchParameters; vsearchParameters = new char*[cPara.size()]; string commandString = ""; for (int i = 0; i < cPara.size(); i++) { vsearchParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; } #if defined NON_WINDOWS #else commandString = "\"" + commandString + "\""; #endif if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: vsearch command = " + commandString + ".\n"); } system(commandString.c_str()); //free memory for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; } delete[] vsearchParameters; if (fileToRemove != "") { params->util.mothurRemove(fileToRemove); } //remove "" from filenames params->driverOutputFName = params->driverOutputFName.substr(1, params->driverOutputFName.length()-2); outputFNamec = outputFNamec.substr(1, outputFNamec.length()-2); params->formattedFastaFilename = params->formattedFastaFilename.substr(1, params->formattedFastaFilename.length()-2); params->driverAlns = params->driverAlns.substr(1, params->driverAlns.length()-2); if (params->m->getControl_pressed()) { return; } //create accnos file from vsearch results ifstream in; params->util.openInputFile(outputFNamec, in, "no error"); ofstream out; params->util.openOutputFile(params->driverAccnos, out); params->numChimeras = 0; while(!in.eof()) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); out << seq.getName() << endl; params->numChimeras++; } in.close(); out.close(); params->util.mothurRemove(outputFNamec); return; } catch(exception& e) { 
params->m->errorOut(e, "ChimeraVsearchCommand", "driver"); exit(1); } } //*************************************************************************************************************** int ChimeraVsearchCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Checking sequences from " + fastafile + " ...\n" ); long start = time(nullptr); if (outputdir == "") { outputdir = util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = "denovo"; if (templatefile != "self") { variables["[tag]"] = "ref"; } string outputFileName = getOutputFileName("chimera", variables); string accnosFileName = getOutputFileName("accnos", variables); string alnsFileName = getOutputFileName("alns", variables); string newFasta = util.getRootName(fastafile) + "temp"; string newCountFile = ""; //you provided a groupfile bool hasGroups = false; int numSeqs = 0; if (hasCount) { CountTable ct; if (ct.testGroups(countfile)) { hasGroups = true; } variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); newCountFile = getOutputFileName("count", variables); } vars = new vsearchVariables(); vars->setBooleans(dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useMindiffs, hasCount); vars->setVariables(abskew, minh, mindiv, xn, dn, mindiffs); //setup fasta file if denovo and no groups if ((templatefile == "self") && (!hasGroups)) { //you want to run vsearch with a template=self and no groups if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing.\n"); processors = 1; } if (hasCount) { } else { countfile = getCountFile(fastafile); hasCount = true; } map seqs; numSeqs = readFasta(fastafile, seqs); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } //read namefile vector nameMapCount; //int error; if (hasCount) { CountTable ct; ct.readTable(countfile, true, false); for(map::iterator it = seqs.begin(); it != seqs.end(); it++) { int num = ct.getNumSeqs(it->first); if (num != 0) { seqPriorityNode temp(num, it->second, it->first); nameMapCount.push_back(temp); } } } if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting.\n"); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } util.printVsearchFile(nameMapCount, newFasta, ";size=", ";"); } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } if (hasGroups) { //Parse sequences by group vector groups; map > group2Files; if (hasCount) { current->setMothurCalling(true); SequenceCountParser cparser(countfile, fastafile, nullVector); current->setMothurCalling(false); groups = cparser.getNamesOfGroups(); group2Files = cparser.getFiles(); } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } //clears files ofstream out, out1, out2; util.openOutputFile(outputFileName, out); out.close(); util.openOutputFile(accnosFileName, out1); out1.close(); if (chimealns) { util.openOutputFile(alnsFileName, out2); out2.close(); } //paralellized in vsearch map > seqs2RemoveByGroup; createProcessesGroups(group2Files, outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, seqs2RemoveByGroup); if 
(hasCount && dups) { CountTable newCount; newCount.readTable(countfile, true, false); for (map >::iterator it = seqs2RemoveByGroup.begin(); it != seqs2RemoveByGroup.end(); it++) { string group = it->first; for (int k = 0; k < it->second.size(); k++) { newCount.setAbund(it->second[k], group, 0); } } newCount.printTable(newCountFile); } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } if (!dups) { long long numRedund = 0; int totalChimeras = deconvoluteResults(outputFileName, accnosFileName, alnsFileName, numRedund); m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check your sequences. " + toString(totalChimeras) + " chimeras were found.\n"); m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples.\n"); }else { if (hasCount) { unordered_set doNotRemove; CountTable c; c.readTable(newCountFile, true, true); //returns non zeroed names vector namesInTable = c.printTable(newCountFile); outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); for (int i = 0; i < namesInTable.size(); i++) { doNotRemove.insert(namesInTable[i]); } //remove names we want to keep from accnos file. unordered_set accnosNames = util.readAccnos(accnosFileName); ofstream out2; util.openOutputFile(accnosFileName, out2); for (auto it = accnosNames.begin(); it != accnosNames.end(); it++) { if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; } } out2.close(); } } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } }else{ if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } map > dummay; vector dummyGroups; vsearchData* dataBundle = new vsearchData(processors, dummay, outputFileName, vsearchLocation, templatefile, newFasta, countfile, accnosFileName, alnsFileName, "", dummyGroups, vars); dataBundle->setDriverNames(outputFileName, alnsFileName, accnosFileName); driver(dataBundle); int numChimeras = dataBundle->numChimeras; //add headings ofstream out; util.openOutputFile(outputFileName+".temp", out); out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; out.close(); util.appendFiles(outputFileName, outputFileName+".temp"); util.mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str()); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } //remove file made for vsearch if (templatefile == "self") { util.mothurRemove(newFasta); } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check your sequences. 
" + toString(numChimeras) + " chimeras were found.\n"); } outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); } if (removeChimeras) { if (!util.isBlank(accnosFileName)) { m->mothurOut("\nRemoving chimeras from your input files:\n"); string inputString = "fasta=" + fastafile + ", accnos=" + accnosFileName; if ((countfile != "") && (!dups)) { inputString += ", count=" + countfile; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); if (countfile != "") { if (!dups) { //dereplicate=f, so remove sequences where any sample found the reads to be chimeric map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); variables["[tag]"] = "denovo"; if (templatefile != "self") { variables["[tag]"] = "ref"; } string currentName = getOutputFileName("count", variables); util.renameFile(filenames["count"][0], currentName); util.mothurRemove(filenames["count"][0]); outputNames.push_back(currentName); outputTypes["count"].push_back(currentName); }//else, mothur created a modified count file removing chimeras by sample. No need to include count file on remove.seqs command. Deconvolute function created modified count table already } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = "denovo"; if (templatefile != "self") { variables["[tag]"] = "ref"; } string currentName = getOutputFileName("fasta", variables); util.renameFile(filenames["fasta"][0], currentName); util.mothurRemove(filenames["fasta"][0]); outputNames.push_back(currentName); outputTypes["fasta"].push_back(currentName); }else { m->mothurOut("\nNo chimeras found, skipping remove.seqs.\n"); } } //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "execute"); exit(1); } } //********************************************************************************************************************** int ChimeraVsearchCommand::deconvoluteResults(string outputFileName, string accnosFileName, string alnsFileName, long long& numRedund){ try { ofstream out2; util.openOutputFile(accnosFileName+".temp", out2); int total = 0; string name; set namesInFile; //this is so 
if a sequence is found to be chimera in several samples we dont write it to the results file more than once set::iterator itNames; set chimerasInFile; set::iterator itChimeras; if (!util.isBlank(accnosFileName)) { //edit accnos file ifstream in2; util.openInputFile(accnosFileName, in2); while (!in2.eof()) { if (m->getControl_pressed()) { in2.close(); out2.close(); util.mothurRemove(outputFileName); util.mothurRemove((accnosFileName+".temp")); return 0; } in2 >> name; gobble(in2); itChimeras = chimerasInFile.find(name); if (itChimeras == chimerasInFile.end()) { out2 << name << endl; chimerasInFile.insert(name); total++; } } in2.close(); } out2.close(); util.mothurRemove(accnosFileName); rename((accnosFileName+".temp").c_str(), accnosFileName.c_str()); //edit chimera file ifstream in; util.openInputFile(outputFileName, in); ofstream out; util.openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); //out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; float temp1; string parent1, parent2, parent3, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag; name = ""; namesInFile.clear(); //assumptions - in file each read will always look like - if vsearch source is updated, revisit this code. /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N 0.0000 GQY1XT001C296C;size=356; * * * * * * * * 0 0 0 0 0 0 * N 0.0469 GQY1XT001CPCVN;size=154; GQY1XT001C296C;size=356; GQY1XT001C44N8;size=323; GQY1XT001C44N8;size=323; 93.8 91.5 92.3 92.6 92.3 4 0 7 9 3 7 1.5 N 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N */ while (!in.eof()) { if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove((outputFileName+".temp")); return 0; } bool print = false; in >> temp1; gobble(in); in >> name; gobble(in); in >> parent1; gobble(in); in >> parent2; gobble(in); in >> parent3; gobble(in); in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag; gobble(in); //is this name already in the file itNames = namesInFile.find((name)); if (itNames == namesInFile.end()) { //no not in file if (flag == "N") { //are you really a no?? //is this sequence really not chimeric?? itChimeras = chimerasInFile.find(name); //then you really are a no so print, otherwise skip if (itChimeras == chimerasInFile.end()) { print = true; } }else{ print = true; } } if (print) { namesInFile.insert(name); out << temp1 << '\t' << name << '\t' << parent1 << '\t' << parent2 << '\t' << parent3 << '\t' << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << '\t' << temp13 << '\t' << flag << endl; } } in.close(); out.close(); util.mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str()); //edit anls file //assumptions - in file each read will always look like - if vsearch source is updated, revisit this code. 
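// Note for the rewrite below: only Query names are de-duplicated; ParentA/ParentB name lines are always written back out.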
/* ------------------------------------------------------------------------ Query ( 179 nt) F21Fcsw_11639/ab=591/ ParentA ( 179 nt) F11Fcsw_6529/ab=1625/ ParentB ( 181 nt) F21Fcsw_12128/ab=1827/ A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78 Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78 B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80 Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000 Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158 Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158 B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160 Diffs NNN N N N N N BB NNN Votes 000 0 0 0 0 0 ++ 000 Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB A 159 TGGAACTGAGACACGGTCCAA 179 Q 159 TGGAACTGAGACACGGTCCAA 179 B 161 TGGAACTGAGACACGGTCCAA 181 Diffs Votes Model BBBBBBBBBBBBBBBBBBBBB Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5% Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047 */ if (chimealns) { ifstream in3; util.openInputFile(alnsFileName, in3); ofstream out3; util.openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint); name = ""; namesInFile.clear(); string line = ""; while (!in3.eof()) { if (m->getControl_pressed()) { in3.close(); out3.close(); util.mothurRemove(outputFileName); util.mothurRemove((accnosFileName)); util.mothurRemove((alnsFileName+".temp")); return 0; } line = ""; line = util.getline(in3); string temp = ""; if (line != "") { istringstream iss(line); iss >> temp; //are you a name line if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) { int spot = 0; for (int i = 0; i < line.length(); i++) { spot = i; if (line[i] == ')') { break; } else { out3 << line[i]; } } if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + ".\n"); m->setControl_pressed(true); } else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + ".\n"); m->setControl_pressed(true); } else { out << line[spot] << line[spot+1]; name = line.substr(spot+2); //only limit repeats on query names if (temp == "Query") { itNames = namesInFile.find(name); if (itNames == namesInFile.end()) { out << name << endl; namesInFile.insert(name); } }else { out << name << endl; } } }else { out3 << line << endl; } //not need to alter line }else { out3 << endl; } } in3.close(); out3.close(); util.mothurRemove(alnsFileName); rename((alnsFileName+".temp").c_str(), alnsFileName.c_str()); } return total; } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "deconvoluteResults"); exit(1); } } //********************************************************************************************************************** int ChimeraVsearchCommand::readFasta(string filename, map& seqs){ try { //create input file for vsearch //read through fastafile and store info ifstream in; util.openInputFile(filename, in); int num = 0; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return 0; } Sequence seq(in); gobble(in); seqs[seq.getName()] = seq.getUnaligned(); num++; } in.close(); return num; } catch(exception& e) { 
m->errorOut(e, "ChimeraVsearchCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** string ChimeraVsearchCommand::getCountFile(string& inputFile){ try { string countFile = ""; m->mothurOut("\nNo namesfile given, running unique.seqs command to generate one.\n\n"); //use unique.seqs to create new name and fastafile string inputString = "format=count, fasta=" + inputFile; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: unique.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* uniqueCommand = new UniqueSeqsCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); countFile = filenames["count"][0]; inputFile = filenames["fasta"][0]; return countFile; } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "getCountFile"); exit(1); } } //********************************************************************************************************************** int getSeqsVsearch(map& nameMap, string thisGroupsFormattedOutputFilename, string tag, string tag2, long long& numSeqs, string thisGroupsFastaFile, MothurOut* m){ try { int error = 0; ifstream in; Utils util; util.openInputFile(thisGroupsFastaFile, in); vector nameVector; map::iterator itNameMap; while (!in.eof()) { if (m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); itNameMap = nameMap.find(seq.getName()); if (itNameMap == nameMap.end()){ error = 1; m->mothurOut("[ERROR]: " + seq.getName() + " is in your fastafile, but is not in your name or count file, please correct.\n"); }else { int num = itNameMap->second; seqPriorityNode temp(num, seq.getUnaligned(), seq.getName()); nameVector.push_back(temp); } } in.close(); if (error == 1) { return 1; } numSeqs = nameVector.size(); util.printVsearchFile(nameVector, thisGroupsFormattedOutputFilename, tag, tag2); return error; } catch(exception& e) { m->errorOut(e, "ChimeraVsearchCommand", "getSeqsVsearch"); exit(1); } } //********************************************************************************************************************** //out << ">" << nameMapCount[i].name << tag << nameMapCount[i].numIdentical << tag2 << endl << nameMapCount[i].seq << endl; //map > parsedFiles, string outputFName, string filename, string accnos, string alns, string countlist void driverGroups(vsearchData* params){ try { int totalSeqs = 0; Utils util; for (map >::iterator it = params->parsedFiles.begin(); it != params->parsedFiles.end(); it++) { long start = time(nullptr); if (params->m->getControl_pressed()) { return; } long long thisGroupsSeqs = 0; string thisGroup = it->first; map nameMap; if (params->vars->hasCount) { CountTable ct; ct.readTable(it->second[1], false, true); nameMap = ct.getNameMap(); } else { nameMap = util.readNames(it->second[1]); } int error = getSeqsVsearch(nameMap, params->formattedFastaFilename, ";size=", ";", thisGroupsSeqs, it->second[0], params->m); if ((error == 1) || params->m->getControl_pressed()) { return; } totalSeqs += thisGroupsSeqs; //driver((outputFName + thisGroup), filename, (accnos+thisGroup), (alns+thisGroup), numChimeras); params->setDriverNames((params->outputFName + thisGroup), (params->alns+thisGroup), (params->accnos+thisGroup)); driver(params); if (params->m->getControl_pressed()) { return; } //remove file made for 
vsearch if (!params->m->getDebug()) { util.mothurRemove(params->formattedFastaFilename); } else { params->m->mothurOut("[DEBUG]: saving file: " + params->formattedFastaFilename + ".\n"); } //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table //This table will zero out group counts for seqs determined to be chimeric by that group. if (params->vars->dups) { if (!util.isBlank(params->accnos+thisGroup)) { ifstream in; util.openInputFile(params->accnos+thisGroup, in); string name; if (params->vars->hasCount) { //add group to seqs2 vector namesOfChimeras; while (!in.eof()) { in >> name; gobble(in); namesOfChimeras.push_back(name); } in.close(); params->seqs2RemoveByGroup[thisGroup] = namesOfChimeras; }else { map thisnamemap; util.readNames(it->second[1], thisnamemap); map::iterator itN; ofstream out; util.openOutputFile(params->accnos+thisGroup+".temp", out); while (!in.eof()) { in >> name; gobble(in); itN = thisnamemap.find(name); if (itN != thisnamemap.end()) { vector tempNames; util.splitAtComma(itN->second, tempNames); for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; } }else { params->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); params->m->setControl_pressed(true); } } out.close(); in.close(); util.renameFile(params->accnos+thisGroup+".temp", params->accnos+thisGroup); } } } //append files util.appendFiles((params->outputFName+thisGroup), params->outputFName); util.mothurRemove((params->outputFName+thisGroup)); util.appendFiles((params->accnos+thisGroup), params->accnos); util.mothurRemove((params->accnos+thisGroup)); if (params->vars->chimealns) { util.appendFiles((params->alns+thisGroup), params->alns); util.mothurRemove((params->alns+thisGroup)); } params->m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to check " + toString(thisGroupsSeqs) + " sequences from group " + thisGroup + ".\n"); } params->count = totalSeqs; } catch(exception& e) { params->m->errorOut(e, "ChimeraVsearchCommand", "driverGroups"); exit(1); } } /**************************************************************************************************/ //driverGroups(group2Files, outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile); int ChimeraVsearchCommand::createProcessesGroups(map >& groups2Files, string outputFName, string filename, string accnos, string alns, string newCountFile, vector groups, map >& seqs2RemoveByGroup) { try { //sanity check if (groups.size() < processors) { processors = groups.size(); m->mothurOut("Reducing processors to " + toString(groups.size()) + ".\n"); } //divide the groups between the processors vector lines; int remainingPairs = groups.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } //create array of worker threads vector workerThreads; vector data; long long num = 0; time_t start, end; time(&start); //Lauch worker threads for (int i = 0; i < processors-1; i++) { string extension = toString(i+1) + ".temp"; vector thisGroups; map > thisGroupsParsedFiles; for (int j = lines[i+1].start; j < lines[i+1].end; j++) { map >::iterator it = groups2Files.find(groups[j]); if (it != 
groups2Files.end()) { thisGroupsParsedFiles[groups[j]] = (it->second); thisGroups.push_back(groups[j]); } else { m->mothurOut("[ERROR]: missing files for group " + groups[j] + ", skipping\n"); } } vsearchData* dataBundle = new vsearchData(processors, thisGroupsParsedFiles, outputFName+extension, vsearchLocation, templatefile, filename+extension, countfile, accnos+extension, alns+extension, accnos+".byCount."+extension, thisGroups, vars); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverGroups, dataBundle)); } vector thisGroups; map > thisGroupsParsedFiles; for (int j = lines[0].start; j < lines[0].end; j++) { map >::iterator it = groups2Files.find(groups[j]); if (it != groups2Files.end()) { thisGroupsParsedFiles[groups[j]] = (it->second); thisGroups.push_back(groups[j]); } else { m->mothurOut("[ERROR]: missing files for group " + groups[j] + ", skipping\n"); } } vsearchData* dataBundle = new vsearchData(processors, thisGroupsParsedFiles, outputFName, vsearchLocation, templatefile, filename, countfile, accnos, alns, accnos+".byCount.temp", thisGroups, vars); driverGroups(dataBundle); num = dataBundle->count; int numChimeras = dataBundle->numChimeras; seqs2RemoveByGroup = dataBundle->seqs2RemoveByGroup; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; numChimeras += data[i]->numChimeras; for (map >::iterator it = data[i]->seqs2RemoveByGroup.begin(); it != data[i]->seqs2RemoveByGroup.end(); it++) { map >::iterator itSanity = seqs2RemoveByGroup.find(it->first); if (itSanity == seqs2RemoveByGroup.end()) { //we haven't seen this group, should always be true seqs2RemoveByGroup[it->first] = it->second; } } string extension = toString(i+1) + ".temp"; util.appendFiles((outputFName+extension), outputFName); util.mothurRemove((outputFName+extension)); util.appendFiles((accnos+extension), accnos); util.mothurRemove((accnos+extension)); delete data[i]; delete workerThreads[i]; } delete dataBundle; time(&end); m->mothurOut("It took " + toString(difftime(end, start)) + " secs to check " + toString(num) + " sequences.\n\n"); return num; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/chimeravsearchcommand.h000077500000000000000000000077741424121717000227400ustar00rootroot00000000000000// // chimeravsearchcommand.h // Mothur // // Created by Sarah Westcott on 6/16/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. 
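// Declares the vsearchVariables and vsearchData helper structs passed to the chimera.vsearch worker threads, plus the ChimeraVsearchCommand class.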
// #ifndef __Mothur__chimeravsearchcommand__ #define __Mothur__chimeravsearchcommand__ #include "command.hpp" #include "sequenceparser.h" #include "counttable.h" #include "sequencecountparser.h" /**************************************************************************************************/ struct vsearchVariables { bool dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, hasCount, useMindiffs; string abskew, minh, mindiv, xn, dn, mindiffs; vsearchVariables() = default; void setBooleans(bool dps, bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool mindif, bool hc) { useAbskew = Abskew; chimealns = calns; useMinH = MinH; useMindiv = Mindiv; useMindiffs = mindif; useXn = Xn; useDn = Dn; hasCount = hc; dups = dps; } void setVariables(string abske, string min, string mindi, string x, string d, string mind) { abskew = abske; minh = min; mindiv = mindi; mindiffs = mind; xn = x; dn = d; } }; /**************************************************************************************************/ struct vsearchData { string dupsfile; string outputFName; string accnos, alns, formattedFastaFilename, templatefile, vsearchLocation; string driverAccnos, driverAlns, driverOutputFName; map > parsedFiles; map > seqs2RemoveByGroup; int count, numChimeras, processors; vector groups; vsearchVariables* vars; MothurOut* m; Utils util; vsearchData(){} vsearchData(int proc, map > g2f, string o, string uloc, string t, string file, string n, string ac, string al, string nc, vector gr, vsearchVariables* vs) { dupsfile = n; formattedFastaFilename = file; outputFName = o; templatefile = t; accnos = ac; alns = al; m = MothurOut::getInstance(); groups = gr; count = 0; numChimeras = 0; vsearchLocation = uloc; vars = vs; driverAccnos = ac; driverAlns = al; driverOutputFName = o; parsedFiles = g2f; processors = proc; } void setDriverNames(string o, string al, string ac) { driverAccnos = ac; driverAlns = al; driverOutputFName = o; } }; /***********************************************************/ class ChimeraVsearchCommand : public Command { public: ChimeraVsearchCommand(string); ~ChimeraVsearchCommand() = default; vector setParameters(); string getCommandName() { return "chimera.vsearch"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getCommonQuestions(); string getOutputPattern(string); string getCitation() { return "vsearch by https://github.com/torognes/vsearch.\nhttp://www.mothur.org/wiki/Chimera.vsearch\n"; } string getDescription() { return "detect chimeric sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, ucl, useMindiffs, hasCount, dups, removeChimeras; string fastafile, templatefile, countfile, abskew, minh, mindiv, xn, dn, mindiffs, vsearchLocation; int processors; vsearchVariables* vars; vector outputNames; string getCountFile(string&); int readFasta(string, map&); int deconvoluteResults(string, string, string, long long&); int prepFile(string filename, string); int createProcessesGroups(map >& groups2Files, string outputFName, string filename, string accnos, string alns, string newCountFile, vector groups, map >&); }; #endif mothur-1.48.0/source/commands/chopseqscommand.cpp000077500000000000000000001121431424121717000221170ustar00rootroot00000000000000/* * chopseqscommand.cpp * Mothur * * Created by westcott on 5/10/10. * Copyright 2010 Schloss Lab. All rights reserved. 
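 * chop.seqs trims each read to a fixed number of bases (numbases), keeping either the front or the back of the sequence.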
* */ #include "chopseqscommand.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector ChopSeqsCommand::setParameters(){ try { CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","fastq",false,true,true); parameters.push_back(pfastq); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pnumbases("numbases", "Number", "", "0", "", "", "","",false,true,true); parameters.push_back(pnumbases); CommandParameter pcountgaps("countgaps", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcountgaps); CommandParameter pshort("short", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pshort); CommandParameter pkeep("keep", "Multiple", "front-back", "front", "", "", "","",false,false); parameters.push_back(pkeep); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat); CommandParameter pkeepn("keepn", "Boolean", "", "f", "", "", "","",false,false); parameters.push_back(pkeepn); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["fastq"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ChopSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The chop.seqs command reads a fasta or fastq file and outputs a *.chop.* file containing the trimmed sequences. Note: If a sequence is completely 'chopped', an accnos file will be created with the names of the sequences removed. \n"; helpString += "The chop.seqs command parameters are fasta, fastq, qfile, name, group, count, numbases, countgaps and keep. fasta or fastq is required unless you have a valid current fasta file. 
numbases is required.\n"; helpString += "The chop.seqs command should be in the following format: chop.seqs(fasta=yourFasta, numbases=yourNum, keep=yourKeep).\n"; helpString += "If you provide a name, group or count file, any sequences removed from the fasta file will also be removed from those files.\n"; helpString += "The qfile parameter allows you to provide a quality file associated with the fastafile.\n"; helpString += "The numbases parameter allows you to specify the number of bases you want to keep.\n"; helpString += "The keep parameter allows you to specify whether you want to keep the front or the back of your sequence, default=front.\n"; helpString += "The countgaps parameter allows you to specify whether you want to count gaps as bases, default=false.\n"; helpString += "The short parameter allows you to specify that you want to keep sequences that are too short to chop, default=false.\n"; helpString += "The keepn parameter allows you to specify that you want to keep ambiguous bases, default=false.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1.\n"; helpString += "For example, if you ran chop.seqs with numbases=200 and short=t and a sequence had 100 bases, mothur would keep the sequence rather than eliminate it.\n"; helpString += "Example: chop.seqs(fasta=amazon.fasta, numbases=200, keep=front).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ChopSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],chop.fasta"; } else if (type == "fastq") { pattern = "[filename],chop.fastq"; } else if (type == "qfile") { pattern = "[filename],chop.qual"; } else if (type == "name") { pattern = "[filename],chop.names"; } else if (type == "group") { pattern = "[filename],chop.groups"; } else if (type == "count") { pattern = "[filename],chop.count_table"; } else if (type == "accnos") { pattern = "[filename],chop.accnos"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ChopSeqsCommand::ChopSeqsCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); //check for required parameters ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } fastqfile = validParameter.validFile(parameters, "fastq"); if (fastqfile == "not open") { fastqfile = ""; abort = true; } else if (fastqfile == "not found") { fastqfile = ""; } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else {
current->setNameFile(namefile); } qualfile = validParameter.validFile(parameters, "qfile"); if (qualfile == "not open") { qualfile = ""; abort = true; } else if (qualfile == "not found") { qualfile = ""; } else { current->setQualFile(qualfile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if ((fastqfile == "") && (fastafile == "")) { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fastafile and did not provide a fastqfile. The fasta or fastq parameter is required to run the chop.seqs command.\n"); abort = true; } } string temp = validParameter.valid(parameters, "numbases"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, numbases); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "countgaps"); if (temp == "not found") { temp = "f"; } countGaps = util.isTrue(temp); temp = validParameter.valid(parameters, "short"); if (temp == "not found") { temp = "f"; } Short = util.isTrue(temp); temp = validParameter.valid(parameters, "keepn"); if (temp == "not found") { if (qualfile!= "") { temp = "t"; }else { temp = "f"; } } keepN = util.isTrue(temp); format = validParameter.valid(parameters, "format"); if (format == "not found"){ format = "illumina1.8+"; } if (((!keepN) && (qualfile != "")) || ((countGaps) && (qualfile != ""))){ m->mothurOut("[ERROR]: You cannot set keepn=false with a quality file, or set countgaps to true.\n"); abort = true; } keep = validParameter.valid(parameters, "keep"); if (keep == "not found") { keep = "front"; } if (numbases == 0) { m->mothurOut("You must provide the number of bases you want to keep for the chops.seqs command.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand"); exit(1); } } //********************************************************************************************************************** int ChopSeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } bool wroteAccnos = true; string outputFileNameAccnos = ""; if (fastafile != "") { wroteAccnos = runChopFasta(outputFileNameAccnos); } else { wroteAccnos = runChopFastq(outputFileNameAccnos); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (wroteAccnos) { map variables; outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos); //use remove.seqs to create new name, group and count file if ((countfile != "") || (namefile != "") || (groupfile != "")) { string inputString = "accnos=" + outputFileNameAccnos; if (countfile != "") { 
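// a count file supersedes any name or group file when building the remove.seqs command below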
inputString += ", count=" + countfile; } else{ if (namefile != "") { inputString += ", name=" + namefile; } if (groupfile != "") { inputString += ", group=" + groupfile; } } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); if (groupfile != "") { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); string outGroup = getOutputFileName("group", variables); util.renameFile(filenames["group"][0], outGroup); outputNames.push_back(outGroup); outputTypes["group"].push_back(outGroup); } if (namefile != "") { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(namefile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); string outName = getOutputFileName("name", variables); util.renameFile(filenames["name"][0], outName); outputNames.push_back(outName); outputTypes["name"].push_back(outName); } if (countfile != "") { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); string outCount = getOutputFileName("count", variables); util.renameFile(filenames["count"][0], outCount); outputNames.push_back(outCount); outputTypes["count"].push_back(outCount); } } } else { util.mothurRemove(outputFileNameAccnos); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } if (wroteAccnos) { //set accnos file as new current accnosfile itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "execute"); exit(1); } } //********************************************************************************************************************** bool ChopSeqsCommand::runChopFasta(string& outputFileNameAccnos){ try { map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += 
util.hasPath(fastafile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); string outputFileName = getOutputFileName("fasta", variables); outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); outputFileNameAccnos = getOutputFileName("accnos", variables); string fastafileTemp = ""; if (qualfile != "") { fastafileTemp = outputFileName + ".qualFile.Positions.temp"; } bool wroteAccnos = createProcesses(fastafile, outputFileName, outputFileNameAccnos, fastafileTemp); if (m->getControl_pressed()) { return wroteAccnos; } if (qualfile != "") { thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(qualfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(qualfile)); string outputQualFileName = getOutputFileName("qfile", variables); outputNames.push_back(outputQualFileName); outputTypes["qfile"].push_back(outputQualFileName); processQual(outputQualFileName, fastafileTemp); util.mothurRemove(fastafileTemp); } return wroteAccnos; }catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "runChopFasta"); exit(1); } } //********************************************************************************************************************** bool ChopSeqsCommand::runChopFastq(string& outputFileNameAccnos){ try { map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastqfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastqfile)); string outputFileName = getOutputFileName("fastq", variables); outputNames.push_back(outputFileName); outputTypes["fastq"].push_back(outputFileName); outputFileNameAccnos = getOutputFileName("accnos", variables); ifstream in; util.openInputFile(fastqfile, in); ofstream out; util.openOutputFile(outputFileName, out); ofstream outAccnos; util.openOutputFile(outputFileNameAccnos, outAccnos); bool wroteAccnos = false; long long count = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } bool ignore; FastqRead seq(in, ignore, format); gobble(in); if (seq.getName() != "") { bool isGood = getFastqChopped(seq); //output trimmed sequence if (isGood) { seq.printFastq(out); } else{ outAccnos << seq.getName() << endl; wroteAccnos = true; } count++; } //report progress if((count) % 10000 == 0){ m->mothurOut(toString(count)+"\n"); } } //report progress if((count) % 10000 != 0){ m->mothurOut(toString(count)+"\n"); } in.close(); out.close(); outAccnos.close(); return wroteAccnos; }catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "runChopFastq"); exit(1); } } //********************************************************************************************************************** bool ChopSeqsCommand::getFastqChopped(FastqRead& seq) { try { string temp = seq.getSeq(); vector scores = seq.getScores(); //if needed trim sequence if (keep == "front") {//you want to keep the beginning int tempLength = temp.length(); if (tempLength > numbases) { //you have enough bases to remove some int stopSpot = 0; int numBasesCounted = 0; for (int i = 0; i < temp.length(); i++) { numBasesCounted++; if (numBasesCounted >= numbases) { stopSpot = i; break; } } if (stopSpot == 0) { temp = ""; scores.clear(); } else { temp = temp.substr(0, stopSpot+1); if(scores.size() >= stopSpot+1){ scores.resize((stopSpot+1)); } } }else { if (!Short) { temp = ""; scores.clear(); } //sequence too short } }else { //you are keeping the back int tempLength = temp.length(); if (tempLength > numbases) { //you 
have enough bases to remove some int stopSpot = 0; int numBasesCounted = 0; for (int i = (temp.length()-1); i >= 0; i--) { numBasesCounted++; if (numBasesCounted >= numbases) { stopSpot = i; break; } } if (stopSpot == 0) { temp = ""; } else { temp = temp.substr(stopSpot); if(scores.size() >= stopSpot){ vector scores2; for (int h = stopSpot; h < scores.size(); h++) { scores2.push_back(scores[h]); } scores.clear(); scores = scores2; } } }else { if (!Short) { temp = ""; scores.clear(); } //sequence too short } } seq.setSeq(temp); seq.setScores(scores); if (temp != "") { return true; } return false; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "getFastqChopped"); exit(1); } } /**************************************************************************************************/ //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct chopData { string filename, keep, qualValues, qualFileOutput; int count; unsigned long long start; unsigned long long end; OutputWriter* threadWriterAccnos; OutputWriter* threadWriterOutput; MothurOut* m; Utils util; int numbases; bool countGaps, Short, wroteAccnos, keepN; Sequence thisSeq; chopData(){} chopData(string f, unsigned long long st, unsigned long long en, OutputWriter* wo, OutputWriter* wa, string qv) { m = MothurOut::getInstance(); filename = f; threadWriterOutput = wo; threadWriterAccnos = wa; qualFileOutput = qv; start = st; end = en; count = 0; qualValues = ""; keep = "front"; countGaps = false; Short = false; keepN = false; if (qv!="") { keepN = true; } wroteAccnos = false; } void setVariables(string k, bool cGaps, int nbases, bool S, bool kn) { keep = k; countGaps = cGaps; numbases = nbases; Short = S; keepN = kn; } void setChopped(Sequence s) { thisSeq = s; } }; //********************************************************************************************************************** string getChopped(chopData* params) { try { string temp = params->thisSeq.getAligned(); string tempUnaligned = params->thisSeq.getUnaligned(); if (params->countGaps) { //if needed trim sequence if (params->keep == "front") {//you want to keep the beginning int tempLength = temp.length(); if (tempLength > params->numbases) { //you have enough bases to remove some int stopSpot = 0; int numBasesCounted = 0; for (int i = 0; i < temp.length(); i++) { //eliminate N's if (!params->keepN) { if (toupper(temp[i]) == 'N') { temp[i] = '.'; } } numBasesCounted++; if (numBasesCounted >= params->numbases) { stopSpot = i; break; } } if (stopSpot == 0) { temp = ""; } else { temp = temp.substr(0, stopSpot+1); } }else { if (!params->Short) { temp = ""; } //sequence too short } }else { //you are keeping the back int tempLength = temp.length(); if (tempLength > params->numbases) { //you have enough bases to remove some int stopSpot = 0; int numBasesCounted = 0; for (int i = (temp.length()-1); i >= 0; i--) { //eliminate N's if (!params->keepN) { if (toupper(temp[i]) == 'N') { temp[i] = '.'; } } numBasesCounted++; if (numBasesCounted >= params->numbases) { stopSpot = i; break; } } if (stopSpot == 0) { temp = ""; } else { temp = temp.substr(stopSpot+1); } }else { if (!params->Short) { temp = ""; } //sequence too short } } }else{ //if needed trim sequence if (params->keep == "front") {//you want to keep the beginning int tempLength = tempUnaligned.length(); if (tempLength > params->numbases) { //you have enough bases to remove some int stopSpot = 0; int numBasesCounted = 
0; for (int i = 0; i < temp.length(); i++) { //eliminate N's if (!params->keepN) { if (toupper(temp[i]) == 'N') { temp[i] = '.'; tempLength--; if (tempLength < params->numbases) { stopSpot = 0; break; } } } if(isalpha(temp[i])) { numBasesCounted++; } if (numBasesCounted >= params->numbases) { stopSpot = i; break; } } if (stopSpot == 0) { temp = ""; } else { temp = temp.substr(0, stopSpot+1); } params->qualValues = params->thisSeq.getName() +'\t' + toString(0) + '\t' + toString(stopSpot+1) + '\n'; }else { if (!params->Short) { temp = ""; params->qualValues = params->thisSeq.getName() +'\t' + toString(0) + '\t' + toString(0) + '\n'; } //sequence too short else { params->qualValues = params->thisSeq.getName() +'\t' + toString(0) + '\t' + toString(tempLength) + '\n'; } } }else { //you are keeping the back int tempLength = tempUnaligned.length(); if (tempLength > params->numbases) { //you have enough bases to remove some int stopSpot = 0; int numBasesCounted = 0; for (int i = (temp.length()-1); i >= 0; i--) { if (!params->keepN) { //eliminate N's if (toupper(temp[i]) == 'N') { temp[i] = '.'; tempLength--; if (tempLength < params->numbases) { stopSpot = 0; break; } } } if(isalpha(temp[i])) { numBasesCounted++; } if (numBasesCounted >= params->numbases) { stopSpot = i; break; } } if (stopSpot == 0) { temp = ""; } else { temp = temp.substr(stopSpot); } params->qualValues = params->thisSeq.getName() +'\t' + toString(stopSpot) + '\t' + toString(temp.length()-1) + '\n'; }else { if (!params->Short) { temp = ""; params->qualValues = params->thisSeq.getName() +'\t' + toString(0) + '\t' + toString(0) + '\n'; } //sequence too short else { params->qualValues = params->thisSeq.getName() +'\t' + toString(0) + '\t' + toString(tempLength) + '\n'; } } } } return temp; } catch(exception& e) { params->m->errorOut(e, "ChopSeqsCommand", "getChopped"); exit(1); } } /**************************************************************************************/ void driverChop(chopData* params) { try { ifstream in; params->util.openInputFile(params->filename, in); in.seekg(params->start); //adjust if (params->start == 0) { params->util.zapGremlins(in); gobble(in); } ofstream outfTemp; if (params->qualFileOutput != "") { params->util.openOutputFile(params->qualFileOutput, outfTemp); } bool wroteAccnos = false; while (!in.eof()) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { params->qualValues = ""; params->thisSeq = seq; string newSeqString = getChopped(params); //output trimmed sequence if (newSeqString != "") { params->threadWriterOutput->write(">"+seq.getName()+"\n"+newSeqString+"\n"); } else{ params->threadWriterAccnos->write(seq.getName()+"\n"); wroteAccnos = true; } if (params->qualFileOutput != "") { outfTemp << params->qualValues << endl; } params->count++; } #if defined NON_WINDOWS unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->count == params->end) { break; } #endif //report progress if((params->count) % 10000 == 0){ params->m->mothurOut(toString(params->count)+"\n"); } } //report progress if((params->count) % 10000 != 0){ params->m->mothurOut(toString(params->count)+"\n"); } in.close(); if (params->qualFileOutput != "") { outfTemp.close(); } params->wroteAccnos = wroteAccnos; } catch(exception& e) { params->m->errorOut(e, "ChopSeqsCommand", "driver"); exit(1); } } /**************************************************************************************************/ bool 
ChopSeqsCommand::createProcesses(string filename, string outFasta, string outAccnos, string fastafileTemp) { try { //create array of worker threads vector workerThreads; vector data; vector lines; long long num = 0; vector positions; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else if (processors == 1) { lines.push_back(linePair(0, -1)); }//forces it to read whole file else { positions = util.setFilePosFasta(filename, num); if (num < processors) { processors = num; } //figure out how many sequences you have to process int numSeqsPerProcessor = num / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = num - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } } #endif auto synchronizedOutputFile = std::make_shared(outFasta); auto synchronizedAccnosFile = std::make_shared(outAccnos); //Lauch worker threads for (int i = 0; i < processors-1; i++) { string extension = toString(i+1) + ".temp"; OutputWriter* threadOutputWriter = new OutputWriter(synchronizedOutputFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedAccnosFile); chopData* dataBundle = new chopData(filename, lines[i+1].start, lines[i+1].end, threadOutputWriter, threadAccnosWriter, fastafileTemp+extension); dataBundle->setVariables(keep, countGaps, numbases, Short, keepN); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverChop, dataBundle)); } OutputWriter* threadOutputWriter = new OutputWriter(synchronizedOutputFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedAccnosFile); chopData* dataBundle = new chopData(filename, lines[0].start, lines[0].end, threadOutputWriter, threadAccnosWriter, fastafileTemp); dataBundle->setVariables(keep, countGaps, numbases, Short, keepN); driverChop(dataBundle); num = dataBundle->count; bool wroteAccnos = dataBundle->wroteAccnos; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->threadWriterOutput; delete data[i]->threadWriterAccnos; if (data[i]->wroteAccnos) { wroteAccnos = true; } #if defined NON_WINDOWS #else if (data[i]->count != data[i]->end) { m->mothurOut("[ERROR]: process " + toString(i+1) + " only processed " + toString(data[i]->count) + " of " + toString(data[i]->end) + " sequences assigned to it, quitting. 
\n"); m->setControl_pressed(true); } #endif if (fastafileTemp != "") { util.appendFiles(data[i]->qualFileOutput, fastafileTemp); util.mothurRemove(data[i]->qualFileOutput); } delete data[i]; delete workerThreads[i]; } synchronizedOutputFile->close(); synchronizedAccnosFile->close(); delete threadOutputWriter; delete threadAccnosWriter; delete dataBundle; return wroteAccnos; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** int ChopSeqsCommand::processQual(string outputFile, string inputFile) { try { ofstream out; util.openOutputFile(outputFile, out); ifstream in; util.openInputFile(inputFile, in); ifstream inQual; util.openInputFile(qualfile, inQual); m->mothurOut("Processing the quality file.\n"); int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); out.close(); return 0; } QualityScores qual(inQual); gobble(inQual); string name = ""; int start = 0; int end = 0; in >> name >> start >> end; gobble(in); if (qual.getName() != "") { if (qual.getName() != name) { start = 0; end = 0; } else if (start != 0) { qual.trimQScores(start, -1); qual.printQScores(out); }else if ((start == 0) && (end == 0)) {} else if ((start == 0) && (end != 0)) { qual.trimQScores(-1, end); qual.printQScores(out); } } count++; //report progress if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } //report progress if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } in.close(); inQual.close(); out.close(); return 0; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "processQual"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/chopseqscommand.h000077500000000000000000000024031424121717000215610ustar00rootroot00000000000000#ifndef CHOPSEQSCOMMAND_H #define CHOPSEQSCOMMAND_H /* * chopseqscommand.h * Mothur * * Created by westcott on 5/10/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "sequence.hpp" #include "qualityscores.h" #include "writer.h" #include "fastqread.h" class ChopSeqsCommand : public Command { public: ChopSeqsCommand(string); ~ChopSeqsCommand(){}; vector setParameters(); string getCommandName() { return "chop.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Chops.seqs"; } string getDescription() { return "trim sequence length"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string fastafile, fastqfile, keep, namefile, groupfile, countfile, qualfile, format; bool abort, countGaps, Short, keepN; int numbases, processors; vector outputNames; bool runChopFasta(string&); bool runChopFastq(string&); bool getFastqChopped(FastqRead&); bool createProcesses(string, string, string, string); int processQual(string, string); }; #endif mothur-1.48.0/source/commands/classifyotucommand.cpp000077500000000000000000001010151424121717000226330ustar00rootroot00000000000000/* * classifyotucommand.cpp * Mothur * * Created by westcott on 6/1/10. * Copyright 2010 Schloss Lab. All rights reserved. 
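 * classify.otu finds a consensus taxonomy for each OTU in a list file and writes cons.taxonomy and cons.tax.summary files.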
* */ #include "classifyotucommand.h" #include "phylotree.h" #include "phylosummary.h" //********************************************************************************************************************** vector ClassifyOtuCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(plist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","constaxonomy",false,true,true); parameters.push_back(ptaxonomy); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter poutput("output", "Multiple", "plain-detail", "detail", "", "", "","",false,false, true); parameters.push_back(poutput); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter prelabund("relabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prelabund); CommandParameter pprintlevel("printlevel", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pprintlevel); CommandParameter ppersample("persample", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppersample); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "","",false,false); parameters.push_back(pbasis); CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "","",false,true); parameters.push_back(pcutoff); CommandParameter pthreshold("threshold", "Number", "", "0", "", "", "","",false,true); parameters.push_back(pthreshold); CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pprobs); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["constaxonomy"] = tempOutNames; outputTypes["taxsummary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClassifyOtuCommand::getHelpString(){ try { string helpString = ""; helpString += "The classify.otu command parameters are list, taxonomy, name, group, count, persample, cutoff, label, basis, relabund and probs. The taxonomy and list parameters are required unless you have a valid current file.\n"; helpString += "The name parameter allows you add a names file with your taxonomy file.\n"; helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n"; helpString += "The count parameter allows you add a count file associated with your list file. 
When using the count parameter mothur assumes your list file contains only uniques.\n"; helpString += "The basis parameter allows you to indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n"; helpString += "For example, consider the following: basis=sequence could give Clostridiales 3 105 16 43 46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n"; helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n"; helpString += "Now for basis=otu, it could give Clostridiales 3 7 6 1 2, where 7 is the number of otus that classified to Clostridiales.\n"; helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n"; helpString += "The label parameter allows you to select which distance levels you would like output files created for, and is separated by dashes.\n"; helpString += "The persample parameter allows you to find a consensus taxonomy for each group. Default=f\n"; helpString += "The relabund parameter allows you to indicate you want the summary file values to be relative abundances rather than raw abundances. Default=F.\n"; helpString += "The default value for label is all labels in your input file.\n"; helpString += "The output parameter allows you to specify the format of your summary file. Options are simple and detail. The default is detail.\n"; helpString += "The printlevel parameter allows you to specify the taxlevel of your summary file to print to. Options are 1 to the max level in the file. The default is -1, meaning the max level. If you select a level greater than the level your sequences classify to, mothur will print to your max level.\n"; helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your otu taxonomy output. The default is 51, meaning 51%. Cutoff cannot be below 51.\n"; helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n"; helpString += "The threshold parameter allows you to specify a cutoff for the taxonomy file that is being inputted. Once the classification falls below the threshold, mothur will refer to it as unclassified when calculating the consensus. This feature is similar to adjusting the cutoff in classify.seqs.
Default=0.\n"; helpString += "The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n"; helpString += "Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClassifyOtuCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "constaxonomy") { pattern = "[filename],[distance],cons.taxonomy"; } else if (type == "taxsummary") { pattern = "[filename],[distance],cons.tax.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ClassifyOtuCommand::ClassifyOtuCommand(string option) : Command() { try{ allLines = true; //allow user to run help if (option == "help") { help(); abort = true; calledHelp = true; }else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not found") { //if there is a current list file, use it listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { m->mothurOut("You have no current listfile and the list parameter is required.\n"); abort = true; } } else if (listfile == "not open") { abort = true; } else { current->setListFile(listfile); } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not found") { //if there is a current list file, use it taxfile = current->getTaxonomyFile(); if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter.\n"); } else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required.\n"); abort = true; } } else if (taxfile == "not open") { abort = true; } else { current->setTaxonomyFile(taxfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } //check for optional parameter and set 
defaults // ...at some point we should add some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; allLines = true; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } basis = validParameter.valid(parameters, "basis"); if (basis == "not found") { basis = "otu"; } if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu.\n"); basis = "otu"; } string temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "51"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "threshold"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, threshold); temp = validParameter.valid(parameters, "probs"); if (temp == "not found"){ temp = "true"; } probs = util.isTrue(temp); temp = validParameter.valid(parameters, "persample"); if (temp == "not found"){ temp = "f"; } persample = util.isTrue(temp); temp = validParameter.valid(parameters, "relabund"); if (temp == "not found"){ temp = "false"; } relabund = util.isTrue(temp); temp = validParameter.valid(parameters, "printlevel"); if (temp == "not found"){ temp = "-1"; } util.mothurConvert(temp, printlevel); output = validParameter.valid(parameters, "output"); if(output == "not found"){ output = "detail"; } if ((output != "simple") && (output != "detail")) { m->mothurOut(output + " is not a valid output form. Options are simple and detail. I will use detail.\n"); output = "detail"; } if ((groupfile == "") && (countfile == "")) { if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information. Setting persample=f.\n"); persample = false; } } if (countfile != "") { CountTable cts; if (!cts.testGroups(countfile)) { if (persample) { m->mothurOut("persample is only valid with a group file, or count file with group information.
Setting persample=f.\n"); persample = false; } } } if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand"); exit(1); } } //********************************************************************************************************************** int ClassifyOtuCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //if user gave a namesfile then use it if (namefile != "") { util.readNames(namefile, nameMap, true); } if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); groups = groupMap->getNamesOfGroups(); } else { groupMap = nullptr; } if (countfile != "") { ct = new CountTable(); ct->readTable(countfile, true, false); if (ct->hasGroupInfo()) { groups = ct->getNamesOfGroups(); } } else { ct = nullptr; } //read taxonomy file and save in map for easy access in building bin trees bool removeConfidences = false; if (threshold == 0) { removeConfidences = true; } util.readTax(taxfile, taxMap, removeConfidences); if (threshold != 0) { processTaxMap(); } if (m->getControl_pressed()) { return 0; } InputData input(listfile, "list", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } process(list); delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } if (groupMap != nullptr) { delete groupMap; } if (ct != nullptr) { delete ct; } if (m->getControl_pressed()) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set constaxonomy file as new current constaxonomyfile string currentName = ""; itTypes = outputTypes.find("constaxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setConsTaxonomyFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "execute"); exit(1); } } //********************************************************************************************************************** vector ClassifyOtuCommand::findConsensusTaxonomy(vector names, int& size, string& conTax, string group) { try{ conTax = ""; vector allNames; map::iterator it; map::iterator it2; //create a tree containing sequences from this bin PhyloTree* phylo = new PhyloTree(); size = 0; for (int i = 0; i < names.size(); i++) { if (group != "") { //no need to check for name file, names already added in previous step //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique it = taxMap.find(names[i]); if (it == taxMap.end()) { //this name is not in taxonomy file, skip it m->mothurOut("[WARNING]: " + names[i] + " is not in your taxonomy file. 
I will not include it in the consensus.\n"); }else{ if (countfile != "") { int numDups = ct->getGroupCount(names[i], group); for (int j = 0; j < numDups; j++) { phylo->addSeqToTree(names[i], it->second); } size += numDups; }else{ //add seq to tree phylo->addSeqToTree(names[i], it->second); size++; } allNames.push_back(names[i]); } }else { //if namesfile include the names if (namefile != "") { //is this sequence in the name file - namemap maps seqName -> repSeqName it2 = nameMap.find(names[i]); if (it2 == nameMap.end()) { //this name is not in name file, skip it m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus.\n"); }else{ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique it = taxMap.find(it2->second); if (it == taxMap.end()) { //this name is not in taxonomy file, skip it if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus.\n"); } else { m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus.\n"); } }else{ //add seq to tree phylo->addSeqToTree(names[i], it->second); size++; allNames.push_back(names[i]); } } }else{ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique it = taxMap.find(names[i]); if (it == taxMap.end()) { //this name is not in taxonomy file, skip it m->mothurOut("[WARNING]: " + names[i] + " is not in your taxonomy file. I will not include it in the consensus.\n"); }else{ if (countfile != "") { int numDups = ct->getNumSeqs(names[i]); for (int j = 0; j < numDups; j++) { phylo->addSeqToTree(names[i], it->second); } size += numDups; }else{ //add seq to tree phylo->addSeqToTree(names[i], it->second); size++; } allNames.push_back(names[i]); } } } if (m->getControl_pressed()) { delete phylo; return allNames; } } //build tree phylo->assignHeirarchyIDs(0); TaxNode currentNode = phylo->get(0); int myLevel = 0; //at each level while (currentNode.children.size() != 0) { //you still have more to explore TaxNode bestChild; int bestChildSize = 0; //go through children for (map::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) { TaxNode temp = phylo->get(itChild->second); //select child with largest accesions - most seqs assigned to it if (temp.accessions.size() > bestChildSize) { bestChild = phylo->get(itChild->second); bestChildSize = temp.accessions.size(); } } //phylotree adds an extra unknown so we want to remove that if (bestChild.name == "unknown") { bestChildSize--; } //is this taxonomy above cutoff int consensusConfidence = ceil((bestChildSize / (float) size) * 100); if (consensusConfidence >= cutoff) { //if yes, add it if (probs) { conTax += bestChild.name + "(" + toString(consensusConfidence) + ");"; }else{ conTax += bestChild.name + ";"; } myLevel++; }else{ //if no, quit break; } //move down a level currentNode = bestChild; } if (conTax == "") { conTax = "unknown;"; } if (myLevel != phylo->getMaxLevel()) { conTax = util.addUnclassifieds(conTax, phylo->getMaxLevel(), probs); } delete phylo; return allNames; } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy"); exit(1); } } //********************************************************************************************************************** int ClassifyOtuCommand::process(ListVector* processList) { try{ string conTax; int size; //create 
output file if (outputdir == "") { outputdir += util.hasPath(listfile); } ofstream out; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); variables["[distance]"] = processList->getLabel(); string outputFile = getOutputFileName("constaxonomy", variables); util.openOutputFile(outputFile, out); outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile); ofstream outSum; string outputSumFile = getOutputFileName("taxsummary", variables); util.openOutputFile(outputSumFile, outSum); outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile); out << "OTU\tSize\tTaxonomy" << endl; PhyloSummary* taxaSum; if (countfile != "") { taxaSum = new PhyloSummary(ct,relabund, printlevel); } else { taxaSum = new PhyloSummary(groupMap,relabund, printlevel); } vector outs; vector taxaSums; map groupIndex; if (persample) { for (int i = 0; i < groups.size(); i++) { groupIndex[groups[i]] = i; variables["[distance]"] = processList->getLabel() + "." + groups[i]; string outputFile = getOutputFileName("constaxonomy", variables); ofstream temp; util.openOutputFile(outputFile, temp); outs.push_back(outputFile); temp << "OTU\tSize\tTaxonomy" << endl; outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile); PhyloSummary* taxaSumt; if (countfile != "") { taxaSumt = new PhyloSummary(ct, relabund, printlevel); }else { taxaSumt = new PhyloSummary(groupMap,relabund, printlevel); } taxaSums.push_back(taxaSumt); } } //for each bin in the list vector string snumBins = toString(processList->getNumBins()); vector binLabels = processList->getLabels(); for (int i = 0; i < processList->getNumBins(); i++) { if (m->getControl_pressed()) { break; } vector names; string binnames = processList->get(i); vector thisNames; util.splitAtComma(binnames, thisNames); names = findConsensusTaxonomy(thisNames, size, conTax, ""); if (m->getControl_pressed()) { break; } out << binLabels[i] << '\t' << size << '\t' << conTax << endl; string noConfidenceConTax = conTax; util.removeConfidences(noConfidenceConTax); //add this bins taxonomy to summary if (basis == "sequence") { for(int j = 0; j < names.size(); j++) { //int numReps = 1; //if (countfile != "") { numReps = ct->getNumSeqs(names[j]); } //for(int k = 0; k < numReps; k++) { taxaSum->addSeqToTree(names[j], noConfidenceConTax); } taxaSum->addSeqToTree(names[j], noConfidenceConTax); } }else { //otu map containsGroup; if (countfile != "") { if (ct->hasGroupInfo()) { vector mGroups = ct->getNamesOfGroups(); for (int k = 0; k < names.size(); k++) { vector counts = ct->getGroupCounts(names[k]); for (int h = 0; h < counts.size(); h++) { if (counts[h] != 0) { containsGroup[mGroups[h]] = true; } } } } }else { if (groupfile != "") { vector mGroups = groupMap->getNamesOfGroups(); for (int j = 0; j < mGroups.size(); j++) { containsGroup[mGroups[j]] = false; } for (int k = 0; k < names.size(); k++) { //find out the sequences group string group = groupMap->getGroup(names[k]); if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total.\n"); } else { containsGroup[group] = true; } } } } taxaSum->addSeqToTree(noConfidenceConTax, containsGroup); } if (persample) { //divide names by group map > parsedNames; map >::iterator itParsed; //parse names by group for (int j = 0; j < names.size(); j++) { if (groupfile != "") { string group = groupMap->getGroup(names[j]); itParsed = 
parsedNames.find(group); if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); } else { vector tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; } }else { //count file was used vector thisSeqsGroups = ct->getGroups(names[j]); for (int k = 0; k < thisSeqsGroups.size(); k++) { string group = thisSeqsGroups[k]; itParsed = parsedNames.find(group); if (itParsed != parsedNames.end()) { itParsed->second.push_back(names[j]); } else { vector tempNames; tempNames.push_back(names[j]); parsedNames[group] = tempNames; } } } } for (itParsed = parsedNames.begin(); itParsed != parsedNames.end(); itParsed++) { vector theseNames = findConsensusTaxonomy(itParsed->second, size, conTax, itParsed->first); if (m->getControl_pressed()) { break; } ofstream out; util.openOutputFileAppend(outs[groupIndex[itParsed->first]], out); out << binLabels[i] << '\t' << size << '\t' << conTax << endl; out.close(); string noConfidenceConTax = conTax; util.removeConfidences(noConfidenceConTax); //add this bins taxonomy to summary if (basis == "sequence") { for(int j = 0; j < theseNames.size(); j++) { int numReps = 1; if (countfile != "") { numReps = ct->getGroupCount(theseNames[j], itParsed->first); } //get num seqs for this seq from this group for(int k = 0; k < numReps; k++) { (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(theseNames[j], noConfidenceConTax); } } }else { //otu map containsGroup; containsGroup[itParsed->first] = true; (taxaSums[groupIndex[itParsed->first]])->addSeqToTree(noConfidenceConTax, containsGroup); } } } } out.close(); //print summary file taxaSum->print(outSum, output); outSum.close(); if (persample) { for (int i = 0; i < groups.size(); i++) { ofstream outSums; variables["[distance]"] = processList->getLabel() + "." + groups[i]; string outputSumFile = getOutputFileName("taxsummary", variables); util.openOutputFile(outputSumFile, outSums); outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile); taxaSums[i]->print(outSums, output); outSums.close(); delete taxaSums[i]; } } delete taxaSum; return 0; } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "process"); exit(1); } } /**************************************************************************************************/ int ClassifyOtuCommand::processTaxMap() { try{ for (map::iterator it = taxMap.begin(); it != taxMap.end(); it++) { if (m->getControl_pressed()) { break; } vector taxons; string tax = it->second; int taxLength = tax.length(); string taxon = ""; int spot = 0; for(int i=0;isecond = newTax; //} }else { util.removeConfidences(tax); it->second = tax; } //leave tax alone } return 0; } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "processTaxMap"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/classifyotucommand.h000077500000000000000000000031621424121717000223040ustar00rootroot00000000000000#ifndef CLASSIFYOTUSCOMMAND_H #define CLASSIFYOTUSCOMMAND_H /* * classifyotucommand.h * Mothur * * Created by westcott on 6/1/10. * Copyright 2010 Schloss Lab. All rights reserved. 
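 *
 *  findConsensusTaxonomy() builds a PhyloTree from the classified sequences in a bin and
 *  walks down from the root, keeping at each level the child taxon that holds the most
 *  sequences; the walk stops once ceil(100 * bestChildSize / binSize) drops below the
 *  cutoff (>= 51%), and the levels below that point are filled in as unclassified.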
* */ #include "command.hpp" #include "listvector.hpp" #include "inputdata.h" #include "counttable.h" class ClassifyOtuCommand : public Command { public: ClassifyOtuCommand(string); ~ClassifyOtuCommand() = default; vector setParameters(); string getCommandName() { return "classify.otu"; } string getCommandCategory() { return "Phylotype Analysis"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol 77:3219.\nhttp://www.mothur.org/wiki/Classify.otu"; } string getDescription() { return "find the concensus taxonomy for each OTU"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: GroupMap* groupMap; CountTable* ct; ListVector* list; InputData* input; string listfile, namefile, taxfile, label, groupfile, basis, countfile, output; bool abort, allLines, probs, persample, relabund; int cutoff, threshold, printlevel; set labels; //holds labels to be used vector outputNames, groups; map nameMap; map taxMap; int process(ListVector*); int processTaxMap(); vector findConsensusTaxonomy(vector, int&, string&, string); // returns the name of the "representative" taxonomy of given bin }; #endif mothur-1.48.0/source/commands/classifyseqscommand.cpp000066400000000000000000001036421424121717000230040ustar00rootroot00000000000000/* * classifyseqscommand.cpp * Mothur * * Created by westcott on 11/2/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "classifyseqscommand.h" //********************************************************************************************************************** vector ClassifySeqsCommand::setParameters(){ try { CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptaxonomy); CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","taxonomy",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter poutput("output", "Multiple", "simple-detail", "detail", "", "", "","",false,false, true); parameters.push_back(poutput); CommandParameter psearch("search", "Multiple", "kmer-suffix-distance-align", "kmer", "", "", "","",false,false); parameters.push_back(psearch); CommandParameter pksize("ksize", "Number", "", "8", "", "", "","",false,false); parameters.push_back(pksize); CommandParameter pmethod("method", "Multiple", "wang-knn-zap", "wang", "", "", "","",false,false); parameters.push_back(pmethod); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); CommandParameter pprintlevel("printlevel", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pprintlevel); CommandParameter pmismatch("mismatch", "Number", "", "-1.0", 
"", "", "","",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); CommandParameter pcutoff("cutoff", "Number", "", "80", "", "", "","",false,true); parameters.push_back(pcutoff); CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pprobs); CommandParameter piters("iters", "Number", "", "100", "", "", "","",false,true); parameters.push_back(piters); CommandParameter pshortcuts("shortcuts", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pshortcuts); CommandParameter prelabund("relabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prelabund); CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "","",false,true); parameters.push_back(pnumwanted); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["taxsummary"] = tempOutNames; outputTypes["matchdist"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClassifySeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n"; helpString += "The classify.seqs command parameters are " + getCommandParameters() + ". The reference, fasta and taxonomy parameters are required.\n"; helpString += "The search parameter allows you to specify the method to find most similar reference sequence. Your options are: suffix, kmer, align and distance. The default is kmer.\n"; helpString += "The name parameter allows you add a names file with your fasta file.\n"; helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n"; helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n"; helpString += "The method parameter allows you to specify classification method to use. Your options are: wang, knn and zap. The default is wang.\n"; helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8.\n"; helpString += "The processors parameter allows you to specify the number of processors to use. The default is all available.\n"; helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n"; helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n"; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. 
The default is -2.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; helpString += "The numwanted parameter allows you to specify the number of sequence matches you want with the knn method. The default is 10.\n"; helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy. The default is 80.\n"; helpString += "The probs parameter shuts off the bootstrapping results for the wang and zap method. The default is true, meaning you want the bootstrapping to be shown.\n"; helpString += "The relabund parameter allows you to indicate you want the summary file values to be relative abundances rather than raw abundances. Default=F. \n"; helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the wang method. The default is 100.\n"; helpString += "The output parameter allows you to specify format of your summary file. Options are simple and detail. The default is detail.\n"; helpString += "The printlevel parameter allows you to specify taxlevel of your summary file to print to. Options are 1 to the max level in the file. The default is the max level. If you select a level greater than the level your sequences classify to, mothur will print all possible levels. \n"; helpString += "The classify.seqs command should be in the following format: \n"; helpString += "classify.seqs(reference=yourReferenceFile, fasta=yourFastaFile, taxonomy=yourTaxonomyFile) \n"; helpString += "Example classify.seqs(fasta=amazon.fasta, reference=trainset9_032012.pds.fasta, taxonomy=trainset9_032012.pds.tax)\n"; helpString += "The .taxonomy file consists of 2 columns: 1 = your sequence name, 2 = the taxonomy for your sequence. \n"; helpString += "The .tax.summary is a summary of the different taxonomies represented in your fasta file. \n"; getCommonQuestions(); return helpString; } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClassifySeqsCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string question = "Does the reference need to be aligned?"; questions.push_back(question); string qanswer = "\tFor wang, knn and zap methods, mothur does not require an aligned reference to assign a taxonomy. Wang use k-mers to find the probabilities of the taxonomic assignment. \n"; qanswers.push_back(qanswer); question = "What reference should I use to classify?"; questions.push_back(question); qanswer = "\tWe provide mothur formatted references on the wiki. https://www.mothur.org/wiki/RDP_reference_files https://mothur.org/wiki/Silva_reference_files https://www.mothur.org/wiki/Greengenes-formatted_databases Alternatively, mothur allows you to create your own references as long as they are in fasta and taxonomy file format. You can find mothur's files formats here, https://www.mothur.org/wiki/File_Types. \n"; qanswers.push_back(qanswer); string issue = "Why are my sequences 'unclassifed'?"; issues.push_back(issue); string ianswer = "\tWhen it comes to classification there are two things main things that effect the number of unclassified results: the quality of the reads and the reference files. 
The bayesian classifier calculates the probabilities of reference sequences kmers being in a given genus and then uses those probabilities to classify the sequences. The quality of the query sequences affects the ability of the classifier to find enough kmers to find a good classification. A poor quality sequence is like turning up the noise in a crowded restaurant and trying to hear your date's father's name. Was that John, Tom or Ron? Uh oh... A good reference is also needed for similar reasons.\n"; ianswers.push_back(ianswer); string howto = "How do you recommend classifying to the species level?"; howtos.push_back(howto); string hanswer = "\tUnfortunately I do not. You will never get species level classification if you are using the RDP or Silva references. They only go to the genus level. Even the greengenes database only has 10% or so of sequences with species level names (greengenes hasn’t been updated in quite a few years). I and many others would contend that using 16S and especially a fragment to get a species name is asking too much - you need a culture and genome sequencing to do that. If someone wanted to give it a shot, they would need to add the species level names to the taxonomy strings. Also, they would need to add many more sequences that represent each species. Outside of a few groups of bacteria where the researchers have carefully selected the region (e.g. Lactobacillus or Staphylococcus), I really think this would be a lot of work for little/no benefit.\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string ClassifySeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "taxonomy") { pattern = "[filename],[tag],[tag2],taxonomy"; } else if (type == "taxsummary") { pattern = "[filename],[tag],[tag2],tax.summary"; } else if (type == "accnos") { pattern = "[filename],[tag],[tag2],flip.accnos"; } else if (type == "matchdist") { pattern = "[filename],[tag],[tag2],match.dist"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ClassifySeqsCommand::ClassifySeqsCommand(string option) : Command() { try { hasName = false; hasCount=false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { 
current->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } if (namefile != "") { hasName = true; } //check for required parameters countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if (countfile != "") { hasCount = true; } //make sure there is at least one valid file left if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name.\n"); abort = true; } bool hasGroup = false; groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); hasGroup = true; } if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group.\n"); abort = true; } //check for optional parameter and set defaults // ...at some point should added some additional type checking... string temp; temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); //this has to go after save so that if the user sets save=t and provides no reference we abort templateFileName = validParameter.validFile(parameters, "reference"); if (templateFileName == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required for the classify.seqs command.\n"); abort = true; }else if (templateFileName == "not open") { abort = true; } //this has to go after save so that if the user sets save=t and provides no reference we abort taxonomyFileName = validParameter.validFile(parameters, "taxonomy"); if (taxonomyFileName == "not found") { m->mothurOut("[ERROR]: The taxonomy parameter is a required for the classify.seqs command.\n"); abort = true; }else if (taxonomyFileName == "not open") { abort = true; } search = validParameter.valid(parameters, "search"); if (search == "not found"){ search = "kmer"; } method = validParameter.valid(parameters, "method"); if (method == "not found"){ method = "wang"; } temp = validParameter.valid(parameters, "ksize"); if (temp == "not found"){ temp = "8"; if (method == "zap") { temp = "7"; } } util.mothurConvert(temp, kmerSize); temp = validParameter.valid(parameters, "match"); if (temp == "not found"){ temp = "1.0"; } util.mothurConvert(temp, match); temp = validParameter.valid(parameters, "printlevel"); if (temp == "not found"){ temp = "-1"; } util.mothurConvert(temp, printlevel); temp = validParameter.valid(parameters, "mismatch"); if (temp == "not found"){ temp = "-1.0"; } util.mothurConvert(temp, misMatch); temp = validParameter.valid(parameters, "gapopen"); if (temp == "not found"){ temp = "-2.0"; } util.mothurConvert(temp, gapOpen); temp = validParameter.valid(parameters, "gapextend"); if (temp == "not found"){ temp = "-1.0"; } util.mothurConvert(temp, gapExtend); temp = validParameter.valid(parameters, "numwanted"); if (temp == "not found"){ temp = "10"; } util.mothurConvert(temp, numWanted); temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found"){ temp = "80"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "probs"); if (temp == "not found"){ temp = "true"; } probs = 
util.isTrue(temp); temp = validParameter.valid(parameters, "relabund"); if (temp == "not found"){ temp = "false"; } relabund = util.isTrue(temp); temp = validParameter.valid(parameters, "shortcuts"); if (temp == "not found"){ temp = "true"; } writeShortcuts = util.isTrue(temp); flip = true; temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, iters); output = validParameter.valid(parameters, "output"); if(output == "not found"){ output = "detail"; } if ((output != "simple") && (output != "detail")) { m->mothurOut(output + " is not a valid output form. Options are simple and detail. I will use detail.\n"); output = "detail"; } if ((method == "wang") && (search != "kmer")) { m->mothurOut("The wang method requires the kmer search. " + search + " will be disregarded, and kmer will be used.\n" ); search = "kmer"; } if ((method == "zap") && ((search != "kmer") && (search != "align"))) { m->mothurOut("The zap method requires the kmer or align search. " + search + " will be disregarded, and kmer will be used.\n" ); search = "kmer"; } } } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "ClassifySeqsCommand"); exit(1); } } //********************************************************************************************************************** ClassifySeqsCommand::~ClassifySeqsCommand(){} //********************************************************************************************************************** int ClassifySeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } string outputMethodTag = method; if(method == "wang"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion()); } else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, util.getRandomNumber(), current->getVersion()); } else if(method == "zap"){ outputMethodTag = search + "_" + outputMethodTag; if (search == "kmer") { classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); } else { classify = new AlignTree(templateFileName, taxonomyFileName, cutoff); } } else { m->mothurOut(search + " is not a valid method option. 
I will run the command using wang.\n"); classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion()); } if (m->getControl_pressed()) { delete classify; return 0; } m->mothurOut("Classifying sequences from " + fastafile + " ...\n" ); string baseTName = util.getSimpleName(taxonomyFileName); //set rippedTaxName to string RippedTaxName = ""; bool foundDot = false; for (int i = baseTName.length()-1; i >= 0; i--) { if (foundDot && (baseTName[i] != '.')) { RippedTaxName = baseTName[i] + RippedTaxName; } else if (foundDot && (baseTName[i] == '.')) { break; } else if (!foundDot && (baseTName[i] == '.')) { foundDot = true; } } if (outputdir == "") { outputdir += util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = RippedTaxName; variables["[tag2]"] = outputMethodTag; string newTaxonomyFile = getOutputFileName("taxonomy", variables); string newaccnosFile = getOutputFileName("accnos", variables); string tempTaxonomyFile = outputdir + util.getRootName(util.getSimpleName(fastafile)) + "taxonomy.temp"; string taxSummary = getOutputFileName("taxsummary", variables); if ((method == "knn") && (search == "distance")) { string DistName = getOutputFileName("matchdist", variables); classify->setDistName(DistName); outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName); } outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile); outputNames.push_back(taxSummary); outputTypes["taxsummary"].push_back(taxSummary); long start = time(nullptr); int numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastafile); if (!util.isBlank(newaccnosFile)) { m->mothurOut("\n[WARNING]: mothur reversed some your sequences for a better classification. If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences.\n"); outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile); }else { util.mothurRemove(newaccnosFile); } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences.\n\n"); start = time(nullptr); //read namefile map > nameMap; map >::iterator itNames; if(namefile != "") { m->mothurOut("Reading " + namefile + "..."); cout.flush(); nameMap.clear(); //remove old names util.readNames(namefile, nameMap); m->mothurOut(" Done.\n"); } //output taxonomy with the unclassified bins added ifstream inTax; util.openInputFile(newTaxonomyFile, inTax); ofstream outTax; string unclass = newTaxonomyFile + ".unclass.temp"; util.openOutputFile(unclass, outTax); //get maxLevel from phylotree so you know how many 'unclassified's to add int maxLevel = classify->getMaxLevel(); //read taxfile - this reading and rewriting is done to preserve the confidence scores. 
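//Each name/taxonomy pair is reread below, padded out to the classifier's deepest level with
//"unclassified" entries via util.addUnclassifieds, written to a temp file that later replaces
//newTaxonomyFile, and added to the PhyloSummary (expanded through the name file or count table
//when one was supplied) to produce the *.tax.summary output.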
string name, taxon; GroupMap* groupMap = nullptr; CountTable* ct = nullptr; PhyloSummary* taxaSum; if (hasCount) { ct = new CountTable(); ct->readTable(countfile, true, false); taxaSum = new PhyloSummary(ct, relabund, printlevel); }else { if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); } taxaSum = new PhyloSummary(groupMap, relabund, printlevel); } while (!inTax.eof()) { if (m->getControl_pressed()) { outputTypes.clear(); if (ct != nullptr) { delete ct; } if (groupMap != nullptr) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } delete classify; return 0; } inTax >> name; gobble(inTax); taxon = util.getline(inTax); gobble(inTax); string newTax = util.addUnclassifieds(taxon, maxLevel, probs); outTax << name << '\t' << newTax << endl; if (namefile != "") { itNames = nameMap.find(name); if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file please correct.\n"); exit(1); }else{ //add it as many times as there are identical seqs for (int i = 0; i < itNames->second.size(); i++) { taxaSum->addSeqToTree(itNames->second[i], newTax); } itNames->second.clear(); nameMap.erase(itNames->first); } }else { taxaSum->addSeqToTree(name, newTax); } } inTax.close(); outTax.close(); util.mothurRemove(newTaxonomyFile); util.renameFile(unclass, newTaxonomyFile); if (m->getControl_pressed()) { outputTypes.clear(); if (ct != nullptr) { delete ct; } if (groupMap != nullptr) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } delete classify; delete taxaSum; return 0; } //print summary file ofstream outTaxTree; util.openOutputFile(taxSummary, outTaxTree); taxaSum->print(outTaxTree, output); outTaxTree.close(); if (ct != nullptr) { delete ct; } if (groupMap != nullptr) { delete groupMap; } delete taxaSum; util.mothurRemove(tempTaxonomyFile); delete classify; m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences.\n\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set taxonomy file as new current taxonomyfile string currentName = ""; itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "execute"); exit(1); } } /**************************************************************************************************/ struct classifyData { OutputWriter* taxTWriter; OutputWriter* taxWriter; OutputWriter* accnosWriter; string search, taxonomyFileName, templateFileName, method, filename; unsigned long long start; unsigned long long end; MothurOut* m; Classify* classify; float match, misMatch, gapOpen, gapExtend; int count, kmerSize, threadID, cutoff, iters, numWanted; bool probs, flip, writeShortcuts; Utils util; classifyData(){} classifyData(OutputWriter* acc, bool p, OutputWriter* a, OutputWriter* r, string f, unsigned long long st, unsigned long long en, bool fli, Classify* c) { accnosWriter = acc; taxWriter = a; taxTWriter = r; 
filename = f; m = MothurOut::getInstance(); start = st; end = en; probs = p; flip = fli; count = 0; classify = c; } }; //********************************************************************************************************************** void driverClassifier(classifyData* params){ try { ifstream inFASTA; params->util.openInputFile(params->filename, inFASTA); inFASTA.seekg(params->start); string taxonomy; bool done = false; string taxBuffer = ""; string taxTBuffer = ""; string accnosBuffer = ""; while (!done) { if (params->m->getControl_pressed()) { break; } Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); if (candidateSeq->getName() != "") { string simpleTax = ""; bool flipped = false; taxonomy = params->classify->getTaxonomy(candidateSeq, simpleTax, flipped); if (params->m->getControl_pressed()) { delete candidateSeq; break; } if (taxonomy == "unknown;") { params->m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences.\n"); } //output confidence scores or not if (params->probs) { taxBuffer += candidateSeq->getName() + '\t' + taxonomy + '\n'; } else { taxBuffer += candidateSeq->getName() + '\t' + simpleTax + '\n'; } if (flipped) { accnosBuffer += candidateSeq->getName() + '\n'; } taxTBuffer = candidateSeq->getName() + '\t' + simpleTax + '\n'; params->count++; } delete candidateSeq; //report progress if((params->count) % 100 == 0){ params->m->mothurOutJustToScreen(toString(params->count) +"\n"); params->taxTWriter->write(taxTBuffer); taxTBuffer = ""; params->taxWriter->write(taxBuffer); taxBuffer = ""; if (accnosBuffer != "") { params->accnosWriter->write(accnosBuffer); accnosBuffer = ""; } } #if defined NON_WINDOWS unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->count == params->end) { break; } #endif } //report progress if((params->count) % 100 != 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); params->taxTWriter->write(taxTBuffer); taxTBuffer = ""; params->taxWriter->write(taxBuffer); taxBuffer = ""; if (accnosBuffer != "") { params->accnosWriter->write(accnosBuffer); accnosBuffer = ""; } } inFASTA.close(); } catch(exception& e) { params->m->errorOut(e, "ClassifySeqsCommand", "driver"); exit(1); } } /**************************************************************************************************/ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string accnos, string filename) { try { //create array of worker threads vector workerThreads; vector data; long long num = 0; vector positions; vector lines; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else positions = util.setFilePosFasta(filename, num); if (num < processors) { processors = num; } //figure out how many sequences you have to process int numSeqsPerProcessor = num / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = num - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif auto synchronizedAccnosFile = std::make_shared(accnos); auto synchronizedTaxFile = std::make_shared(taxFileName); auto synchronizedTaxTFile = std::make_shared(tempTaxFile); //Lauch worker threads for (int i = 0; i < processors-1; i++) { 
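//processors-1 worker threads are launched here, one per remaining chunk of the fasta file
//(lines[i+1]); the main thread classifies the first chunk (lines[0]) itself below. Each thread
//gets its own OutputWriter, but all writers share the three synchronized output files, so the
//taxonomy, temp taxonomy and flip accnos results can be written concurrently.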
OutputWriter* threadTaxWriter = new OutputWriter(synchronizedTaxFile); OutputWriter* threadTaxTWriter = new OutputWriter(synchronizedTaxTFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedAccnosFile); classifyData* dataBundle = new classifyData(threadAccnosWriter, probs, threadTaxWriter, threadTaxTWriter, filename, lines[i+1].start, lines[i+1].end, flip, classify); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverClassifier, dataBundle)); } OutputWriter* threadTaxWriter = new OutputWriter(synchronizedTaxFile); OutputWriter* threadTaxTWriter = new OutputWriter(synchronizedTaxTFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedAccnosFile); classifyData* dataBundle = new classifyData(threadAccnosWriter, probs, threadTaxWriter, threadTaxTWriter, filename, lines[0].start, lines[0].end, flip, classify); driverClassifier(dataBundle); num = dataBundle->count; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->taxTWriter; delete data[i]->taxWriter; delete data[i]->accnosWriter; delete data[i]; delete workerThreads[i]; } synchronizedTaxTFile->close(); synchronizedTaxFile->close(); synchronizedAccnosFile->close(); delete threadTaxWriter; delete threadTaxTWriter; delete threadAccnosWriter; delete dataBundle; return num; } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "createProcesses"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/classifyseqscommand.h000066400000000000000000000051301424121717000224420ustar00rootroot00000000000000#ifndef CLASSIFYSEQSCOMMAND_H #define CLASSIFYSEQSCOMMAND_H /* * classifyseqscommand.h * Mothur * * Created by westcott on 11/2/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "classify.h" #include "sequence.hpp" #include "bayesian.h" #include "phylotree.h" #include "phylosummary.h" #include "knn.h" #include "kmertree.h" #include "aligntree.h" //KNN and Wang methods modeled from algorithms in //Naı¨ve Bayesian Classifier for Rapid Assignment of rRNA Sequences //into the New Bacterial Taxonomy􏰎† //Qiong Wang,1 George M. Garrity,1,2 James M. Tiedje,1,2 and James R. Cole1* //Center for Microbial Ecology1 and Department of Microbiology and Molecular Genetics,2 Michigan State University, //East Lansing, Michigan 48824 //Received 10 January 2007/Accepted 18 June 2007 class ClassifySeqsCommand : public Command { public: ClassifySeqsCommand(string); ~ClassifySeqsCommand(); vector setParameters(); string getCommandName() { return "classify.seqs"; } string getCommandCategory() { return "Phylotype Analysis"; } string getCommonQuestions(); string getHelpString(); string getOutputPattern(string); string getCitation() { return "Wang Q, Garrity GM, Tiedje JM, Cole JR (2007). Naive Bayesian classifier for rapid assignment of rRNA sequences into the new bacterial taxonomy. Appl Environ Microbiol 73: 5261-7. [ for Bayesian classifier ] \nAltschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ (1997). Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res 25: 3389-402. [ for BLAST ] \nDeSantis TZ, Hugenholtz P, Larsen N, Rojas M, Brodie EL, Keller K, Huber T, Dalevi D, Hu P, Andersen GL (2006). Greengenes, a chimera-checked 16S rRNA gene database and workbench compatible with ARB. Appl Environ Microbiol 72: 5069-72. 
[ for kmer ] \nhttp://www.mothur.org/wiki/Classify.seqs"; } string getDescription() { return "classify sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames; Classify* classify; string fastafile, templateFileName, countfile, distanceFileName, namefile, search, method, taxonomyFileName, groupfile, output; int processors, kmerSize, numWanted, cutoff, iters, printlevel; float match, misMatch, gapOpen, gapExtend; bool abort, probs, save, flip, hasName, hasCount, writeShortcuts, relabund; int createProcesses(string, string, string, string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/classifysvmsharedcommand.cpp000077500000000000000000000703761424121717000240370ustar00rootroot00000000000000// // classifysvmsharedcommand.cpp // Mothur // // Created by Joshua Lynch on 6/28/2013. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "classifysvmsharedcommand.h" //********************************************************************************************************************** vector ClassifySvmSharedCommand::setParameters() { try { //CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none", "summary", false, true, true); parameters.push_back(pshared); CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none", "", false, true, true); parameters.push_back(pdesign); // RFE or classification? // mode should be either 'rfe' or 'classify' CommandParameter mode("mode", "String", "", "", "", "", "", "", false, false); parameters.push_back(mode); // cross validation parameters CommandParameter evaluationFoldCountParam("evaluationfolds", "Number", "", "3", "", "", "", "", false, false); parameters.push_back(evaluationFoldCountParam); CommandParameter trainingFoldCountParam("trainingfolds", "Number", "", "10", "", "", "", "", false, false); parameters.push_back(trainingFoldCountParam); CommandParameter smoc("smoc", "Number", "", "3", "", "", "", "", false, false); parameters.push_back(smoc); // Support Vector Machine parameters CommandParameter kernelParam("kernel", "String", "", "", "", "", "", "", false, false); parameters.push_back(kernelParam); // data transformation parameters // transform should be 'zeroone' or 'zeromean' ('zeromean' is default) CommandParameter transformParam("transform", "String", "", "", "", "", "", "", false, false); parameters.push_back(transformParam); CommandParameter verbosityParam("verbose", "Number", "", "0", "", "", "", "", false, false); parameters.push_back(verbosityParam); // want this parameter to behave like the one in classify.rf CommandParameter pstdthreshold("stdthreshold", "Number", "", "0.0", "", "", "", "", false, false); parameters.push_back(pstdthreshold); // pruning params end CommandParameter pgroups("groups", "String", "", "", "", "", "", "", false, false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "", "", false, false); parameters.push_back(plabel); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "", "", false, false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "", "", false, false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["summary"] = tempOutNames; 
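//The parameters registered above drive the SVM workflow: evaluationfolds and trainingfolds set
//the outer and inner cross-validation folds, smoc lists the SVM cost (C) values to try, kernel
//(with per-kernel parameter options) defines the kernel grid, transform selects the feature
//scaling, and stdthreshold is meant to prune features the way the classify.rf parameter does.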
vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch (exception& e) { m->errorOut(e, "ClassifySvmSharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClassifySvmSharedCommand::getHelpString() { try { string helpString = ""; helpString += "The classifysvm.shared command allows you to ....\n"; helpString += "The classifysvm.shared command parameters are: shared, design, label, groups.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "The groups parameter allows you to specify which of the groups in your designfile you would like analyzed.\n"; helpString += "The classifysvm.shared should be in the following format: \n"; helpString += "classifysvm.shared(shared=yourSharedFile, design=yourDesignFile)\n"; return helpString; } catch (exception& e) { m->errorOut(e, "ClassifySvmSharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClassifySvmSharedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],[distance],summary"; } //makes file like: amazon.0.03.fasta else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch (exception& e) { m->errorOut(e, "ClassifySvmSharedCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ClassifySvmSharedCommand::ClassifySvmSharedCommand(string option) : Command() { try { allLines = true; //allow user to run help if (option == "help") { help(); abort = true; calledHelp = true; } else if (option == "citation") { citation(); abort = true; calledHelp = true; } else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } } else { current->setSharedFile(sharedfile); } //get design file, it is required designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { designfile = ""; abort = true; } else if (designfile == "not found") { //if there is a current shared file, use it designfile = current->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter.\n"); } else { m->mothurOut("You have no current designfile and the design parameter is required.\n"); abort = true; } } else { current->setDesignFile(designfile); } if (outputdir == "") { outputdir = util.hasPath(sharedfile); } //Groups must be checked later to make sure they are valid. //SharedUtilities has functions of check the validity, just make to so m->setGroups() after the checks. 
//If you are using these with a shared file no need to check the SharedRAbundVector class will call SharedUtilites for you, //kinda nice, huh? string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } //Commonly used to process list, rabund, sabund, shared and relabund files. //Look at "smart distancing" examples below in the execute function. string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if (label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } string modeOption = validParameter.valid(parameters, "mode"); if ( modeOption == "not found" || modeOption == "rfe" ) { mode = "rfe"; } else if ( modeOption == "classify" ) { mode = "classify"; } else { m->mothurOut("the mode option " + modeOption + " is not recognized -- must be 'rfe' or 'classify'\n"); abort = true; } string ef = validParameter.valid(parameters, "evaluationfolds"); if ( ef == "not found") { evaluationFoldCount = 3; } else { util.mothurConvert(ef, evaluationFoldCount); } string tf = validParameter.valid(parameters, "trainingfolds"); if ( tf == "not found") { trainingFoldCount = 5; } else { util.mothurConvert(tf, trainingFoldCount); } string smocOption = validParameter.valid(parameters, "smoc"); smocList.clear(); if ( smocOption == "not found" ) { //smocOption = "0.001,0.01,0.1,1.0,10.0,100.0,1000.0"; } else { vector smocOptionList; //split(smocOption, ';', smocOptionList); util.splitAtDash(smocOption, smocOptionList); for (vector::iterator i = smocOptionList.begin(); i != smocOptionList.end(); i++) { smocList.push_back(atof(i->c_str())); } } // kernel specification // start with default parameter ranges for all kernels kernelParameterRangeMap.clear(); getDefaultKernelParameterRangeMap(kernelParameterRangeMap); // get the kernel option string kernelOption = validParameter.valid(parameters, "kernel"); // if the kernel option is "not found" then use all kernels with default parameter ranges // otherwise use only kernels listed in the kernelOption string if ( kernelOption == "not found" ) { } else { // if the kernel option has been specified then // remove kernel parameters from the kernel parameter map if // they are not listed in the kernel option // at this point the kernelParameterRangeMap looks like this: // linear_key : [ // smoc_key : smoc parameter range // linear_constant_key : linear constant range // ] // rbf_key : [ // smoc_key : smoc parameter range // rbf_gamma_key : rbf gamma range // ] // polynomial_key : [ // smoc_key : smoc parameter range // polynomial_degree_key : polynomial degree range // polynomial_constant_key : polynomial constant range // ] vector kernelList; vector unspecifiedKernelList; //split(kernelOption, '-', kernelList); util.splitAtDash(kernelOption, kernelList); set kernelSet(kernelList.begin(), kernelList.end()); // make a list of strings that are keys in the kernel parameter range map // but are not in the kernel list for (KernelParameterRangeMap::iterator i = kernelParameterRangeMap.begin(); i != kernelParameterRangeMap.end(); i++) { //should be kernelList here string kernelKey = i->first; if ( kernelSet.find(kernelKey) == kernelSet.end() ) { unspecifiedKernelList.push_back(kernelKey); } } for (vector::iterator i = unspecifiedKernelList.begin(); i != unspecifiedKernelList.end(); i++) { m->mothurOut("removing kernel " + *i ); 
m->mothurOutEndLine(); kernelParameterRangeMap.erase(*i); } } // go through the kernel parameter range map and check for options for each kernel for (KernelParameterRangeMap::iterator i = kernelParameterRangeMap.begin(); i != kernelParameterRangeMap.end(); i++) { string kernelKey = i->first; ParameterRangeMap& kernelParameters = i->second; for (ParameterRangeMap::iterator j = kernelParameters.begin(); j != kernelParameters.end(); j++) { string parameterKey = j->first; ParameterRange& kernelParameterRange = j->second; // has an option for this kernel parameter been specified? string kernelParameterKey = kernelKey + parameterKey; //m->mothurOut("looking for option " << kernelParameterKey << endl; string kernelParameterOption = validParameter.valid(parameters, kernelParameterKey); if (kernelParameterOption == "not found") { // we already have default values in the kernel parameter map } else { // replace the default parameters with the specified parameters kernelParameterRange.clear(); vector parameterList; //split(kernelParameterOption, ';', parameterList); util.splitAtDash(kernelParameterOption, parameterList); for (vector::iterator k = parameterList.begin(); k != parameterList.end(); k++) { kernelParameterRange.push_back(atof(k->c_str())); } } } } // get the normalization option string transformOption = validParameter.valid(parameters, "transform"); if ( transformOption == "not found" || transformOption == "unitmean") { transformName = "unitmean"; } else if ( transformOption == "zeroone" ) { transformName = "zeroone"; } else { m->mothurOut("the transform option " + transformOption + " is not recognized -- must be 'unitmean' or 'zeroone'\n"); abort = true; } // get the verbosity option string verbosityOption = validParameter.valid(parameters, "verbose"); if ( verbosityOption == "not found") { verbosity = 0; } else { util.mothurConvert(tf, verbosity); if (verbosity < OutputFilter::QUIET || verbosity > OutputFilter::TRACE) { m->mothurOut("verbose set to unsupported value " + verbosityOption + " -- must be between 0 and 3"); } } // get the std threshold option string stdthresholdOption = validParameter.valid(parameters, "stdthreshold"); if ( stdthresholdOption == "not found" ) { stdthreshold = -1.0; } else { util.mothurConvert(stdthresholdOption, stdthreshold); if ( stdthreshold <= 0.0 ) { m->mothurOut("stdthreshold set to unsupported value " + stdthresholdOption + " -- must be greater than 0.0"); } } } } catch (exception& e) { m->errorOut(e, "ClassifySvmSharedCommand", "ClassifySvmSharedCommand"); exit(1); } } //********************************************************************************************************************** int ClassifySvmSharedCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); vector currentLabels = lookup->getOTUNames(); //read design file designMap.read(designfile); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } vector data = lookup->getSharedRAbundVectors(); processSharedAndDesignData(data, currentLabels); for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } m->mothurOutEndLine(); m->mothurOut("Output File Names:\n"); for 
(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ClassifySharedCommand", "execute"); exit(1); } } //********************************************************************************************************************** // This static function is intended to read all the necessary information from // a pair of shared and design files needed for SVM classification. This information // is used to build a LabeledObservationVector. Each element of the LabeledObservationVector // looks like this: // LabeledObservationVector[0] = pair("label 0", &vector[10.0, 21.0, 13.0]) // where the vector in the second position of the pair records OTU abundances. void ClassifySvmSharedCommand::readSharedAndDesignFiles(const string& sharedFilePath, const string& designFilePath, LabeledObservationVector& labeledObservationVector, FeatureVector& featureVector) { InputData input(sharedFilePath, "sharedfile", Groups); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); Groups = lookup->getNamesGroups(); DesignMap designMap; designMap.read(designFilePath); if (m->getControl_pressed()) { return ; } while ( lookup != nullptr ) { vector data = lookup->getSharedRAbundVectors(); readSharedRAbundVectors(data, designMap, labeledObservationVector, featureVector, lookup->getOTUNames()); for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); delete lookup; lookup = input.getSharedRAbundVectors(); } } void ClassifySvmSharedCommand::readSharedRAbundVectors(vector& lookup, DesignMap& designMap, LabeledObservationVector& labeledObservationVector, FeatureVector& featureVector, vector currentLabels) { for ( int j = 0; j < lookup.size(); j++ ) { //i++; //vector data = lookup[j]->getData(); Observation* observation = new Observation(lookup[j]->getNumBins(), 0.0); string sharedGroupName = lookup[j]->getGroup(); string treatmentName = designMap.get(sharedGroupName); //labeledObservationVector.push_back(make_pair(treatmentName, observation)); labeledObservationVector.push_back(LabeledObservation(j, treatmentName, observation)); for (int k = 0; k < lookup[j]->size(); k++) { observation->at(k) = double(lookup[j]->get(k)); if ( j == 0) { featureVector.push_back(Feature(k, currentLabels[k])); } } // let this happen later? 
//delete lookup[j]; } } void printPerformanceSummary(MultiClassSVM* s, ostream& output) { output << "multiclass SVM accuracy: " << s->getAccuracy() << endl; output << "two-class SVM performance" << endl; int labelFieldWidth = 2 + max_element(s->getLabels().begin(), s->getLabels().end())->size(); int performanceFieldWidth = 10; int performancePrecision = 3; output << setw(labelFieldWidth) << "class 1" << setw(labelFieldWidth) << "class 2" << setw(performanceFieldWidth) << "precision" << setw(performanceFieldWidth) << "recall" << setw(performanceFieldWidth) << "f" << setw(performanceFieldWidth) << "accuracy" << endl; for ( SvmVector::const_iterator svm = s->getSvmList().begin(); svm != s->getSvmList().end(); svm++ ) { SvmPerformanceSummary sps = s->getSvmPerformanceSummary(**svm); output << setw(labelFieldWidth) << setprecision(performancePrecision) << sps.getPositiveClassLabel() << setw(labelFieldWidth) << setprecision(performancePrecision) << sps.getNegativeClassLabel() << setw(performanceFieldWidth) << setprecision(performancePrecision) << sps.getPrecision() << setw(performanceFieldWidth) << setprecision(performancePrecision) << sps.getRecall() << setw(performanceFieldWidth) << setprecision(performancePrecision) << sps.getF() << setw(performanceFieldWidth) << setprecision(performancePrecision) << sps.getAccuracy() << endl; } } //********************************************************************************************************************** void ClassifySvmSharedCommand::processSharedAndDesignData(vector lookup, vector currentLabels) { try { OutputFilter outputFilter(verbosity); LabeledObservationVector labeledObservationVector; FeatureVector featureVector; readSharedRAbundVectors(lookup, designMap, labeledObservationVector, featureVector, currentLabels); // optionally remove features with low standard deviation if ( stdthreshold > 0.0 ) { FeatureVector removedFeatureVector = applyStdThreshold(stdthreshold, labeledObservationVector, featureVector); if (removedFeatureVector.size() > 0) { m->mothurOut(toString(removedFeatureVector.size()) + " OTUs were below the stdthreshold of " + toString(stdthreshold) + " and were removed\n"); if ( outputFilter.debug() ) { m->mothurOut("the following OTUs were below the standard deviation threshold of " + toString(stdthreshold) ); m->mothurOutEndLine(); for (FeatureVector::iterator i = removedFeatureVector.begin(); i != removedFeatureVector.end(); i++) { m->mothurOut(" " + toString(i->getFeatureLabel()) ); m->mothurOutEndLine(); } } } } // apply [0,1] standardization if ( transformName == "zeroone") { m->mothurOut("transforming data to lie within range [0,1]\n"); transformZeroOne(labeledObservationVector); } else { m->mothurOut("transforming data to have zero mean and unit variance\n"); transformZeroMeanUnitVariance(labeledObservationVector); } SvmDataset svmDataset(labeledObservationVector, featureVector); OneVsOneMultiClassSvmTrainer trainer(svmDataset, evaluationFoldCount, trainingFoldCount, outputFilter); if ( mode == "rfe" ) { SvmRfe svmRfe; ParameterRange& linearKernelConstantRange = kernelParameterRangeMap["linear"]["constant"]; ParameterRange& linearKernelSmoCRange = kernelParameterRangeMap["linear"]["smoc"]; RankedFeatureList rankedFeatureList = svmRfe.getOrderedFeatureList(svmDataset, trainer, linearKernelConstantRange, linearKernelSmoCRange); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[distance]"] = lookup[0]->getLabel(); string filename = getOutputFileName("summary", 
variables); outputNames.push_back(filename); outputTypes["summary"].push_back(filename); m->mothurOutEndLine(); ofstream outputFile(filename.c_str()); int n = 0; int rfeRoundCount = rankedFeatureList.front().getRank(); m->mothurOut("ordered features:\n" ); m->mothurOut("index\tOTU\trank\n"); outputFile << setw(5) << "index" << setw(12) << "OTU" << setw(5) << "rank" << endl; for (RankedFeatureList::iterator i = rankedFeatureList.begin(); i != rankedFeatureList.end(); i++) { n++; int rank = rfeRoundCount - i->getRank() + 1; outputFile << setw(5) << n << setw(12) << i->getFeature().getFeatureLabel() << setw(5) << rank ; m->mothurOutEndLine(); if ( n <= 20 ) { m->mothurOut(toString(n) + toString(i->getFeature().getFeatureLabel()) + toString(rank) ); m->mothurOutEndLine(); } } outputFile.close(); } else { MultiClassSVM* mcsvm = trainer.train(kernelParameterRangeMap); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[distance]"] = lookup[0]->getLabel(); string filename = getOutputFileName("summary", variables); outputNames.push_back(filename); outputTypes["summary"].push_back(filename); m->mothurOutEndLine(); ofstream outputFile(filename.c_str()); printPerformanceSummary(mcsvm, cout); printPerformanceSummary(mcsvm, outputFile); outputFile << "actual predicted" << endl; for ( LabeledObservationVector::const_iterator i = labeledObservationVector.begin(); i != labeledObservationVector.end(); i++ ) { Label actualLabel = i->getLabel(); outputFile << i->getDatasetIndex() << " " << actualLabel << " "; try { Label predictedLabel = mcsvm->classify(*(i->getObservation())); outputFile << predictedLabel << endl; } catch ( MultiClassSvmClassificationTie& e ) { outputFile << "tie" << endl; m->mothurOut("classification tie for observation " + toString(i->datasetIndex) + " with label " + toString(i->first)); m->mothurOutEndLine(); } } outputFile.close(); delete mcsvm; } } catch (exception& e) { m->errorOut(e, "ClassifySvmSharedCommand", "processSharedAndDesignData"); exit(1); } } //********************************************************************************************************************** void ClassifySvmSharedCommand::trainSharedAndDesignData(vector lookup, vector currentLabels) { try { LabeledObservationVector labeledObservationVector; FeatureVector featureVector; readSharedRAbundVectors(lookup, designMap, labeledObservationVector, featureVector, currentLabels); SvmDataset svmDataset(labeledObservationVector, featureVector); int evaluationFoldCount = 3; int trainFoldCount = 5; OutputFilter outputFilter(2); OneVsOneMultiClassSvmTrainer t(svmDataset, evaluationFoldCount, trainFoldCount, outputFilter); KernelParameterRangeMap kernelParameterRangeMap; getDefaultKernelParameterRangeMap(kernelParameterRangeMap); t.train(kernelParameterRangeMap); m->mothurOut("done training" ); m->mothurOutEndLine(); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[distance]"] = lookup[0]->getLabel(); string filename = getOutputFileName("summary", variables); outputNames.push_back(filename); outputTypes["summary"].push_back(filename); m->mothurOutEndLine(); m->mothurOut("leaving processSharedAndDesignData" ); m->mothurOutEndLine(); } catch (exception& e) { m->errorOut(e, "ClassifySvmSharedCommand", "trainSharedAndDesignData"); exit(1); } } //********************************************************************************************************************** 
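Editor's illustration: the ClassifySvmSharedCommand constructor above maintains a nested kernelParameterRangeMap (kernel name -> parameter name -> list of candidate values). Defaults come from getDefaultKernelParameterRangeMap(), kernels not named in the kernel option are erased, and an option whose name is the kernel key concatenated with the parameter key (for example linearsmoc, with values split on dashes) overwrites a single range. The standalone sketch below mimics that layout with plain standard-library containers so the pruning and override steps can be seen in isolation; the typedefs, the "gamma" key, and the numeric grids are illustrative assumptions made for this sketch, not the declarations actually used in svm.hpp.

// Minimal sketch of the nested kernel/parameter grid described above.
// Assumption: ParameterRange is a list of doubles and both maps are keyed by strings;
// the real typedefs live in svm.hpp and may differ.
#include <iostream>
#include <map>
#include <string>
#include <vector>

typedef std::vector<double> ParameterRange;                              // candidate values for one parameter
typedef std::map<std::string, ParameterRange> ParameterRangeMap;         // parameter name -> candidate values
typedef std::map<std::string, ParameterRangeMap> KernelParameterRangeMap; // kernel name -> its parameter grids

int main() {
    KernelParameterRangeMap kernelParameterRangeMap;

    // default grids, analogous to getDefaultKernelParameterRangeMap(); values here are illustrative
    kernelParameterRangeMap["linear"]["smoc"]     = { 0.01, 0.1, 1.0, 10.0 };
    kernelParameterRangeMap["linear"]["constant"] = { 0.0, 1.0 };
    kernelParameterRangeMap["rbf"]["smoc"]        = { 0.1, 1.0, 10.0 };
    kernelParameterRangeMap["rbf"]["gamma"]       = { 0.001, 0.01, 0.1 };

    // the user asked for only the linear kernel: erase every kernel not listed,
    // mirroring the unspecifiedKernelList loop in the constructor
    std::vector<std::string> requested = { "linear" };
    std::vector<std::string> toRemove;
    for (const auto& kernel : kernelParameterRangeMap) {
        bool wanted = false;
        for (const auto& name : requested) { if (name == kernel.first) { wanted = true; } }
        if (!wanted) { toRemove.push_back(kernel.first); }
    }
    for (const auto& name : toRemove) { kernelParameterRangeMap.erase(name); }

    // a user option such as linearsmoc=0.5-5.0 would then replace one range
    kernelParameterRangeMap["linear"]["smoc"] = { 0.5, 5.0 };

    for (const auto& kernel : kernelParameterRangeMap) {
        for (const auto& param : kernel.second) {
            std::cout << kernel.first << ":" << param.first << " has "
                      << param.second.size() << " candidate value(s)\n";
        }
    }
    return 0;
}

Keeping the grids in a map of maps lets the trainer loop over whatever kernels survive the pruning step and grid-search each one's parameters with the same code path, which is why the constructor only ever edits this structure rather than branching per kernel.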
mothur-1.48.0/source/commands/classifysvmsharedcommand.h000077500000000000000000000046651424121717000235020ustar00rootroot00000000000000// // classifysvmsharedcommand.h // Mothur // // Created by Joshua Lynch on 6/28/2013. // Copyright (c) 2013 Schloss Lab. All rights reserved. // // This class is based on ClassifySharedCommand // #ifndef __Mothur__classifysvmsharedcommand__ #define __Mothur__classifysvmsharedcommand__ #include "command.hpp" #include "inputdata.h" #include "svm.hpp" #include "designmap.h" class ClassifySvmSharedCommand : public Command { public: ClassifySvmSharedCommand(string); ~ClassifySvmSharedCommand() = default;; vector setParameters(); string getCommandName() { return "classify.svm"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Classify.svm\n"; } string getDescription() { return "implements the support vector machine machine learning algorithm to identify OTUs that can be used to differentiate between various groups of samples"; } int execute(); void help() { m->mothurOut(getHelpString()); } void readSharedAndDesignFiles(const string&, const string&, LabeledObservationVector&, FeatureVector&); void readSharedRAbundVectors(vector&, DesignMap&, LabeledObservationVector&, FeatureVector&, vector); vector& getSmocList() { return smocList; } const KernelParameterRangeMap& getKernelParameterRangeMap() { return kernelParameterRangeMap; } private: bool abort; vector outputNames, Groups; string sharedfile, designfile; set labels; bool allLines; int processors; bool useTiming; DesignMap designMap; // mode is either "rfe" or "classify" string mode; int evaluationFoldCount; int trainingFoldCount; vector smocList; KernelParameterRangeMap kernelParameterRangeMap; string transformName; int verbosity; double stdthreshold; void processSharedAndDesignData(vector lookup, vector); void trainSharedAndDesignData(vector lookup, vector); void getParameterValue(int& target, string pstring, int defaultvalue) { if (pstring == "not found" or pstring == "") { target = defaultvalue; } else { util.mothurConvert(pstring, target); } } }; #endif /* defined(__Mothur__classifysvmsharedcommand__) */ mothur-1.48.0/source/commands/classifytreecommand.cpp000077500000000000000000000464571424121717000230050ustar00rootroot00000000000000// // classifytreecommand.cpp // Mothur // // Created by Sarah Westcott on 2/20/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
// #include "classifytreecommand.h" #include "phylotree.h" #include "treereader.h" //********************************************************************************************************************** vector ClassifyTreeCommand::setParameters(){ try { CommandParameter ptree("tree", "InputTypes", "", "", "", "", "none","tree-summary",false,true,true); parameters.push_back(ptree); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "", "", "none","",false,true,true); parameters.push_back(ptaxonomy); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pmethod("output", "Multiple", "node-taxon", "node", "", "", "","",false,false); parameters.push_back(pmethod); CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "","",false,true); parameters.push_back(pcutoff); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["tree"] = tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ClassifyTreeCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClassifyTreeCommand::getHelpString(){ try { string helpString = ""; helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n"; helpString += "If you provide a group file, the concensus for each group will also be provided. \n"; helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"; helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n"; helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n"; helpString += "The classify.tree command parameters are tree, group, name, count and taxonomy. The tree and taxonomy files are required.\n"; helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy. The default is 51, meaning 51%. Cutoff cannot be below 51.\n"; helpString += "The output parameter allows you to specify whether you want the tree node number displayed on the tree, or the taxonomy displayed. Default=node. 
Options are node or taxon.\n"; helpString += "The classify.tree command should be used in the following format: classify.tree(tree=test.tre, group=test.group, taxonomy=test.taxonomy)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "ClassifyTreeCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClassifyTreeCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],taxonomy.summary"; } //makes file like: amazon.0.03.fasta else if (type == "tree") { pattern = "[filename],taxonomy.tre"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ClassifyTreeCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ClassifyTreeCommand::ClassifyTreeCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { treefile = ""; abort = true; } else if (treefile == "not found") { treefile = ""; treefile = current->getTreeFile(); if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a tree file.\n"); abort = true; } }else { current->setTreeFile(treefile); } taxonomyfile = validParameter.validFile(parameters, "taxonomy"); if (taxonomyfile == "not open") { taxonomyfile = ""; abort = true; } else if (taxonomyfile == "not found") { taxonomyfile = ""; taxonomyfile = current->getTaxonomyFile(); if (taxonomyfile != "") { m->mothurOut("Using " + taxonomyfile + " as input file for the taxonomy parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a taxonomy file.\n"); abort = true; } }else { current->setTaxonomyFile(taxonomyfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } string temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "51"; } util.mothurConvert(temp, cutoff); if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100.\n"); abort = true; } output = validParameter.valid(parameters, "output"); if (output == "not found") { output = "node"; } if ((output == "node") || (output == "taxon")) { }else { m->mothurOut("[ERROR]: " + output + "is not a valid output option. Valid output options are node or taxon.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "ClassifyTreeCommand", "ClassifyTreeCommand"); exit(1); } } //********************************************************************************************************************** int ClassifyTreeCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); long start = time(nullptr); /***************************************************/ // reading tree info // /***************************************************/ current->setTreeFile(treefile); TreeReader* reader = new TreeReader(treefile, groupfile, namefile); vector T = reader->getTrees(); CountTable* tmap = T[0]->getCountTable(); Tree* outputTree = T[0]; delete reader; if (namefile != "") { util.readNames(namefile, nameMap, nameCount); } if (m->getControl_pressed()) { delete tmap; delete outputTree; return 0; } util.readTax(taxonomyfile, taxMap, true); /***************************************************/ // get concensus taxonomies // /***************************************************/ getClassifications(outputTree); delete outputTree; delete tmap; if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set tree file as new current treefile if (treefile != "") { string currentName = ""; itTypes = outputTypes.find("tree"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); } } } m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to find the concensus taxonomies.\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ClassifyTreeCommand", "execute"); exit(1); } } 
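//**********************************************************************************************************************
// Editor's illustration (not part of the original mothur source). getTaxonomy() below builds a PhyloTree from the
// sequences under a tree node and walks down it one rank at a time: at each rank it keeps the child taxon holding the
// most sequences and accepts it only if that child's share of the node's sequences, as a rounded-up percentage, meets
// the cutoff (default 51). The helper below isolates just that acceptance test; its name is hypothetical and exists
// only for this sketch.
static int consensusConfidenceExample(int bestChildSize, int totalSize, int cutoff) {
    // same arithmetic as the consensusConfidence calculation inside getTaxonomy()
    int confidence = (int)ceil((bestChildSize / (float) totalSize) * 100);
    // e.g. 26 of 50 sequences -> 52, which passes the default cutoff of 51;
    // 25 of 50 -> 50, which fails, so the walk stops and the remaining ranks are reported as "unclassified;"
    return (confidence >= cutoff) ? confidence : 0;
}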
//********************************************************************************************************************** //traverse tree finding concensus taxonomy at each node //label node with a number to relate to output summary file //report all concensus taxonomies to file int ClassifyTreeCommand::getClassifications(Tree*& T){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(treefile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(treefile)); string outputFileName = getOutputFileName("summary", variables); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); //print headings out << "TreeNode\t"; if (groupfile != "") { out << "Group\t"; } out << "NumRep\tTaxonomy" << endl; string treeOutputDir = outputdir; if (outputdir == "") { treeOutputDir += util.hasPath(treefile); } variables["[filename]"] = treeOutputDir + util.getRootName(util.getSimpleName(treefile)); string outputTreeFileName = getOutputFileName("tree", variables); //create a map from tree node index to names of descendants, save time later map > > nodeToDescendants; //node# -> (groupName -> groupMembers) for (int i = 0; i < T->getNumNodes(); i++) { if (m->getControl_pressed()) { return 0; } nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants); } //for each node for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) { if (m->getControl_pressed()) { out.close(); return 0; } string tax = "not classifed"; int size; if (groupfile != "") { for (map >::iterator itGroups = nodeToDescendants[i].begin(); itGroups != nodeToDescendants[i].end(); itGroups++) { if (itGroups->first != "AllGroups") { tax = getTaxonomy(itGroups->second, size); out << (i+1) << '\t' << itGroups->first << '\t' << size << '\t' << tax << endl; } } }else { string group = "AllGroups"; tax = getTaxonomy(nodeToDescendants[i][group], size); out << (i+1) << '\t' << size << '\t' << tax << endl; } if (output == "node") { T->tree[i].setLabel(toString(i+1)); } else { string cleanedTax = tax; util.removeConfidences(cleanedTax); for (int j = 0; j < cleanedTax.length(); j++) { //special chars to trees - , ) ( ; [ ] : if ((cleanedTax[j] == ',') || (cleanedTax[j] == '(') || (cleanedTax[j] == ')') || (cleanedTax[j] == ';') || (cleanedTax[j] == ':') || (cleanedTax[j] == ']') || (cleanedTax[j] == '[')) { cleanedTax[j] = '_'; //change any special chars to _ so the tree can be read by tree readers } } T->tree[i].setLabel(cleanedTax); } } out.close(); ofstream outTree; util.openOutputFile(outputTreeFileName, outTree); outputNames.push_back(outputTreeFileName); outputTypes["tree"].push_back(outputTreeFileName); T->print(outTree, "both"); outTree.close(); return 0; } catch(exception& e) { m->errorOut(e, "ClassifyTreeCommand", "GetConcensusTaxonomies"); exit(1); } } //********************************************************************************************************************** string ClassifyTreeCommand::getTaxonomy(set names, int& size) { try{ string conTax = ""; size = 0; //create a tree containing sequences from this bin PhyloTree* phylo = new PhyloTree(); for (set::iterator it = names.begin(); it != names.end(); it++) { //if namesfile include the names if (namefile != "") { //is this sequence in the name file - namemap maps seqName -> repSeqName map::iterator it2 = nameMap.find(*it); if (it2 == nameMap.end()) { 
//this name is not in name file, skip it m->mothurOut((*it) + " is not in your name file. I will not include it in the consensus.\n"); }else{ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique map::iterator itTax = taxMap.find((it2->second)); if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it if ((*it) != (it2->second)) { m->mothurOut((*it) + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus.\n"); } else { m->mothurOut((*it) + " is not in your taxonomy file. I will not include it in the consensus.\n"); } }else{ //add seq to tree int num = nameCount[(*it)]; // we know its there since we found it in nameMap for (int i = 0; i < num; i++) { phylo->addSeqToTree((*it)+toString(i), itTax->second); } size += num; } } }else{ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique map::iterator itTax = taxMap.find((*it)); if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it m->mothurOut((*it) + " is not in your taxonomy file. I will not include it in the consensus.\n"); }else{ if (countfile != "") { int numDups = ct->getNumSeqs((*it)); for (int j = 0; j < numDups; j++) { phylo->addSeqToTree((*it), itTax->second); } size += numDups; }else{ //add seq to tree phylo->addSeqToTree((*it), itTax->second); size++; } } } if (m->getControl_pressed()) { delete phylo; return conTax; } } //build tree phylo->assignHeirarchyIDs(0); TaxNode currentNode = phylo->get(0); int myLevel = 0; //at each level while (currentNode.children.size() != 0) { //you still have more to explore TaxNode bestChild; int bestChildSize = 0; //go through children for (map::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) { TaxNode temp = phylo->get(itChild->second); //select child with largest accesions - most seqs assigned to it if (temp.accessions.size() > bestChildSize) { bestChild = phylo->get(itChild->second); bestChildSize = temp.accessions.size(); } } //is this taxonomy above cutoff int consensusConfidence = ceil((bestChildSize / (float) size) * 100); if (consensusConfidence >= cutoff) { //if yes, add it conTax += bestChild.name + "(" + toString(consensusConfidence) + ");"; myLevel++; }else{ //if no, quit break; } //move down a level currentNode = bestChild; } if (myLevel != phylo->getMaxLevel()) { while (myLevel != phylo->getMaxLevel()) { conTax += "unclassified;"; myLevel++; } } if (conTax == "") { conTax = "no_consensus;"; } delete phylo; return conTax; } catch(exception& e) { m->errorOut(e, "ClassifyTreeCommand", "getTaxonomy"); exit(1); } } //********************************************************************************************************************** map > ClassifyTreeCommand::getDescendantList(Tree*& T, int i, map > > descendants){ try { map > names; map >::iterator it; map >::iterator it2; int lc = T->tree[i].getLChild(); int rc = T->tree[i].getRChild(); // TreeMap* tmap = T->getTreeMap(); if (lc == -1) { //you are a leaf your only descendant is yourself vector groups = T->tree[i].getGroup(); set mynames; mynames.insert(T->tree[i].getName()); for (int j = 0; j < groups.size(); j++) { names[groups[j]] = mynames; } //mygroup -> me names["AllGroups"] = mynames; }else{ //your descedants are the combination of your childrens descendants names = descendants[lc]; for (it = descendants[rc].begin(); it != descendants[rc].end(); it++) { it2 = 
names.find(it->first); //do we already have this group if (it2 == names.end()) { //nope, so add it names[it->first] = it->second; }else { for (set::iterator it3 = (it->second).begin(); it3 != (it->second).end(); it3++) { names[it->first].insert(*it3); } } } } return names; } catch(exception& e) { m->errorOut(e, "ClassifyTreeCommand", "getDescendantList"); exit(1); } } /*****************************************************************/ mothur-1.48.0/source/commands/classifytreecommand.h000077500000000000000000000025321424121717000224340ustar00rootroot00000000000000#ifndef Mothur_classifytreecommand_h #define Mothur_classifytreecommand_h // // classifytreecommand.h // Mothur // // Created by Sarah Westcott on 2/20/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "command.hpp" #include "readtree.h" #include "treemap.h" #include "counttable.h" class ClassifyTreeCommand : public Command { public: ClassifyTreeCommand(string); ~ClassifyTreeCommand(){} vector setParameters(); string getCommandName() { return "classify.tree"; } string getCommandCategory() { return "Phylotype Analysis"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Classify.tree"; } string getDescription() { return "Find the consensus taxonomy for the descendant of each tree node"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string treefile, taxonomyfile, groupfile, namefile, countfile, output; bool abort; vector outputNames; int numUniquesInName, cutoff; map nameMap; map nameCount; map taxMap; CountTable* ct; int getClassifications(Tree*&); map > getDescendantList(Tree*&, int, map > >); string getTaxonomy(set, int&); }; #endif mothur-1.48.0/source/commands/clearcutcommand.cpp000077500000000000000000000377251424121717000221100ustar00rootroot00000000000000/* * clearcutcommand.cpp * Mothur * * Created by westcott on 5/11/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "clearcutcommand.h" #ifdef __cplusplus extern "C" { #endif #include "clearcut.h" #ifdef __cplusplus } #endif //********************************************************************************************************************** vector ClearcutCommand::setParameters(){ try { CommandParameter pphylip("phylip", "InputTypes", "", "", "FastaPhylip", "FastaPhylip", "none","tree",false,false,true); parameters.push_back(pphylip); CommandParameter pfasta("fasta", "InputTypes", "", "", "FastaPhylip", "FastaPhylip", "none","tree",false,false,true); parameters.push_back(pfasta); CommandParameter pverbose("verbose", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pverbose); CommandParameter pquiet("quiet", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pquiet); CommandParameter pversion("version", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pversion); CommandParameter prseed("rseed", "String", "", "", "*", "", "","",false,false); parameters.push_back(prseed); CommandParameter pnorandom("norandom", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pnorandom); CommandParameter pshuffle("shuffle", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pshuffle); CommandParameter pneighbor("neighbor", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pneighbor); CommandParameter pexpblen("expblen", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pexpblen); CommandParameter pexpdist("expdist", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pexpdist); CommandParameter pDNA("DNA", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pDNA); CommandParameter pprotein("protein", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pprotein); CommandParameter pjukes("jukes", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pjukes); CommandParameter pkimura("kimura", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkimura); CommandParameter pstdout("stdout", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pstdout); CommandParameter pntrees("ntrees", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pntrees); CommandParameter pmatrixout("matrixout", "String", "", "", "", "", "","",false,false); parameters.push_back(pmatrixout); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["tree"] = tempOutNames; outputTypes["matrixout"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ClearcutCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClearcutCommand::getHelpString(){ try { string helpString = ""; helpString += "The clearcut command interfaces mothur with the clearcut program written by Initiative for Bioinformatics and Evolutionary Studies (IBEST) at the University of Idaho.\n"; helpString += "For more information about clearcut refer 
to http://bioinformatics.hungry.com/clearcut/ \n"; helpString += "The clearcut command parameters are phylip, fasta, version, verbose, quiet, seed, norandom, shuffle, neighbor, expblen, expdist, ntrees, matrixout, stdout, kimura, jukes, protein, DNA. \n"; helpString += "The phylip parameter allows you to enter your phylip formatted distance matrix. \n"; helpString += "The fasta parameter allows you to enter your aligned fasta file, if you enter a fastafile you specify if the sequences are DNA or protein using the DNA or protein parameters. \n"; helpString += "The version parameter prints out the version of clearcut you are using, default=F. \n"; helpString += "The verbose parameter prints out more output from clearcut, default=F. \n"; helpString += "The quiet parameter turns on silent operation mode, default=F. \n"; helpString += "The rseed parameter allows you to explicitly set the PRNG seed to a specific value. \n"; helpString += "The norandom parameter allows you to attempt joins deterministically, default=F. \n"; helpString += "The shuffle parameter allows you to randomly shuffle the distance matrix, default=F. \n"; helpString += "The neighbor parameter allows you to use traditional Neighbor-Joining algorithm, default=T. \n"; helpString += "The DNA parameter allows you to indicate your fasta file contains DNA sequences, default=F. \n"; helpString += "The protein parameter allows you to indicate your fasta file contains protein sequences, default=F. \n"; helpString += "The stdout parameter outputs your tree to STDOUT, default=F. \n"; helpString += "The matrixout parameter allows you to specify a filename to output a distance matrix to. \n"; helpString += "The ntrees parameter allows you to specify the number of output trees, default=1. \n"; helpString += "The expblen parameter allows you to use exponential notation for branch lengths, default=F. \n"; helpString += "The expdist parameter allows you to use exponential notation for distance outputs, default=F. 
\n"; helpString += "The clearcut command should be in the following format: \n"; helpString += "clearcut(phylip=yourDistanceFile) \n"; helpString += "Example: clearcut(phylip=abrecovery.phylip.dist) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ClearcutCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClearcutCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "matrixout") { pattern = "[filename],"; } else if (type == "tree") { pattern = "[filename],tre"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ClearcutCommand", "getOutputPattern"); exit(1); } } /**************************************************************************************/ ClearcutCommand::ClearcutCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { inputFile = fastafile; current->setFastaFile(fastafile); } phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { phylipfile = ""; } else { inputFile = phylipfile; current->setPhylipFile(phylipfile); } if ((phylipfile == "") && (fastafile == "")) { //is there are current file available for either of these? //give priority to phylip, then fasta phylipfile = current->getPhylipFile(); if (phylipfile != "") { inputFile = phylipfile; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { fastafile = current->getFastaFile(); if (fastafile != "") { inputFile = fastafile; m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a phylip or fasta file before you can use the clearcut command.\n"); abort = true; } } } if ((phylipfile != "") && (fastafile != "")) { m->mothurOut("You must provide either a phylip formatted distance matrix or an aligned fasta file, not BOTH.\n"); abort=true; } if (outputdir == ""){ outputdir = util.hasPath(inputFile); } string temp; temp = validParameter.valid(parameters, "version"); if (temp == "not found"){ temp = "F"; } version = util.isTrue(temp); temp = validParameter.valid(parameters, "verbose"); if (temp == "not found"){ temp = "F"; } verbose = util.isTrue(temp); temp = validParameter.valid(parameters, "quiet"); if (temp == "not found"){ temp = "F"; } quiet = util.isTrue(temp); seed = validParameter.valid(parameters, "rseed"); if (seed == "not found"){ seed = "*"; } temp = validParameter.valid(parameters, "norandom"); if (temp == "not found"){ temp = "F"; } norandom = util.isTrue(temp); temp = validParameter.valid(parameters, "shuffle"); if (temp == "not found"){ temp = "F"; } shuffle = util.isTrue(temp); temp = validParameter.valid(parameters, "neighbor"); if (temp == "not found"){ temp = "T"; } neighbor = util.isTrue(temp); temp = validParameter.valid(parameters, "DNA"); if (temp == "not found"){ temp = "F"; } DNA = util.isTrue(temp); temp = validParameter.valid(parameters, "protein"); if (temp == "not found"){ temp = "F"; } protein = util.isTrue(temp); temp = validParameter.valid(parameters, "jukes"); if (temp == "not found"){ temp = "F"; } jukes = util.isTrue(temp); temp = validParameter.valid(parameters, "kimura"); if (temp == "not found"){ temp = "F"; } kimura = util.isTrue(temp); temp = validParameter.valid(parameters, "stdout"); if (temp == "not found"){ temp = "F"; } stdoutWanted = util.isTrue(temp); matrixout = validParameter.validPath(parameters, "matrixout"); if (matrixout == "not found"){ matrixout = ""; } ntrees = validParameter.valid(parameters, "ntrees"); if (ntrees == "not found"){ ntrees = "1"; } temp = validParameter.valid(parameters, "expblen"); if (temp == "not found"){ temp = "F"; } expblen = util.isTrue(temp); temp = validParameter.valid(parameters, "expdist"); if (temp == "not found"){ temp = "F"; } expdist = util.isTrue(temp); if ((fastafile != "") && ((!DNA) && (!protein))) { m->mothurOut("You must specify the type of sequences you are using: DNA or protein.\n"); abort=true; } } } catch(exception& e) { m->errorOut(e, "ClearcutCommand", "ClearcutCommand"); exit(1); } } /**************************************************************************************/ int ClearcutCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } //prepare filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFile)); string outputName = getOutputFileName("tree", variables); outputNames.push_back(outputName); outputTypes["tree"].push_back(outputName); int numArgs = 4; //clearcut, in, out and fastafile or phylipfile if (version) { numArgs++; } if (verbose) { numArgs++; } if (quiet) { numArgs++; } if (seed != "*") { numArgs++; } if (norandom) { numArgs++; } if (shuffle) { numArgs++; } if (neighbor) { numArgs++; } if (stdoutWanted) { numArgs++; } if (DNA) { numArgs++; } if (protein) { numArgs++; } if (jukes) { numArgs++; } if (kimura) { numArgs++; } if (matrixout != "") { numArgs++; } if (ntrees != "1") { numArgs++; } if (expblen) { numArgs++; } if (expdist) { numArgs++; } char** clearcutParameters; clearcutParameters = new char*[numArgs]; clearcutParameters[0] = 
util.mothurConvert("clearcut"); //you gave us a distance matrix if (phylipfile != "") { clearcutParameters[1] = util.mothurConvert("--distance"); } //you gave us a fastafile if (fastafile != "") { clearcutParameters[1] = util.mothurConvert("--alignment"); } int parameterCount = 2; if (version) { clearcutParameters[parameterCount] = util.mothurConvert("--version"); parameterCount++; } if (verbose) { clearcutParameters[parameterCount] = util.mothurConvert("--verbose"); parameterCount++; } if (quiet) { clearcutParameters[parameterCount] = util.mothurConvert("--input"); parameterCount++; } if (seed != "*") { string tempSeed = "--seed=" + seed; clearcutParameters[parameterCount] = util.mothurConvert(tempSeed); parameterCount++; } if (norandom) { clearcutParameters[parameterCount] = util.mothurConvert("--norandom"); parameterCount++; } if (shuffle) { clearcutParameters[parameterCount] = util.mothurConvert("--shuffle"); parameterCount++; } if (neighbor) { clearcutParameters[parameterCount] = util.mothurConvert("--neighbor"); parameterCount++; } string tempIn = "--in=" + inputFile; clearcutParameters[parameterCount] = util.mothurConvert(tempIn); parameterCount++; if (stdoutWanted) { clearcutParameters[parameterCount] = util.mothurConvert("--stdout"); parameterCount++; } else{ string tempOut = "--out=" + outputName; clearcutParameters[parameterCount] = util.mothurConvert(tempOut); parameterCount++; } if (DNA) { clearcutParameters[parameterCount] = util.mothurConvert("--DNA"); parameterCount++; } if (protein) { clearcutParameters[parameterCount] = util.mothurConvert("--protein"); parameterCount++; } if (jukes) { clearcutParameters[parameterCount] = util.mothurConvert("--jukes"); parameterCount++; } if (kimura) { clearcutParameters[parameterCount] = util.mothurConvert("--kimura"); parameterCount++; } if (matrixout != "") { string tempMatrix = "--matrixout=" + outputdir + matrixout; clearcutParameters[parameterCount] = util.mothurConvert(tempMatrix); parameterCount++; outputNames.push_back((outputdir + matrixout)); outputTypes["matrixout"].push_back((outputdir + matrixout)); } if (ntrees != "1") { string tempNtrees = "--ntrees=" + ntrees; clearcutParameters[parameterCount] = util.mothurConvert(tempNtrees); parameterCount++; } if (expblen) { clearcutParameters[parameterCount] = util.mothurConvert("--expblen"); parameterCount++; } if (expdist) { clearcutParameters[parameterCount] = util.mothurConvert("--expdist"); parameterCount++; } errno = 0; clearcut_main(numArgs, clearcutParameters); //free memory for(int i = 0; i < numArgs; i++) { delete[] clearcutParameters[i]; } delete[] clearcutParameters; if (!stdoutWanted) { //set first tree file as new current treefile string currentTree = ""; itTypes = outputTypes.find("tree"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; current->setTreeFile(currentTree); } } m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); } return 0; } catch(exception& e) { m->errorOut(e, "ClearcutCommand", "execute"); exit(1); } } /**************************************************************************************/ mothur-1.48.0/source/commands/clearcutcommand.h000077500000000000000000000027121424121717000215410ustar00rootroot00000000000000#ifndef CLEARCUTCOMMAND_H #define CLEARCUTCOMMAND_H /* * clearcutcommand.h * Mothur * * Created by westcott on 5/11/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" /* Evans, J., L. Sheneman, and J.A. Foster (2006) Relaxed Neighbor-Joining: A Fast Distance-Based Phylogenetic Tree Construction Method, J. Mol. Evol., 62, 785-792 */ /****************************************************************************/ class ClearcutCommand : public Command { public: ClearcutCommand(string); ~ClearcutCommand() = default; vector setParameters(); string getCommandName() { return "clearcut"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Sheneman L, Evans J, Foster JA (2006). Clearcut: a fast implementation of relaxed neighbor joining. Bioinformatics 22: 2823-4. \nhttp://www.mothur.org/wiki/Clearcut"; } string getDescription() { return "create a tree from a fasta or phylip file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string phylipfile, fastafile, matrixout, inputFile, seed, ntrees; bool version, verbose, quiet, norandom, shuffle, neighbor, expblen, expdist, stdoutWanted, kimura, jukes, protein, DNA; bool abort; vector outputNames; }; /****************************************************************************/ #endif mothur-1.48.0/source/commands/clustercommand.cpp000066400000000000000000001400401424121717000217450ustar00rootroot00000000000000/* * clustercommand.cpp * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "clustercommand.h" #include "readphylip.h" #include "readcolumn.h" #include "readmatrix.hpp" #include "sequence.hpp" #include "systemcommand.h" #include "sensspeccommand.h" #include "mcc.hpp" #include "sensitivity.hpp" #include "specificity.hpp" #include "fdr.hpp" #include "npv.hpp" #include "ppv.hpp" #include "f1score.hpp" #include "tp.hpp" #include "fp.hpp" #include "fpfn.hpp" #include "tptn.hpp" #include "tn.hpp" #include "fn.hpp" #include "accuracy.hpp" //********************************************************************************************************************** vector ClusterCommand::setParameters(){ try { CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none","list",false,false,true); parameters.push_back(pphylip); CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName","list",false,false,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName-FastaTaxName","rabund-sabund",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "","",false,false,true); parameters.push_back(pcount); CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName","list",false,false,true); parameters.push_back(pcolumn); CommandParameter pcutoff("cutoff", "Number", "", "0.03", "", "", "","",false,false,true); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted-agc-dgc-opti-unique", "opti", "", "", "","",false,false,true); parameters.push_back(pmethod); CommandParameter pinitialize("initialize", "Multiple", "oneotu-singleton", "singleton", "", "", "","",false,false,true); parameters.push_back(pinitialize); CommandParameter pmetric("metric", "Multiple", 
"mcc-sens-spec-tptn-fpfn-tp-tn-fp-fn-f1score-accuracy-ppv-npv-fdr", "mcc", "", "", "","",false,false,true); parameters.push_back(pmetric); CommandParameter pmetriccutoff("delta", "Number", "", "0.0001", "", "", "","",false,false,true); parameters.push_back(pmetriccutoff); CommandParameter piters("iters", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(piters); CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pshowabund); CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptiming); CommandParameter psim("sim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psim); CommandParameter pvsearchlocation("vsearch", "String", "", "", "", "", "","",false,false); parameters.push_back(pvsearchlocation); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["sensspec"] = tempOutNames; outputTypes["rabund"] = tempOutNames; outputTypes["sabund"] = tempOutNames; outputTypes["steps"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClusterCommand::getHelpString(){ try { string helpString = ""; helpString += "The cluster command parameter options are phylip, column, name, count, method, cutoff, precision, sim, showabund, timing, metric, iters, initialize. Fasta or Phylip or column and name are required.\n"; helpString += "The phylip and column parameter allow you to enter your distance file. \n"; helpString += "The fasta parameter allows you to enter your fasta file for use with the agc or dgc methods. \n"; helpString += "The name parameter allows you to enter your name file. \n"; helpString += "The count parameter allows you to enter your count file. \n A count or name file is required if your distance file is in column format.\n"; helpString += "The iters parameter allow you to set the maxiters for the opticluster method. \n"; helpString += "The metric parameter allows to select the metric in the opticluster method. Options are Matthews correlation coefficient (mcc), sensitivity (sens), specificity (spec), true positives + true negatives (tptn), false positives + false negatives (fpfn), true positives (tp), true negative (tn), false positive (fp), false negative (fn), f1score (f1score), accuracy (accuracy), positive predictive value (ppv), negative predictive value (npv), false discovery rate (fdr). Default=mcc.\n"; helpString += "The initialize parameter allows to select the initial randomization for the opticluster method. Options are singleton, meaning each sequence is randomly assigned to its own OTU, or oneotu meaning all sequences are assigned to one otu. 
Default=singleton.\n"; helpString += "The delta parameter allows to set the stable value for the metric in the opticluster method (delta=0.0001). \n"; helpString += "The method parameter allows you to enter your clustering mothod. Options are furthest, nearest, average, weighted, agc, dgc, unique and opti. Default=opti. The agc and dgc methods require a fasta file."; helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n"; helpString += "The vsearch parameter allows you to specify the name and location of your vsearch executable if using agc or dgc clustering methods. By default mothur will look in your path, mothur's executable and mothur tools locations. You can set the vsearch location as follows, vsearch=/usr/bin/vsearch.\n"; helpString += "The cluster command should be in the following format: \n"; helpString += "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClusterCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; } else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; } else if (type == "sensspec") { pattern = "[filename],[clustertag],sensspec"; } else if (type == "steps") { pattern = "[filename],[clustertag],steps"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen. 
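// (Editor's illustration, not original mothur code.) With method=opti the constructor reads metric (default mcc),
// delta (default 0.0001) and iters (default 100); the opti method then repeatedly reassigns sequences between OTUs to
// improve the chosen metric, stopping once the per-iteration change falls below delta or iters is reached. The
// hypothetical helper below shows the Matthews correlation coefficient computed from true/false positive and negative
// sequence-pair counts; the real calculator ships in the mcc.hpp included at the top of this file.
static double exampleMCC(double truePos, double trueNeg, double falsePos, double falseNeg) {
    double denominator = sqrt((truePos + falsePos) * (truePos + falseNeg) * (trueNeg + falsePos) * (trueNeg + falseNeg));
    if (denominator == 0) { return 0; } // convention assumed here: report 0 when any margin is empty
    return ((truePos * trueNeg) - (falsePos * falseNeg)) / denominator;
}
// For example, exampleMCC(90, 900, 10, 20) is about 0.84; a perfect clustering scores 1 and a random one about 0.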
ClusterCommand::ClusterCommand(string option) : Command() { try{ //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { phylipfile = ""; } else { distfile = phylipfile; format = "phylip"; current->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { columnfile = ""; abort = true; } else if (columnfile == "not found") { columnfile = ""; } else { distfile = columnfile; format = "column"; current->setColumnFile(columnfile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { distfile = fastafile; format = "fasta"; current->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } method = validParameter.valid(parameters, "method"); if (method == "not found") { method = "opti";} vector versionOutputs; bool foundTool = false; string programName = "vsearch"; programName += EXECUTABLE_EXT; vsearchLocation = validParameter.validPath(parameters, "vsearch"); if (vsearchLocation == "not found") { vsearchLocation = ""; if ((method == "agc") || (method == "dgc")) { foundTool = util.findTool(programName, vsearchLocation, versionOutputs, current->getLocations()); } } else { if ((method == "agc") || (method == "dgc")) { //test to make sure vsearch exists ifstream in; vsearchLocation = util.getFullPathName(vsearchLocation); bool ableToOpen = util.openInputFile(vsearchLocation, in, "no error"); in.close(); if(!ableToOpen) { m->mothurOut(vsearchLocation + " file does not exist or cannot be opened, ignoring.\n"); vsearchLocation = ""; programName = util.getSimpleName(vsearchLocation); vsearchLocation = ""; foundTool = util.findTool(programName, vsearchLocation, versionOutputs, current->getLocations()); } } } if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted") || (method == "agc") || (method == "dgc") || (method == "opti") || (method == "unique")) { } else { m->mothurOut("[ERROR]: Not a valid clustering method. Valid clustering algorithms are furthest, nearest, average, weighted, agc, dgc, unique and opti.\n"); abort = true; } if (method != "unique") { if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { //is there are current file available for either of these? 
//give priority to column, then phylip columnfile = current->getColumnFile(); if (columnfile != "") { distfile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { phylipfile = current->getPhylipFile(); if (phylipfile != "") { distfile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { fastafile = current->getFastaFile(); if (fastafile != "") { distfile = fastafile; format = "fasta"; m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a phylip, column or fasta file before you can use the cluster command, unless using the unique method.\n"); abort = true; } } } } else if (((phylipfile != "") && (columnfile != "")) || ((phylipfile != "") && (fastafile != "")) || ((fastafile != "") && (columnfile != ""))) { m->mothurOut("When executing a cluster command you must enter ONLY ONE of the following: phylip, column or fasta.\n"); abort = true; } if (columnfile != "") { if ((namefile == "") && (countfile == "")){ namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format.\n"); abort = true; } } } } if ((method != "agc") && (method != "dgc")) { if ((columnfile == "") && (phylipfile == "")) { m->mothurOut("[ERROR]: You must provide a distance file unless you are using the agc, dgc or unique clustering methods, aborting\n."); abort = true; } } }else { if ((countfile == "") && (namefile == "")) { countfile = current->getCountFile(); if (countfile != "") { distfile = countfile; format = "count"; m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { namefile = current->getNameFile(); if (namefile != "") { distfile = namefile; format = "name"; m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a count or name file before you can use the cluster command with the unique method.\n"); abort = true; } } } else if(countfile != "") { format = "count"; } else if(namefile != "") { format = "name"; } } if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster command you must enter ONLY ONE of the following: count or name.\n"); abort = true; } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
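// Illustrative sketch, not part of mothur: the fall-back order applied above when no
// distance input is supplied on the command line -- a current column file first, then
// phylip, then fasta (fasta is only usable with the agc/dgc methods). The file names
// and the helper are hypothetical; mothur resolves this through the current-> getters
// shown above.
#include <iostream>
#include <string>

static std::string chooseInput(const std::string& column, const std::string& phylip, const std::string& fasta) {
    if (!column.empty()) { return "column: " + column; }   // column format wins
    if (!phylip.empty()) { return "phylip: " + phylip; }   // then a phylip matrix
    if (!fasta.empty())  { return "fasta: "  + fasta;  }   // fasta only valid for agc/dgc
    return "none: abort";                                  // no usable current file -> cluster() aborts
}

int main() {
    std::cout << chooseInput("", "final.phylip.dist", "final.fasta") << "\n"; // phylip: final.phylip.dist
    return 0;
}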
//get user cutoff and precision or use defaults string temp; temp = validParameter.valid(parameters, "precision"); if (temp == "not found") { temp = "100"; } //saves precision legnth for formatting below length = temp.length(); util.mothurConvert(temp, precision); temp = validParameter.valid(parameters, "sim"); if (temp == "not found") { temp = "F"; } sim = util.isTrue(temp); temp = validParameter.valid(parameters, "delta"); if (temp == "not found") { temp = "0.0001"; } util.mothurConvert(temp, stableMetric); metricName = validParameter.valid(parameters, "metric"); if (metricName == "not found") { metricName = "mcc"; } if ((metricName == "mcc") || (metricName == "sens") || (metricName == "spec") || (metricName == "tptn") || (metricName == "tp") || (metricName == "tn") || (metricName == "fp") || (metricName == "fn") || (metricName == "f1score") || (metricName == "accuracy") || (metricName == "ppv") || (metricName == "npv") || (metricName == "fdr") || (metricName == "fpfn") ){ } else { m->mothurOut("[ERROR]: Not a valid metric. Valid metrics are mcc, sens, spec, tp, tn, fp, fn, tptn, fpfn, f1score, accuracy, ppv, npv, fdr.\n"); abort = true; } initialize = validParameter.valid(parameters, "initialize"); if (initialize == "not found") { initialize = "singleton"; } if ((initialize == "singleton") || (initialize == "oneotu")){ } else { m->mothurOut("[ERROR]: Not a valid initialization. Valid initializations are singleton and oneotu.\n"); abort = true; } temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, maxIters); adjust=-1.0; bool setProcessors = true; temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ setProcessors=false; temp = current->getProcessors(); } processors = current->setProcessors(temp); if ((method == "agc") || (method == "dgc")) { if (fastafile == "") { m->mothurOut("[ERROR]: You must provide a fasta file when using the agc or dgc clustering methods, aborting\n."); abort = true;} }else if (setProcessors) { m->mothurOut("[WARNING]: You can only use the processors option when using the agc or dgc clustering methods. Using 1 processor.\n."); } cutOffSet = false; temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { if ((method == "opti") || (method == "agc") || (method == "dgc")) { temp = "0.03"; }else { temp = "0.15"; } } else { cutOffSet = true; } int pos = temp.find('-'); if (pos != string::npos) { //multiple cutoffs given if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { m->mothurOut("[WARNING]: Multiple cutoffs can only be specified when using the agc, dgc or opti method. Using 0.15. 
\n."); cutOffSet = false; temp = "0.15"; }else { util.splitAtDash(temp, cutoffs); temp = *cutoffs.begin(); } }else { cutoffs.insert(temp); } util.mothurConvert(temp, cutoff); showabund = validParameter.valid(parameters, "showabund"); if (showabund == "not found") { showabund = "T"; } timing = validParameter.valid(parameters, "timing"); if (timing == "not found") { timing = "F"; } } } catch(exception& e) { m->errorOut(e, "ClusterCommand", "ClusterCommand"); exit(1); } } //********************************************************************************************************************** ClusterCommand::~ClusterCommand(){} //********************************************************************************************************************** int ClusterCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //phylip file given and cutoff not given - use cluster.classic because it uses less memory and is faster if ((format == "phylip") && (!cutOffSet) && (method != "opti")) { m->mothurOut("\nYou are using a phylip file and no cutoff. I will run cluster.classic to save memory and time.\n"); //run unique.seqs for deconvolute results string inputString = "phylip=" + distfile; if (namefile != "") { inputString += ", name=" + namefile; } else if (countfile != "") { inputString += ", count=" + countfile; } inputString += ", precision=" + toString(precision); inputString += ", method=" + method; if (sim) { inputString += ", sim=T"; } else { inputString += ", sim=F"; } m->mothurOut("\n/------------------------------------------------------------/\n"); m->mothurOut("Running command: cluster.classic(" + inputString + ")\n"); Command* clusterClassicCommand = new ClusterDoturCommand(inputString); clusterClassicCommand->execute(); delete clusterClassicCommand; m->mothurOut("/------------------------------------------------------------/\n"); return 0; } time_t estart = time(nullptr); if (format == "fasta") { runVsearchCluster(); } else if (method == "opti") { runOptiCluster(); } else if (method == "unique") { runUniqueCluster(); } else { runMothurCluster(); } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } m->mothurOut("It took " + toString(time(nullptr) - estart) + " seconds to cluster\n"); //set list file as new current listfile string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "execute"); exit(1); } } //********************************************************************************************************************** int ClusterCommand::runVsearchCluster(){ try { string vsearchFastafile = ""; VsearchFileParser* vParse; if ((namefile == "") && (countfile == "")) { vParse = 
new VsearchFileParser(fastafile); } else if (namefile != "") { vParse = new VsearchFileParser(fastafile, namefile, "name"); } else if (countfile != "") { vParse = new VsearchFileParser(fastafile, countfile, "count"); } else { m->mothurOut("[ERROR]: Opps, should never get here. ClusterCommand::runVsearchCluster() \n"); m->setControl_pressed(true); return 0; } if (m->getControl_pressed()) { delete vParse; return 0; } vsearchFastafile = vParse->getVsearchFile(); if (cutoff > 1.0) { m->mothurOut("You did not set a cutoff, using 0.03.\n"); cutoff = 0.03; } map counts; map variables; if (outputdir == "") { outputdir += util.hasPath(distfile); } fileroot = outputdir + util.getRootName(util.getSimpleName(distfile)); tag = method; variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; string listFileName = getOutputFileName("list", variables); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); ofstream out; util.openOutputFile(listFileName, out); bool printHeaders = true; for (set::iterator it = cutoffs.begin(); it != cutoffs.end(); it++) { m->mothurOut("\n" + *it + "\n"); util.mothurConvert(*it, cutoff); //Run vsearch string ucVsearchFile = util.getSimpleName(vsearchFastafile) + ".clustered.uc"; string logfile = util.getSimpleName(vsearchFastafile) + ".clustered.log"; vsearchDriver(vsearchFastafile, ucVsearchFile, logfile); if (m->getControl_pressed()) { break; } //Convert outputted *.uc file into a list file ListVector list = vParse->createListFile(ucVsearchFile, vParse->getNumBins(logfile), toString(1.0-cutoff), counts); if (printHeaders) { printHeaders = false; }else { list.setPrintedLabels(printHeaders); } if (countfile != "") { list.print(out, counts); } else { list.print(out); } //remove temp files util.mothurRemove(ucVsearchFile); util.mothurRemove(logfile); } out.close(); util.mothurRemove(vsearchFastafile); delete vParse; if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } return 0; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "runVsearchCluster"); exit(1); } } //********************************************************************************************************************** int ClusterCommand::vsearchDriver(string inputFile, string ucClusteredFile, string logfile){ try { //vsearch --maxaccepts 16 --usersort --id 0.97 --minseqlength 30 --wordlength 8 --uc $ROOT.clustered.uc --cluster_smallmem $ROOT.sorted.fna --maxrejects 64 --strand both --log $ROOT.clustered.log --sizeorder //no sizeorder for dgc ucClusteredFile = util.getFullPathName(ucClusteredFile); inputFile = util.getFullPathName(inputFile); logfile = util.getFullPathName(logfile); //to allow for spaces in the path ucClusteredFile = "\"" + ucClusteredFile + "\""; inputFile = "\"" + inputFile + "\""; logfile = "\"" + logfile + "\""; vector cPara; string vsearchCommand = vsearchLocation; vsearchCommand = "\"" + vsearchCommand + "\" "; vector vsearchParameters; vsearchParameters.push_back(util.mothurConvert(vsearchCommand)); //--maxaccepts=16 vsearchParameters.push_back(util.mothurConvert("--maxaccepts=16")); //--threads=1 string processorsString = "--threads=" + toString(processors); vsearchParameters.push_back(util.mothurConvert(processorsString)); //--usersort vsearchParameters.push_back(util.mothurConvert("--usersort")); //--id=0.97 cutoff = abs(1.0 - cutoff); string cutoffString = toString(cutoff); if (cutoffString.length() > 4) { cutoffString = cutoffString.substr(0, 4); } else if 
(cutoffString.length() < 4) { for (int i = cutoffString.length(); i < 4; i++) { cutoffString += "0"; } } cutoffString = "--id=" + cutoffString; vsearchParameters.push_back(util.mothurConvert(cutoffString)); //--minseqlength=30 vsearchParameters.push_back(util.mothurConvert("--minseqlength=30")); //--wordlength=8 vsearchParameters.push_back(util.mothurConvert("--wordlength=8")); //--uc=$ROOT.clustered.uc string tempIn = "--uc=" + ucClusteredFile; vsearchParameters.push_back(util.mothurConvert(tempIn)); //--cluster_smallmem $ROOT.sorted.fna string tempSorted = "--cluster_smallmem=" + inputFile; vsearchParameters.push_back(util.mothurConvert(tempSorted)); //--maxrejects=64 vsearchParameters.push_back(util.mothurConvert("--maxrejects=64")); //--strand=both vsearchParameters.push_back(util.mothurConvert("--strand=both")); //--log=$ROOT.clustered.log string tempLog = "--log=" + logfile; vsearchParameters.push_back(util.mothurConvert(tempLog)); if (method == "agc") { //--sizeorder vsearchParameters.push_back(util.mothurConvert("--sizeorder")); } if (m->getDebug()) { m->mothurOut("[DEBUG]: "); for(int i = 0; i < vsearchParameters.size(); i++) { m->mothurOut(toString(vsearchParameters[i]) + "\t"); } m->mothurOut("\n"); } string commandString = ""; for (int i = 0; i < vsearchParameters.size(); i++) { commandString += toString(vsearchParameters[i]) + " "; } #if defined NON_WINDOWS #else commandString = "\"" + commandString + "\""; #endif if (m->getDebug()) { m->mothurOut("[DEBUG]: vsearch cluster command = " + commandString + ".\n"); } system(commandString.c_str()); //free memory for(int i = 0; i < vsearchParameters.size(); i++) { delete vsearchParameters[i]; } //remove "" from filenames ucClusteredFile = ucClusteredFile.substr(1, ucClusteredFile.length()-2); inputFile = inputFile.substr(1, inputFile.length()-2); logfile = logfile.substr(1, logfile.length()-2); return 0; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "vsearchDriver"); exit(1); } } //********************************************************************************************************************** int ClusterCommand::runMothurCluster(){ try { ReadMatrix* read; if (format == "column") { read = new ReadColumnMatrix(columnfile, sim); } //sim indicates whether its a similarity matrix else if (format == "phylip") { read = new ReadPhylipMatrix(phylipfile, sim); } else { m->setControl_pressed(true); return 0; } read->setCutoff(cutoff); NameAssignment* nameMap = nullptr; CountTable* ct = nullptr; map counts; if(namefile != ""){ nameMap = new NameAssignment(namefile); nameMap->readMap(); read->read(nameMap); }else if (countfile != "") { ct = new CountTable(); ct->readTable(countfile, false, false); read->read(ct); counts = ct->getNameMap(); }else { read->read(nameMap); } list = read->getListVector(); matrix = read->getDMatrix(); if(countfile != "") { rabund = new RAbundVector(); createRabund(ct, list, rabund); //creates an rabund that includes the counts for the unique list delete ct; }else { rabund = new RAbundVector(list->getRAbundVector()); } delete read; if (m->getControl_pressed()) { //clean up delete list; delete matrix; delete rabund; if(countfile == ""){rabundFile.close(); sabundFile.close(); util.mothurRemove((fileroot+ tag + ".rabund")); util.mothurRemove((fileroot+ tag + ".sabund")); } listFile.close(); util.mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } //create cluster if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method, adjust); } else if(method == 
"nearest"){ cluster = new SingleLinkage(rabund, list, matrix, cutoff, method, adjust); } else if(method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, cutoff, method, adjust); } else if(method == "weighted"){ cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method, adjust); } tag = cluster->getTag(); if (outputdir == "") { outputdir += util.hasPath(distfile); } fileroot = outputdir + util.getRootName(util.getSimpleName(distfile)); map variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; string sabundFileName = getOutputFileName("sabund", variables); string rabundFileName = getOutputFileName("rabund", variables); //if (countfile != "") { variables["[tag2]"] = "unique_list"; } string listFileName = getOutputFileName("list", variables); if (countfile == "") { util.openOutputFile(sabundFileName, sabundFile); util.openOutputFile(rabundFileName, rabundFile); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); } util.openOutputFile(listFileName, listFile); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); float previousDist = 0.00000; float rndPreviousDist = 0.00000; oldRAbund = *rabund; oldList = *list; print_start = true; start = time(nullptr); loops = 0; double saveCutoff = cutoff; bool printHeaders = true; while ((matrix->getSmallDist() <= cutoff) && (matrix->getNNodes() > 0)){ if (m->getControl_pressed()) { //clean up delete list; delete matrix; delete rabund; delete cluster; if(countfile == "") {rabundFile.close(); sabundFile.close(); util.mothurRemove((fileroot+ tag + ".rabund")); util.mothurRemove((fileroot+ tag + ".sabund")); } listFile.close(); util.mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } if (print_start && util.isTrue(timing)) { m->mothurOut("Clustering (" + tag + ") dist " + toString(matrix->getSmallDist()) + "/" + toString(util.roundDist(matrix->getSmallDist(), precision)) + "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")"); cout.flush(); print_start = false; } cluster->update(cutoff); float dist = matrix->getSmallDist(); float rndDist = util.ceilDist(dist, precision); if(previousDist <= 0.0000 && !util.isEqual(dist, previousDist)) { printData("unique", counts, printHeaders); } else if(!util.isEqual(rndDist, rndPreviousDist)) { printData(toString(rndPreviousDist), counts, printHeaders); } previousDist = dist; rndPreviousDist = rndDist; oldRAbund = *rabund; oldList = *list; } if (print_start && util.isTrue(timing)) { m->mothurOut("Clustering (" + tag + ") for distance " + toString(previousDist) + "/" + toString(rndPreviousDist) + "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")"); cout.flush(); print_start = false; } if(previousDist <= 0.0000) { printData("unique", counts, printHeaders); } else if(rndPreviousDistmothurOut("changed cutoff to " + toString(cutoff)+"\n"); } return 0; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "runMothurCluster"); exit(1); } } //********************************************************************************************************************** void ClusterCommand::printData(string label, map& counts, bool& ph){ try { oldList.setPrintedLabels(ph); ph = false; if (util.isTrue(timing)) { m->mothurOut("\tTime: " + toString(time(nullptr) - start) + "\tsecs for " + toString(oldRAbund.getNumBins()) + "\tclusters. 
Updates: " + toString(loops)+"\n"); } print_start = true; loops = 0; start = time(nullptr); oldRAbund.setLabel(label); if (countfile == "") { oldRAbund.print(rabundFile); oldRAbund.getSAbundVector().print(sabundFile); } if (util.isTrue(showabund)) { oldRAbund.getSAbundVector().print(cout); } oldList.setLabel(label); if(countfile != "") { oldList.print(listFile, counts); }else { oldList.print(listFile); } } catch(exception& e) { m->errorOut(e, "ClusterCommand", "printData"); exit(1); } } //********************************************************************************************************************** int ClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){ try { rabund->setLabel(list->getLabel()); for(int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { break; } vector binNames; string bin = list->get(i); util.splitAtComma(bin, binNames); int total = 0; for (int j = 0; j < binNames.size(); j++) { total += ct->getNumSeqs(binNames[j]); } rabund->push_back(total); } return 0; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "createRabund"); exit(1); } } //********************************************************************************************************************** int ClusterCommand::runOptiCluster(){ try { if (!cutOffSet) { m->mothurOut("\nYou did not set a cutoff, using 0.03.\n"); cutoff = 0.03; } m->mothurOut("\nClustering " + distfile+"\n"); ClusterMetric* metric = nullptr; if (metricName == "mcc") { metric = new MCC(); } else if (metricName == "sens") { metric = new Sensitivity(); } else if (metricName == "spec") { metric = new Specificity(); } else if (metricName == "tptn") { metric = new TPTN(); } else if (metricName == "tp") { metric = new TP(); } else if (metricName == "tn") { metric = new TN(); } else if (metricName == "fp") { metric = new FP(); } else if (metricName == "fn") { metric = new FN(); } else if (metricName == "f1score") { metric = new F1Score(); } else if (metricName == "accuracy") { metric = new Accuracy(); } else if (metricName == "ppv") { metric = new PPV(); } else if (metricName == "npv") { metric = new NPV(); } else if (metricName == "fdr") { metric = new FDR(); } else if (metricName == "fpfn") { metric = new FPFN(); } else { return 0; } string nameOrCount = ""; string thisNamefile = ""; map counts; if (countfile != "") { nameOrCount = "count"; thisNamefile = countfile; CountTable ct; ct.readTable(countfile, false, false); counts = ct.getNameMap(); } else if (namefile != "") { nameOrCount = "name"; thisNamefile = namefile; } string distfile = columnfile; if (format == "phylip") { distfile = phylipfile; } if (outputdir == "") { outputdir += util.hasPath(distfile); } fileroot = outputdir + util.getRootName(util.getSimpleName(distfile)); tag = "opti_" + metric->getName(); string listFileName = fileroot+ tag + ".list"; ofstream listFile; util.openOutputFile(listFileName, listFile); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); map variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; string outputName = getOutputFileName("steps", variables); outputNames.push_back(outputName); outputTypes["steps"].push_back(outputName); ofstream outStep; util.openOutputFile(outputName, outStep); string sensspecFilename = fileroot+ tag + ".sensspec"; ofstream sensFile; util.openOutputFile(sensspecFilename, sensFile); outputNames.push_back(sensspecFilename); outputTypes["sensspec"].push_back(sensspecFilename); sensFile << 
"label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; m->mothurOut("\n\niter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); outStep << "iter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; bool printHeaders = true; for (set::iterator it = cutoffs.begin(); it != cutoffs.end(); it++) { m->mothurOut("\n" + *it + "\n"); util.mothurConvert(*it, cutoff); OptiData* matrix = new OptiMatrix(distfile, thisNamefile, nameOrCount, format, cutoff, false); OptiCluster cluster(matrix, metric, 0); int iters = 0; double listVectorMetric = 0; //worst state double delta = 1; cluster.initialize(listVectorMetric, true, initialize); long long numBins = cluster.getNumBins(); double tp, tn, fp, fn; vector results = cluster.getStats(tp, tn, fp, fn); m->mothurOut("0\t0\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(cutoff) + "\t" + toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); outStep << "0\t0\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t" << tp << '\t' << tn << '\t' << fp << '\t' << fn << '\t'; for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); outStep << results[i] << "\t"; } m->mothurOutEndLine(); outStep << endl; while ((delta > stableMetric) && (iters < maxIters)) { long start = time(nullptr); if (m->getControl_pressed()) { break; } double oldMetric = listVectorMetric; cluster.update(listVectorMetric); delta = abs(oldMetric - listVectorMetric); iters++; results = cluster.getStats(tp, tn, fp, fn); numBins = cluster.getNumBins(); m->mothurOut(toString(iters) + "\t" + toString(time(nullptr) - start) + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t"+ toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); outStep << (toString(iters) + "\t" + toString(time(nullptr) - start) + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t") << tp << '\t' << tn << '\t' << fp << '\t' << fn << '\t'; for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); outStep << results[i] << "\t"; } m->mothurOutEndLine(); outStep << endl; } m->mothurOutEndLine(); m->mothurOutEndLine(); if (m->getControl_pressed()) { delete matrix; delete metric; metric = nullptr; return 0; } ListVector* list = cluster.getList(); list->setLabel(toString(cutoff)); if (printHeaders) { //only print headers the first time printHeaders = false; }else { list->setPrintedLabels(printHeaders); } if(countfile != "") { list->print(listFile, counts); } else { list->print(listFile); } delete list; results = cluster.getStats(tp, tn, fp, fn); sensFile << cutoff << '\t' << cutoff << '\t' << tp << '\t' << tn << '\t' << fp << '\t' << fn << '\t'; for (int i = 0; i < results.size(); i++) { sensFile << results[i] << '\t'; } sensFile << '\n'; delete matrix; } listFile.close(); sensFile.close(); outStep.close(); return 0; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "runOptiCluster"); exit(1); } } //********************************************************************************************************************** int ClusterCommand::runUniqueCluster(){ try { if (countfile != "") { distfile = countfile; } else if (namefile != "") { distfile = namefile; } m->mothurOut("\nClustering " + distfile+"\n"); ListVector list; 
list.setLabel("ASV"); map counts; if (countfile != "") { CountTable ct; ct.readTable(countfile, false, false); counts = ct.getNameMap(); for (map::iterator it = counts.begin(); it != counts.end(); it++) { if (m->getControl_pressed()) { return 0; } list.push_back(it->first); } }else { map nameMap; util.readNames(namefile, nameMap); for (map::iterator it = nameMap.begin(); it != nameMap.end(); it++) { if (m->getControl_pressed()) { return 0; } list.push_back(it->second); } } if (outputdir == "") { outputdir += util.hasPath(distfile); } fileroot = outputdir + util.getRootName(util.getSimpleName(distfile)); tag = "unique"; string listFileName = fileroot+ tag + ".list"; ofstream listFile; util.openOutputFile(listFileName, listFile); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); if(countfile != "") { list.print(listFile, counts); } else { list.print(listFile); } listFile.close(); map variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; string sabundFileName = getOutputFileName("sabund", variables); string rabundFileName = getOutputFileName("rabund", variables); if (countfile == "") { util.openOutputFile(sabundFileName, sabundFile); util.openOutputFile(rabundFileName, rabundFile); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); SAbundVector sabund = list.getSAbundVector(); sabund.print(sabundFile); sabundFile.close(); RAbundVector rabund = list.getRAbundVector(); rabund.print(rabundFile); rabundFile.close(); } return 0; } catch(exception& e) { m->errorOut(e, "ClusterCommand", "runUniqueCluster"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/clustercommand.h000077500000000000000000000054311424121717000214210ustar00rootroot00000000000000#ifndef CLUSTERCOMMAND_H #define CLUSTERCOMMAND_H /* * clustercommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "rabundvector.hpp" #include "sabundvector.hpp" #include "listvector.hpp" #include "cluster.hpp" #include "counttable.h" #include "vsearchfileparser.h" #include "clusterdoturcommand.h" #include "opticluster.h" #include "optimatrix.h" #include "calculator.h" /* The cluster() command: The cluster command outputs a .list , .rabund and .sabund files. The cluster command parameter options are method, cuttoff and precision. No parameters are required. The cluster command should be in the following format: cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision). The acceptable methods are furthest, nearest and average. If you do not provide a method the default algorithm is furthest neighbor. The cluster() command outputs three files *.list, *.rabund, and *.sabund. */ class ClusterCommand : public Command { public: ClusterCommand(string); ~ClusterCommand(); vector setParameters(); string getCommandName() { return "cluster"; } string getCommandCategory() { return "Clustering"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol 77:3219.\nSchloss PD, Handelsman J (2005). 
Introducing DOTUR, a computer program for defining operational taxonomic units and estimating species richness. Appl Environ Microbiol 71: 1501-6.\nhttp://www.mothur.org/wiki/Cluster"; } string getDescription() { return "cluster your sequences into OTUs using a distance matrix"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: Cluster* cluster; SparseDistanceMatrix* matrix; ListVector* list; RAbundVector* rabund; RAbundVector oldRAbund; ListVector oldList; bool abort, sim, cutOffSet; string method, fileroot, tag, phylipfile, columnfile, namefile, format, distfile, countfile, fastafile, inputDir, vsearchLocation, metric, initialize; double cutoff, stableMetric; float adjust; string showabund, timing, metricName; int precision, length, maxIters, processors; ofstream sabundFile, rabundFile, listFile; set cutoffs; bool print_start; time_t start; unsigned long loops; void printData(string label, map&, bool&); vector outputNames; int createRabund(CountTable*&, ListVector*&, RAbundVector*&); int vsearchDriver(string, string, string); int runVsearchCluster(); int runOptiCluster(); int runMothurCluster(); int runUniqueCluster(); }; #endif mothur-1.48.0/source/commands/clusterdoturcommand.cpp000077500000000000000000000307731424121717000230410ustar00rootroot00000000000000/* * clusterdoturcommand.cpp * Mothur * * Created by westcott on 10/27/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "clusterdoturcommand.h" #include "clusterclassic.h" //********************************************************************************************************************** vector ClusterDoturCommand::setParameters(){ try { CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pphylip); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","rabund-sabund",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "","",false,false); parameters.push_back(pmethod); CommandParameter psim("sim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psim); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["rabund"] = tempOutNames; outputTypes["sabund"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ClusterDoturCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClusterDoturCommand::getHelpString(){ try { string helpString = ""; helpString += "The cluster.classic command 
clusters using the algorithm from dotur. \n"; helpString += "The cluster.classic command parameter options are phylip, name, count, method, cuttoff, sim, precision. Phylip is required, unless you have a valid current file.\n"; helpString += "The cluster.classic command should be in the following format: \n"; helpString += "cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n"; helpString += "The acceptable cluster methods are furthest, nearest, weighted and average. If no method is provided then average is assumed.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "ClusterDoturCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClusterDoturCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; } else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ClusterDoturCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen. ClusterDoturCommand::ClusterDoturCommand(string option) : Command() { try{ //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { abort = true; } else if (phylipfile == "not found") { phylipfile = current->getPhylipFile(); if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("You need to provide a phylip file with the cluster.classic command.\n");abort = true; } }else { current->setPhylipFile(phylipfile); } //check for optional parameter and set defaults namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; namefile = ""; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.classic command you must enter ONLY ONE of the following: count or name.\n"); abort = true; } string temp; temp = validParameter.valid(parameters, "precision"); if (temp == "not found") { temp = "100"; } //saves precision legnth for formatting below length = temp.length(); util.mothurConvert(temp, precision); temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, cutoff); temp = 
validParameter.valid(parameters, "sim"); if (temp == "not found") { temp = "F"; } sim = util.isTrue(temp); method = validParameter.valid(parameters, "method"); if (method == "not found") { method = "average"; } if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { if (method == "furthest") { tag = "fn"; } else if (method == "nearest") { tag = "nn"; } else if (method == "average") { tag = "an"; } else if (method == "weighted") { tag = "wn"; } }else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest, average, weighted.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "ClusterDoturCommand", "ClusterCommand"); exit(1); } } //********************************************************************************************************************** int ClusterDoturCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } ClusterClassic* cluster = new ClusterClassic(cutoff, method, sim); NameAssignment* nameMap = nullptr; CountTable* ct = nullptr; map counts; if(namefile != "") { nameMap = new NameAssignment(namefile); nameMap->readMap(); cluster->readPhylipFile(phylipfile, nameMap); delete nameMap; }else if (countfile != "") { ct = new CountTable(); ct->readTable(countfile, false, false); cluster->readPhylipFile(phylipfile, ct); counts = ct->getNameMap(); delete ct; }else { cluster->readPhylipFile(phylipfile, nameMap); } tag = cluster->getTag(); if (m->getControl_pressed()) { delete cluster; return 0; } list = cluster->getListVector(); rabund = cluster->getRAbundVector(); if (outputdir == "") { outputdir += util.hasPath(phylipfile); } fileroot = outputdir + util.getRootName(util.getSimpleName(phylipfile)); map variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; string sabundFileName = getOutputFileName("sabund", variables); string rabundFileName = getOutputFileName("rabund", variables); //if (countfile != "") { variables["[tag2]"] = "unique_list"; } string listFileName = getOutputFileName("list", variables); if (countfile == "") { util.openOutputFile(sabundFileName, sabundFile); util.openOutputFile(rabundFileName, rabundFile); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); } util.openOutputFile(listFileName, listFile); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); float previousDist = 0.00000; float rndPreviousDist = 0.00000; oldRAbund = *rabund; oldList = *list; bool printHeaders = true; int estart = time(nullptr); while ((cluster->getSmallDist() <= cutoff) && (cluster->getNSeqs() > 1)){ if (m->getControl_pressed()) { delete cluster; delete list; delete rabund; if(countfile == "") {rabundFile.close(); sabundFile.close(); util.mothurRemove((fileroot+ tag + ".rabund")); util.mothurRemove((fileroot+ tag + ".sabund")); } listFile.close(); util.mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } cluster->update(cutoff); float dist = cluster->getSmallDist(); float rndDist = util.ceilDist(dist, precision); if(previousDist <= 0.0000 && dist != previousDist) { printData("unique", counts, printHeaders); } else if(rndDist != rndPreviousDist) { printData(toString(rndPreviousDist), counts, printHeaders); } previousDist = dist; rndPreviousDist = rndDist; oldRAbund = *rabund; oldList = *list; } if(previousDist <= 0.0000) { printData("unique", counts, printHeaders); } else 
if(rndPreviousDistsecond).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(nullptr) - estart) + " seconds to cluster\n"); return 0; } catch(exception& e) { m->errorOut(e, "ClusterDoturCommand", "execute"); exit(1); } } //********************************************************************************************************************** void ClusterDoturCommand::printData(string label, map& counts, bool& ph){ try { oldList.setPrintedLabels(ph); ph = false; oldRAbund.setLabel(label); if (countfile == "") { oldRAbund.print(rabundFile); oldRAbund.getSAbundVector().print(sabundFile); } oldRAbund.getSAbundVector().print(cout); oldList.setLabel(label); if(countfile != "") { oldList.print(listFile, counts); }else { oldList.print(listFile, true); } ph = false; } catch(exception& e) { m->errorOut(e, "ClusterDoturCommand", "printData"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/clusterdoturcommand.h000077500000000000000000000031611424121717000224750ustar00rootroot00000000000000#ifndef CLUSTERDOTURCOMMAND_H #define CLUSTERDOTURCOMMAND_H /* * clusterdoturcommand.h * Mothur * * Created by westcott on 10/27/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "nameassignment.hpp" #include "rabundvector.hpp" #include "sabundvector.hpp" #include "listvector.hpp" class ClusterDoturCommand : public Command { public: ClusterDoturCommand(string); ~ClusterDoturCommand(){} vector setParameters(); string getCommandName() { return "cluster.classic"; } string getCommandCategory() { return "Clustering"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol 77:3219.\nSchloss PD, Handelsman J (2005). Introducing DOTUR, a computer program for defining operational taxonomic units and estimating species richness. 
Appl Environ Microbiol 71: 1501-6.\nhttp://www.mothur.org/wiki/Cluster.classic\n";} string getDescription() { return "cluster your sequences into OTUs using DOTUR’s method"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, sim; string method, fileroot, tag, phylipfile, namefile, countfile; double cutoff; int precision, length; ofstream sabundFile, rabundFile, listFile; NameAssignment* nameMap; ListVector* list; RAbundVector* rabund; RAbundVector oldRAbund; ListVector oldList; void printData(string label, map&, bool&); vector outputNames; }; #endif mothur-1.48.0/source/commands/clusterfitcommand.cpp000066400000000000000000002112471424121717000224600ustar00rootroot00000000000000// // clusterfitcommand.cpp // Mothur // // Created by Sarah Westcott on 1/22/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #include "clusterfitcommand.hpp" #include "readphylip.h" #include "readcolumn.h" #include "readmatrix.hpp" #include "sequence.hpp" #include "systemcommand.h" #include "sensspeccommand.h" #include "mcc.hpp" #include "sensitivity.hpp" #include "specificity.hpp" #include "fdr.hpp" #include "npv.hpp" #include "ppv.hpp" #include "f1score.hpp" #include "tp.hpp" #include "fp.hpp" #include "fpfn.hpp" #include "tptn.hpp" #include "tn.hpp" #include "fn.hpp" #include "accuracy.hpp" //********************************************************************************************************************** vector ClusterFitCommand::setParameters(){ try { CommandParameter plist("reflist", "InputTypes", "", "", "", "", "","",false,true,true); parameters.push_back(plist); CommandParameter pfasta("fasta", "InputTypes", "", "", "", "", "","list",false,true,true); parameters.push_back(pfasta); CommandParameter prepfasta("reffasta", "InputTypes", "", "", "", "", "","",false,true,true); parameters.push_back(prepfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none","","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "","",false,false,true); parameters.push_back(pcount); CommandParameter prefname("refname", "InputTypes", "", "", "RefNameCount", "none","","",false,false,true); parameters.push_back(prefname); CommandParameter prefcount("refcount", "InputTypes", "", "", "RefNameCount", "none", "","",false,false,true); parameters.push_back(prefcount); CommandParameter prefcolumn("refcolumn", "InputTypes", "", "", "PhylipColumnRef", "", "ColumnName","",false,false,true); parameters.push_back(prefcolumn); CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "", "ColumnName","",false,false,true); parameters.push_back(pcolumn); CommandParameter paccnos("accnos", "InputTypes", "", "", "", "", "","",false,false,true); parameters.push_back(paccnos); CommandParameter pcutoff("cutoff", "Number", "", "0.03", "", "", "","",false,false,true); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); CommandParameter pmethod("method", "Multiple", "closed-open", "closed", "", "", "","",false,false,true); parameters.push_back(pmethod); CommandParameter prefweight("refweight", "Multiple", "none-abundance-connectivity", "none", "", "", "","",false,false,true); parameters.push_back(prefweight); CommandParameter pmetric("metric", "Multiple", "mcc-sens-spec-tptn-fpfn-tp-tn-fp-fn-f1score-accuracy-ppv-npv-fdr", "mcc", "", "", "","",false,false,true); parameters.push_back(pmetric); 
CommandParameter pmetriccutoff("delta", "Number", "", "0.0001", "", "", "","",false,false,true); parameters.push_back(pmetriccutoff); CommandParameter piters("iters", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(piters); CommandParameter pdenovoiters("denovoiters", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pdenovoiters); CommandParameter pfitpercent("fitpercent", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pfitpercent); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter prefprint("printref", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prefprint); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["sensspec"] = tempOutNames; outputTypes["steps"] = tempOutNames; outputTypes["accnos"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClusterFitCommand::getHelpString(){ try { string helpString = ""; helpString += "The cluster.fit command parameter options are reflist, refcolumn, refname, refcount, fasta, name, count, column, accnos, method, cutoff, precent, metric, iters, initialize, denovoiters.\n"; helpString += "The refcolumn parameter allow you to enter your reference data distance file, to reduce processing time. \n"; helpString += "The column parameter allow you to enter your data distance file, to reduce processing time. \n"; helpString += "The fasta parameter allows you to enter your fasta file. \n"; helpString += "The reffasta parameter allows you to enter your fasta file for your reference dataset. \n"; helpString += "The reflist parameter allows you to enter your list file for your reference dataset. \n"; helpString += "The name parameter allows you to enter your name file. \n"; helpString += "The count parameter allows you to enter your count file.\nA count or name file is required if your distance file is in column format.\n"; helpString += "The refname parameter allows you to enter your reference name file. \n"; helpString += "The refcount parameter allows you to enter your reference count file.\nA refcount or refname file is required if your reference distance file is in column format.\n"; helpString += "The accnos parameter allows you to assign reference seqeunces by name. This can save time by allowing you to provide a distance matrix containing all the sequence distances rather than a sample matrix and reference matrix and mothur calculating the distances between the sample and reference.\n"; helpString += "The iters parameter allow you to set the maxiters for the opticluster method. \n"; helpString += "The denovoiters parameter allow you to set the number of randomizations to perform. 
\n"; helpString += "The fitpercent parameter allow you to set percentage of reads to be fitted. Default=50. Max=100, min=0.01.\n"; helpString += "The refweight parameter is used with the denovo method to allows you weight the selection of reference sequences. Options none, abundance and connectivity. Default=none.\n"; helpString += "The metric parameter allows to select the metric in the opticluster method. Options are Matthews correlation coefficient (mcc), sensitivity (sens), specificity (spec), true positives + true negatives (tptn), false positives + false negatives (fpfn), true positives (tp), true negative (tn), false positive (fp), false negative (fn), f1score (f1score), accuracy (accuracy), positive predictive value (ppv), negative predictive value (npv), false discovery rate (fdr). Default=mcc.\n"; helpString += "The printref parameter allows to indicate whether you want the reference seqs printed with the fit seqs. For example, if you are trying to see how a new patient's data changes the clustering, you want to set printref=t so the old patient and new patient OTUs are printed together. If you want to see how your data would fit with a reference like silva, setting printref=f would output only your sequences to the list file. By default printref=t for denovo clustering and printref=f when using a reference.\n"; helpString += "The delta parameter allows to set the stable value for the metric in the opticluster method (delta=0.0001). \n"; helpString += "The method parameter allows you to enter your clustering method. Options are closed and open. Default=closed.\n"; helpString += "The cluster.fit command should be in the following format: \n"; helpString += "cluster.fit(list=yourreflist, reffasta=yourReferenceFasta, fasta=yourFastaFile, count=yourCountFile) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClusterFitCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } else if (type == "sensspec") { pattern = "[filename],sensspec"; } else if (type == "steps") { pattern = "[filename],[clustertag],steps"; } else if (type == "accnos") { pattern = "[filename],accnos"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen. 
ClusterFitCommand::ClusterFitCommand(string option) : Command() { try{ //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; selfReference = true; createAccnos = false; refdistfile = ""; distfile = ""; //check for required parameters reffastafile = validParameter.validFile(parameters, "reffasta"); if (reffastafile == "not open") { abort = true; } else if (reffastafile == "not found") { reffastafile = ""; } else { selfReference = false; } refdistfile = validParameter.validFile(parameters, "refcolumn"); if (refdistfile == "not open") { refdistfile = ""; abort = true; } else if (refdistfile == "not found") { refdistfile = ""; } else { refformat = "column"; selfReference = false; } //allow ref list to be entered with denovo and accnos file reflistfile = validParameter.validFile(parameters, "reflist"); if (reflistfile == "not open") { abort = true; } else if (reflistfile == "not found") { reflistfile = ""; } //else { selfReference = false; } refnamefile = validParameter.validFile(parameters, "refname"); if (refnamefile == "not open") { abort = true; } else if (refnamefile == "not found") { refnamefile = ""; } else { selfReference = false; } refcountfile = validParameter.validFile(parameters, "refcount"); if (refcountfile == "not open") { abort = true; } else if (refcountfile == "not found") { refcountfile = ""; } else { selfReference = false; } if (!selfReference) { //if you are providing reference files, lets make sure we have all of them if ((refdistfile == "") || (reffastafile == "") || (reflistfile == "")) { m->mothurOut("[ERROR]: When providing a reference file, you must provide a reffasta, refcolumn, reflist and refcount or refname, aborting.\n"); abort = true; } } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { //if there is a current fasta file, use it if (!selfReference) { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else { fastafile = ""; } }else { current->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { columnfile = ""; abort = true; } else if (columnfile == "not found") { columnfile = ""; } else { distfile = columnfile; current->setColumnFile(columnfile); } accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { accnosfile = ""; abort = true; } else if (accnosfile == "not found") { accnosfile = ""; } else { current->setAccnosFile(accnosfile); createAccnos = false; } //extract reference names from reflist instead of accnos fil if (selfReference) { if 
((reflistfile != "") && (accnosfile == "")) { createAccnos = true; } } method = validParameter.valid(parameters, "method"); if (method == "not found") { method = "open";} if ((method == "closed") || (method == "open")) { } else { m->mothurOut("[ERROR]: " + method + " is not a valid cluster fitting method. Valid options are closed and open.\n"); abort = true; } if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.fit command you must enter ONLY ONE of the following: count or name.\n"); abort = true; } if (!selfReference) { if ((columnfile == "") && (fastafile == "")) { //is there are current file available for either of these? //give priority to column, then phylip columnfile = current->getColumnFile(); if (columnfile != "") { distfile = columnfile; m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { fastafile = current->getFastaFile(); if (fastafile != "") { distfile = fastafile; m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("No valid current files. You must column or fasta file before you can use the cluster.fit command.\n"); abort = true; } } } }else { if (columnfile == "") { //is there are current file available for either of these? columnfile = current->getColumnFile(); if (columnfile != "") { distfile = columnfile; m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a column file before you can use the cluster.fit command.\n"); abort = true; } } } if (columnfile != "") { if ((namefile == "") && (countfile == "")) { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You need to provide a namefile or countfile if you are going to use the column format.\n"); abort = true; } } } } string temp = validParameter.valid(parameters, "precision"); if (temp == "not found") { temp = "100"; } length = temp.length(); ////saves precision length for formatting below util.mothurConvert(temp, precision); temp = validParameter.valid(parameters, "delta"); if (temp == "not found") { temp = "0.0001"; } util.mothurConvert(temp, stableMetric); metricName = validParameter.valid(parameters, "metric"); if (metricName == "not found") { metricName = "mcc"; } if ((metricName == "mcc") || (metricName == "sens") || (metricName == "spec") || (metricName == "tptn") || (metricName == "tp") || (metricName == "tn") || (metricName == "fp") || (metricName == "fn") || (metricName == "f1score") || (metricName == "accuracy") || (metricName == "ppv") || (metricName == "npv") || (metricName == "fdr") || (metricName == "fpfn") ){ } else { m->mothurOut("[ERROR]: Not a valid metric. Valid metrics are mcc, sens, spec, tp, tn, fp, fn, tptn, fpfn, f1score, accuracy, ppv, npv, fdr.\n"); abort = true; } refWeight = validParameter.valid(parameters, "refweight"); if (refWeight == "not found") { refWeight = "none"; } if ((refWeight == "none") || (refWeight == "abundance") || (refWeight == "connectivity")){ } else { m->mothurOut("[ERROR]: Not a valid reference weight. 
Valid refweight options are none, abundance and connectivity.\n"); abort = true; } initialize = "singleton"; temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, maxIters); temp = validParameter.valid(parameters, "denovoiters"); if (temp == "not found") { if (selfReference) { temp = "10"; } else { temp = "1"; } } util.mothurConvert(temp, denovoIters); temp = validParameter.valid(parameters, "fitpercent"); if (temp == "not found") { temp = "50.0"; } util.mothurConvert(temp, fitPercent); if ((fitPercent > 100) || (fitPercent < 0.01)) { abort=true; m->mothurOut("[ERROR]: fitpercent must be less than 100, and more than 0.01.\n"); } temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); adjust=-1.0; temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "0.03"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "printref"); if (temp == "not found") { if (selfReference) { temp = "t"; }else { temp = "f"; } } printref = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "ClusterFitCommand"); exit(1); } } //********************************************************************************************************************** ClusterFitCommand::~ClusterFitCommand(){} //********************************************************************************************************************** int ClusterFitCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } time_t estart = time(nullptr); ClusterMetric* metric = nullptr; if (metricName == "mcc") { metric = new MCC(); } else if (metricName == "sens") { metric = new Sensitivity(); } else if (metricName == "spec") { metric = new Specificity(); } else if (metricName == "tptn") { metric = new TPTN(); } else if (metricName == "tp") { metric = new TP(); } else if (metricName == "tn") { metric = new TN(); } else if (metricName == "fp") { metric = new FP(); } else if (metricName == "fn") { metric = new FN(); } else if (metricName == "f1score") { metric = new F1Score(); } else if (metricName == "accuracy") { metric = new Accuracy(); } else if (metricName == "ppv") { metric = new PPV(); } else if (metricName == "npv") { metric = new NPV(); } else if (metricName == "fdr") { metric = new FDR(); } else if (metricName == "fpfn") { metric = new FPFN(); } map counts; string dupsFile = countfile; nameOrCount = "count"; if (namefile != "") { dupsFile = namefile; nameOrCount = "name"; } else { CountTable ct; ct.readTable(countfile, false, false); counts = ct.getNameMap(); } if (outputdir == "") { outputdir += util.hasPath(distfile); } fileroot = outputdir + util.getRootName(util.getSimpleName(distfile)); string listFile = ""; string bestListFileName = ""; string outputName = ""; if (selfReference) { //de novo map variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = "optifit_" + metric->getName(); outputName = getOutputFileName("steps", variables); if ((accnosfile == "") && (!createAccnos)) { //denovo with mothur randomly assigning references m->mothurOut("\nRandomly assigning reads from " + distfile + " as reference sequences\n"); //distfile, distFormat, dupsFile, dupsFormat, cutoff, percentage to be fitseqs - will randomly assign as fit OptiData* matrix = new OptiRefMatrix(distfile, "column", dupsFile, nameOrCount, cutoff, fitPercent, refWeight); runDenovoOptiCluster(matrix, 
metric, counts, outputName); string sensspecFilename = fileroot+ tag + ".sensspec"; ofstream sensFile; util.openOutputFile(sensspecFilename, sensFile); outputNames.push_back(sensspecFilename); outputTypes["sensspec"].push_back(sensspecFilename); //evaluate results bestListFileName = compareSensSpec(matrix, metric, sensFile); delete matrix; }else { //reference with accnos file or reference list file assigning references unordered_set refNames; vector refLabels; vector< vector > otus; if (accnosfile != "") { //use accnos file to assign references m->mothurOut("\nUsing sequences from " + accnosfile + " as reference sequences\n"); refNames = util.readAccnos(accnosfile); }else if (createAccnos) { //assign references based on reflist parameter m->mothurOut("\nUsing OTUs from " + reflistfile + " as reference OTUs\n"); InputData input(reflistfile, "list", nullVector); set processedLabels, userLabels; string lastLabel = ""; ListVector* reflist = util.getNextList(input, true, userLabels, processedLabels, lastLabel); refLabels = reflist->getLabels(); for (int i = 0; i < refLabels.size(); i++) { refLabels[i] = "Ref_" + refLabels[i]; } refNames = util.getSetFromList(reflist, otus); delete reflist; } //distfile, distFormat, dupsFile, dupsFormat, cutoff, accnos containing refseq name OptiData* matrix = new OptiRefMatrix(distfile, "column", dupsFile, nameOrCount, cutoff, refNames); //fit seqs ListVector* list = runUserRefOptiCluster(matrix, metric, counts, outputName, refLabels, otus); ofstream listFile; string listFileName = fileroot+ tag + ".list"; util.openOutputFile(listFileName, listFile); if(countfile != "") { list->print(listFile, counts); } else { list->print(listFile); } listFile.close(); listFiles.push_back(listFileName); bestListFileName = listFileName; delete list; delete matrix; } }else { //reference with files containing reference seqs createReferenceNameCount(); //creates reference name or count file if needed string distanceFile = calcDists(); //calc distance matrix for fasta file and distances between fasta file and reffasta file if (outputdir == "") { outputdir += util.hasPath(distanceFile); } fileroot = outputdir + util.getRootName(util.getSimpleName(distanceFile)); map variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = "optifit_" + metric->getName(); outputName = getOutputFileName("steps", variables); m->mothurOut("\nUsing OTUs from " + reflistfile + " as reference OTUs\n"); //calc sens.spec values for reference InputData input(reflistfile, "list", nullVector); ListVector* list = input.getListVector(); //add tag to OTULabels to indicate the reference vector refListLabels = list->getLabels(); for (int i = 0; i < refListLabels.size(); i++) { refListLabels[i] = "Ref_" + refListLabels[i]; } list->setLabels(refListLabels); string refDupsFile = refcountfile; if (refNameOrCount == "name") { refDupsFile = refnamefile; } OptiData* matrix = new OptiRefMatrix(refdistfile, refDupsFile, refNameOrCount, refformat, cutoff, distfile, dupsFile, nameOrCount, "column", comboDistFile, "column"); listFile = runRefOptiCluster(matrix, metric, list, counts, outputName); listFiles.push_back(listFile); bestListFileName = listFile; delete matrix; } delete metric; if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } outputNames.push_back(outputName); outputTypes["steps"].push_back(outputName); outputNames.push_back(bestListFileName); outputTypes["list"].push_back(bestListFileName); if (m->getControl_pressed()) { for 
(int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } m->mothurOut("It took " + toString(time(nullptr) - estart) + " seconds to fit sequences to reference OTUs.\n"); //set list file as new current listfile string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "execute"); exit(1); } } //********************************************************************************************************************** string ClusterFitCommand::runDenovoOptiCluster(OptiData*& matrix, ClusterMetric*& metric, map& counts, string outStepFile){ try { m->mothurOut("\nClustering " + distfile + "\n"); bool printStepsHeader = true; for (int i = 0; i < denovoIters; i++) { OptiFitCluster cluster(matrix, metric, 0); tag = cluster.getTag(); int iters = 0; double listVectorMetric = 0; //worst state double delta = 1; //get "ref" seqs for initialize inputs OptiData* refMatrix = matrix->extractRefMatrix(); ListVector* refList = clusterRefs(refMatrix, metric); delete refMatrix; vector > otus; for (int i = 0; i < refList->getNumBins(); i++) { vector binNames; string bin = refList->get(i); if (bin != "") { util.splitAtComma(bin, binNames); otus.push_back(binNames); } } //add tag to OTULabels to indicate the reference vector refListLabels = refList->getLabels(); for (int i = 0; i < refListLabels.size(); i++) { refListLabels[i] = "Ref_" + refListLabels[i]; } refList->setLabels(refListLabels); cluster.initialize(listVectorMetric, true, otus, refList->getLabels(), method, true); delete refList; long long numBins = cluster.getNumBins(); double tp, tn, fp, fn; vector results = cluster.getStats(tp, tn, fp, fn); double fittp, fittn, fitfp, fitfn; long long numFitBins = cluster.getNumFitBins(); vector fitresults = cluster.getFitStats(fittp, fittn, fitfp, fitfn); m->mothurOut("\nFitting " + toString(matrix->getNumFitSeqs()+matrix->getNumFitSingletons()+matrix->getNumFitTrueSingletons()) + " sequences to reference otus.\n"); m->mothurOut("\n\nlist\tstate\titer\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); outputSteps(outStepFile, printStepsHeader, tp, tn, fp, fn, results, numBins, fittp, fittn, fitfp, fitfn, fitresults, numFitBins, 0, false, 0); while ((delta > stableMetric) && (iters < maxIters)) { // if (m->getControl_pressed()) { break; } double oldMetric = listVectorMetric; cluster.update(listVectorMetric); delta = abs(oldMetric - listVectorMetric); iters++; results = cluster.getStats(tp, tn, fp, fn); numBins = cluster.getNumBins(); numFitBins = cluster.getNumFitBins(); fitresults = cluster.getFitStats(fittp, fittn, fitfp, fitfn); outputSteps(outStepFile, printStepsHeader, tp, tn, fp, fn, results, numBins, fittp, fittn, fitfp, fitfn, fitresults, numFitBins, iters, false, i); } outputSteps(outStepFile, printStepsHeader, tp, tn, fp, fn, results, numBins, fittp, fittn, fitfp, fitfn, fitresults, numFitBins, iters, true, i); m->mothurOutEndLine(); m->mothurOutEndLine(); if 
(m->getControl_pressed()) { return 0; } ofstream listFile; tag = "optifit_" + metric->getName() + "_denovo." + toString(i+1); string listFileName = fileroot+ tag + ".list"; util.openOutputFile(listFileName, listFile); ListVector* list = cluster.getFittedList(toString(cutoff), printref); list->setLabel(toString(cutoff)); list->setLabels(nullVector); if(countfile != "") { list->print(listFile, counts); } else { list->print(listFile); } listFile.close(); listFiles.push_back(listFileName); delete list; matrix->randomizeRefs(); } tag = "optifit_" + metric->getName() + "_denovo"; string listFileName = fileroot+ tag + ".list"; return listFileName; } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "runDenovoOptiCluster"); exit(1); } } //********************************************************************************************************************** ListVector* ClusterFitCommand::runUserRefOptiCluster(OptiData*& matrix, ClusterMetric*& metric, map& counts, string outStepFile, vector refListLabels, vector > otus){ try { bool printStepsHeader = true; OptiFitCluster cluster(matrix, metric, 0); tag = cluster.getTag(); int iters = 0; double listVectorMetric = 0; //worst state double delta = 1; if (!createAccnos) { m->mothurOut("\nClustering references from " + distfile + "\n"); //get "ref" seqs for initialize inputs OptiData* refMatrix = matrix->extractRefMatrix(); ListVector* refList = clusterRefs(refMatrix, metric); delete refMatrix; for (int i = 0; i < refList->getNumBins(); i++) { vector binNames; string bin = refList->get(i); if (bin != "") { util.splitAtComma(bin, binNames); otus.push_back(binNames); } } //add tag to OTULabels to indicate the reference refListLabels = refList->getLabels(); for (int i = 0; i < refListLabels.size(); i++) { refListLabels[i] = "Ref_" + refListLabels[i]; } refList->setLabels(refListLabels); delete refList; } cluster.initialize(listVectorMetric, true, otus, refListLabels, method, false); long long numBins = cluster.getNumBins(); double tp, tn, fp, fn; vector results = cluster.getStats(tp, tn, fp, fn); double fittp, fittn, fitfp, fitfn; long long numFitBins = cluster.getNumFitBins(); vector fitresults = cluster.getFitStats(fittp, fittn, fitfp, fitfn); m->mothurOut("\nFitting " + toString(matrix->getNumFitSeqs()+matrix->getNumFitSingletons()+matrix->getNumFitTrueSingletons()) + " sequences to reference otus.\n"); m->mothurOut("\n\nlist\tstate\titer\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); outputSteps(outStepFile, printStepsHeader, tp, tn, fp, fn, results, numBins, fittp, fittn, fitfp, fitfn, fitresults, numFitBins, 0, false, 0); while ((delta > stableMetric) && (iters < maxIters)) { // if (m->getControl_pressed()) { break; } double oldMetric = listVectorMetric; cluster.update(listVectorMetric); delta = abs(oldMetric - listVectorMetric); iters++; results = cluster.getStats(tp, tn, fp, fn); numBins = cluster.getNumBins(); numFitBins = cluster.getNumFitBins(); fitresults = cluster.getFitStats(fittp, fittn, fitfp, fitfn); outputSteps(outStepFile, printStepsHeader, tp, tn, fp, fn, results, numBins, fittp, fittn, fitfp, fitfn, fitresults, numFitBins, iters, false, 0); } m->mothurOutEndLine(); m->mothurOutEndLine(); if (m->getControl_pressed()) { return 0; } ListVector* list = cluster.getFittedList(toString(cutoff), printref); list->setLabel(toString(cutoff)); string sensspecFilename = fileroot+ tag + ".sensspec"; ofstream sensFile; util.openOutputFile(sensspecFilename, sensFile); 
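//record the sensspec output file; for the closed method the stats are written here directly and any reads that could not be fitted to a reference OTU are listed in a scrap accnos file, otherwise runSensSpec calculates the values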
outputNames.push_back(sensspecFilename); outputTypes["sensspec"].push_back(sensspecFilename); if (method == "closed") { sensFile << "label\tcutoff\tnumotus\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; int numBins = list->getNumBins(); if (printref) { //combo results = cluster.getStats(tp, tn, fp, fn); sensFile << cutoff << '\t' << cutoff << '\t' << numBins << '\t' << tp << '\t' << tn << '\t' << fp << '\t' << fn; for (int i = 0; i < results.size(); i++) { sensFile << '\t' << results[i]; } sensFile << '\n'; }else { //fit fitresults = cluster.getFitStats(fittp, fittn, fitfp, fitfn); sensFile << cutoff << '\t' << cutoff << '\t' << numBins << '\t' << fittp << '\t' << fittn << '\t' << fitfp << '\t' << fitfn; for (int i = 0; i < fitresults.size(); i++) { sensFile << "\t" << fitresults[i]; } sensFile << endl; } set unfitted = cluster.getUnfittedNames(); string accnosFilename = fileroot+ "optifit_scrap.accnos"; outputNames.push_back(accnosFilename); outputTypes["accnos"].push_back(accnosFilename); ofstream accOut; util.openOutputFile(accnosFilename, accOut); for (set::iterator it = unfitted.begin(); it != unfitted.end(); it++) { accOut << *it << endl; } accOut.close(); }else { runSensSpec(matrix, metric, list, counts, sensFile); } sensFile.close(); return list; } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "runUserRefOptiCluster"); exit(1); } } /***********************************************************************/ ListVector* ClusterFitCommand::clusterRefs(OptiData*& refsMatrix, ClusterMetric*& metric) { try { m->mothurOut("\nClustering " + toString(refsMatrix->getNumSeqs()+refsMatrix->getNumSingletons()) + " reference sequences.\n"); ListVector* list = nullptr; OptiCluster cluster(refsMatrix, metric, 0); int iters = 0; double listVectorMetric = 0; //worst state double delta = 1; cluster.initialize(listVectorMetric, true, "singleton"); long long numBins = cluster.getNumBins(); m->mothurOut("\n\niter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); double tp, tn, fp, fn; vector results = cluster.getStats(tp, tn, fp, fn); m->mothurOut("0\t0\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(cutoff) + "\t" + toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); } m->mothurOutEndLine(); while ((delta > 0.0001) && (iters < maxIters)) { long start = time(nullptr); if (m->getControl_pressed()) { break; } double oldMetric = listVectorMetric; cluster.update(listVectorMetric); delta = abs(oldMetric - listVectorMetric); iters++; results = cluster.getStats(tp, tn, fp, fn); numBins = cluster.getNumBins(); m->mothurOut(toString(iters) + "\t" + toString(time(nullptr) - start) + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t"+ toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); } m->mothurOutEndLine(); } m->mothurOutEndLine(); m->mothurOutEndLine(); if (m->getControl_pressed()) { return list; } list = cluster.getList(); list->setLabel(toString(cutoff)); return list; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "clusterRefs"); exit(1); } } //********************************************************************************************************************** string 
ClusterFitCommand::runRefOptiCluster(OptiData*& matrix, ClusterMetric*& metric, ListVector*& refList, map& counts, string outStepFile){ try { OptiFitCluster cluster(matrix, metric, 0); tag = cluster.getTag(); m->mothurOut("\nClustering " + distfile + "\n"); int iters = 0; double listVectorMetric = 0; //worst state double delta = 1; vector > otus; for (int i = 0; i < refList->getNumBins(); i++) { vector binNames; string bin = refList->get(i); if (bin != "") { util.splitAtComma(bin, binNames); otus.push_back(binNames); } } map refCounts; if (refcountfile != "") { CountTable refct; refct.readTable(refcountfile, false, false); refCounts = refct.getNameMap(); }else if (refnamefile != "") { refCounts = util.readNames(refnamefile); } else { //assume unique for (int i = 0; i < otus.size(); i++) { for (int j = 0; j < otus[i].size(); j++) { refCounts[otus[i][j]] = 1; } } } counts.insert(refCounts.begin(), refCounts.end()); cluster.initialize(listVectorMetric, true, otus, refList->getLabels(), method, false); long long numBins = cluster.getNumBins(); double tp, tn, fp, fn; vector results = cluster.getStats(tp, tn, fp, fn); double fittp, fittn, fitfp, fitfn; long long numFitBins = cluster.getNumFitBins(); vector fitresults = cluster.getFitStats(fittp, fittn, fitfp, fitfn); bool printStepsHeader = true; outputSteps(outStepFile, printStepsHeader, tp, tn, fp, fn, results, numBins, fittp, fittn, fitfp, fitfn, fitresults, numFitBins, 0, true, 0); while ((delta > stableMetric) && (iters < maxIters)) { // if (m->getControl_pressed()) { break; } double oldMetric = listVectorMetric; cluster.update(listVectorMetric); delta = abs(oldMetric - listVectorMetric); iters++; results = cluster.getStats(tp, tn, fp, fn); numBins = cluster.getNumBins(); numFitBins = cluster.getNumFitBins(); fitresults = cluster.getFitStats(fittp, fittn, fitfp, fitfn); outputSteps(outStepFile, printStepsHeader, tp, tn, fp, fn, results, numBins, fittp, fittn, fitfp, fitfn, fitresults, numFitBins, iters, true, 0); } m->mothurOutEndLine(); m->mothurOutEndLine(); if (m->getControl_pressed()) { return 0; } ListVector* list = cluster.getFittedList(toString(cutoff), printref); list->setLabel(toString(cutoff)); ofstream listFile; string listFileName = fileroot+ tag + ".list"; util.openOutputFile(listFileName, listFile); if(countfile != "") { list->print(listFile, counts); } else { list->print(listFile); } listFile.close(); string sensspecFilename = fileroot+ tag + ".sensspec"; ofstream sensFile; util.openOutputFile(sensspecFilename, sensFile); outputNames.push_back(sensspecFilename); outputTypes["sensspec"].push_back(sensspecFilename); if (method == "closed") { sensFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; if (printref) { //combo results = cluster.getStats(tp, tn, fp, fn); sensFile << cutoff << '\t' << cutoff << '\t' << tp << '\t' << tn << '\t' << fp << '\t' << fn; for (int i = 0; i < results.size(); i++) { sensFile << '\t' << results[i]; } sensFile << '\n'; }else { //fit fitresults = cluster.getFitStats(fittp, fittn, fitfp, fitfn); sensFile << cutoff << '\t' << cutoff << '\t' << fittp << '\t' << fittn << '\t' << fitfp << '\t' << fitfn; for (int i = 0; i < fitresults.size(); i++) { sensFile << "\t" << fitresults[i]; } sensFile << endl; } set unfitted = cluster.getUnfittedNames(); string accnosFilename = fileroot+ "optifit_scrap.accnos"; outputNames.push_back(accnosFilename); outputTypes["accnos"].push_back(accnosFilename); ofstream accOut; util.openOutputFile(accnosFilename, 
accOut); for (set::iterator it = unfitted.begin(); it != unfitted.end(); it++) { accOut << *it << endl; } accOut.close(); }else { runSensSpec(matrix, metric, list, counts, sensFile); } sensFile.close(); delete list; return listFileName; } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "runRefOptiCluster"); exit(1); } } //********************************************************************************************************************** string ClusterFitCommand::compareSensSpec(OptiData*& matrix, ClusterMetric*& userMetric, ofstream& sensSpecFile) { try { sensSpecFile << "iter\tlabel\tcutoff\tnumotus\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; m->mothurOut("iter\tlabel\tcutoff\tnumotus\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); double bestStat = 0; int bestResult = 0; if ((method == "open") && (printref)) { for (int i = 0; i < listFiles.size(); i++) { string thislistFileName = listFiles[i]; InputData input(thislistFileName, "list", nullVector); ListVector* list = input.getListVector(); string label = list->getLabel(); int numBins = list->getNumBins(); SensSpecCalc senscalc(*matrix, list); double truePositives, trueNegatives, falsePositives, falseNegatives; senscalc.getResults(*matrix, truePositives, trueNegatives, falsePositives, falseNegatives); double tp = truePositives; double fp = falsePositives; double tn = trueNegatives; double fn = falseNegatives; Sensitivity sens; double sensitivity = sens.getValue(tp, tn, fp, fn); Specificity spec; double specificity = spec.getValue(tp, tn, fp, fn); PPV ppv; double positivePredictiveValue = ppv.getValue(tp, tn, fp, fn); NPV npv; double negativePredictiveValue = npv.getValue(tp, tn, fp, fn); FDR fdr; double falseDiscoveryRate = fdr.getValue(tp, tn, fp, fn); Accuracy acc; double accuracy = acc.getValue(tp, tn, fp, fn); MCC mcc; double matthewsCorrCoef = mcc.getValue(tp, tn, fp, fn); F1Score f1; double f1Score = f1.getValue(tp, tn, fp, fn); sensSpecFile << i+1 << '\t' << label << '\t' << cutoff << '\t' << numBins << '\t'; sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t'; sensSpecFile << setprecision(4); sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t'; sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl; m->mothurOut(toString(i+1) + "\t" + label + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(truePositives) + "\t" + toString(trueNegatives) + "\t" + toString(falsePositives) + "\t" + toString(falseNegatives) + "\t"); m->mothurOut(toString(sensitivity) + "\t" + toString(specificity) + "\t" + toString(positivePredictiveValue) + "\t" + toString(negativePredictiveValue) + "\t"); m->mothurOut(toString(falseDiscoveryRate) + "\t" + toString(accuracy) + "\t" + toString(matthewsCorrCoef) + "\t" + toString(f1Score) + "\n\n"); double userStat = userMetric->getValue(tp, tn, fp, fn); if (userStat > bestStat) { bestStat = userStat; bestResult = i; } } }else { for (int i = 0; i < listFiles.size(); i++) { InputData input(listFiles[i], "list", nullVector); ListVector* list = input.getListVector(); string label = list->getLabel(); int numBins = list->getNumBins(); //extract seqs in list file from matrix set listNames; for (int i = 0; i < list->getNumBins(); i++){ string bin = list->get(i); if (bin != "") { vector binSeqs; util.splitAtComma(bin, binSeqs); 
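//gather every sequence name in this bin so the matrix can be subset to just the fitted reads before calculating sensitivity/specificity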
for (int j = 0; j < binSeqs.size(); j++) { listNames.insert(binSeqs[j]); } } } OptiData* fitMatrix = matrix->extractMatrixSubset(listNames); SensSpecCalc senscalc(*fitMatrix, list); double truePositives, trueNegatives, falsePositives, falseNegatives; senscalc.getResults(*fitMatrix, truePositives, trueNegatives, falsePositives, falseNegatives); delete fitMatrix; double tp = truePositives; double fp = falsePositives; double tn = trueNegatives; double fn = falseNegatives; Sensitivity sens; double sensitivity = sens.getValue(tp, tn, fp, fn); Specificity spec; double specificity = spec.getValue(tp, tn, fp, fn); PPV ppv; double positivePredictiveValue = ppv.getValue(tp, tn, fp, fn); NPV npv; double negativePredictiveValue = npv.getValue(tp, tn, fp, fn); FDR fdr; double falseDiscoveryRate = fdr.getValue(tp, tn, fp, fn); Accuracy acc; double accuracy = acc.getValue(tp, tn, fp, fn); MCC mcc; double matthewsCorrCoef = mcc.getValue(tp, tn, fp, fn); F1Score f1; double f1Score = f1.getValue(tp, tn, fp, fn); sensSpecFile << i+1 << '\t' << label << '\t' << cutoff << '\t' << numBins << '\t'; sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t'; sensSpecFile << setprecision(4); sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t'; sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl; m->mothurOut(toString(i+1) + "\t" + label + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(truePositives) + "\t" + toString(trueNegatives) + "\t" + toString(falsePositives) + "\t" + toString(falseNegatives) + "\t"); m->mothurOut(toString(sensitivity) + "\t" + toString(specificity) + "\t" + toString(positivePredictiveValue) + "\t" + toString(negativePredictiveValue) + "\t"); m->mothurOut(toString(falseDiscoveryRate) + "\t" + toString(accuracy) + "\t" + toString(matthewsCorrCoef) + "\t" + toString(f1Score) + "\n\n"); double userStat = userMetric->getValue(tp, tn, fp, fn); if (userStat > bestStat) { bestStat = userStat; bestResult = i; } } } sensSpecFile.close(); return listFiles[bestResult]; } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "compareSensSpec"); exit(1); } } //********************************************************************************************************************** void ClusterFitCommand::runSensSpec(OptiData*& matrix, ClusterMetric*& userMetric, ListVector*& list, map& counts, ofstream& sensSpecFile) { try { sensSpecFile << "label\tcutoff\tnumotus\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; m->mothurOut("label\tcutoff\tnumotus\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); if (method == "open") { double truePositives, trueNegatives, falsePositives, falseNegatives; string label = list->getLabel(); int numBins = list->getNumBins(); if (printref) { //pass whole matrix SensSpecCalc senscalc(*matrix, list); senscalc.getResults(*matrix, truePositives, trueNegatives, falsePositives, falseNegatives); }else { //pass subset matrix vector fSeqs = matrix->getFitSeqs(); set fitSeqs = util.mothurConvert(fSeqs); OptiData* fitMatrix = matrix->extractMatrixSubset(fitSeqs); SensSpecCalc senscalc(*fitMatrix, list); senscalc.getResults(*fitMatrix, truePositives, trueNegatives, falsePositives, falseNegatives); delete fitMatrix; } double tp = truePositives; double fp = falsePositives; double tn = trueNegatives; double fn = 
falseNegatives; Sensitivity sens; double sensitivity = sens.getValue(tp, tn, fp, fn); Specificity spec; double specificity = spec.getValue(tp, tn, fp, fn); PPV ppv; double positivePredictiveValue = ppv.getValue(tp, tn, fp, fn); NPV npv; double negativePredictiveValue = npv.getValue(tp, tn, fp, fn); FDR fdr; double falseDiscoveryRate = fdr.getValue(tp, tn, fp, fn); Accuracy acc; double accuracy = acc.getValue(tp, tn, fp, fn); MCC mcc; double matthewsCorrCoef = mcc.getValue(tp, tn, fp, fn); F1Score f1; double f1Score = f1.getValue(tp, tn, fp, fn); sensSpecFile << label << '\t' << cutoff << '\t' << numBins << '\t'; sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t'; sensSpecFile << setprecision(4); sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t'; sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl; m->mothurOut(label + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(truePositives) + "\t" + toString(trueNegatives) + "\t" + toString(falsePositives) + "\t" + toString(falseNegatives) + "\t"); m->mothurOut(toString(sensitivity) + "\t" + toString(specificity) + "\t" + toString(positivePredictiveValue) + "\t" + toString(negativePredictiveValue) + "\t"); m->mothurOut(toString(falseDiscoveryRate) + "\t" + toString(accuracy) + "\t" + toString(matthewsCorrCoef) + "\t" + toString(f1Score) + "\n\n"); }else { m->mothurOut("[ERROR]: should never get here... \n"); } } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "runSensSpec"); exit(1); } } //********************************************************************************************************************** void ClusterFitCommand::outputSteps(string outputName, bool& printHeaders, double tp, double tn, double fp, double fn, vector results, long long numBins, double fittp, double fittn, double fitfp, double fitfn, vector fitresults, long long numFitBins, int iter, bool printToFile, int denovoIter) { try { if (!selfReference) { //writes to file as well if (printHeaders) { m->mothurOut("\n\nstate\titer\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); } m->mothurOut("combo\t" + toString(iter) + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(cutoff) + "\t" + toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); } m->mothurOutEndLine(); m->mothurOut("fit\t" + toString(iter) + "\t" + toString(cutoff) + "\t" + toString(numFitBins) + "\t"+ toString(cutoff) + "\t" + toString(fittp) + "\t" + toString(fittn) + "\t" + toString(fitfp) + "\t" + toString(fitfn) + "\t"); for (int i = 0; i < fitresults.size(); i++) { m->mothurOut(toString(fitresults[i]) + "\t"); } m->mothurOutEndLine(); ofstream outStep; if (printHeaders) { util.openOutputFile(outputName, outStep); outStep << "state\titer\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; printHeaders = false; }else { util.openOutputFileAppend(outputName, outStep); } outStep << "combo\t" + toString(iter) + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t" << tp << '\t' << tn << '\t' << fp << '\t' << fn << '\t'; for (int i = 0; i < results.size(); i++) { outStep << results[i] << "\t"; } outStep << endl; 
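//the combo row above includes the reference seqs; the fit row below reports stats for the fitted query seqs only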
outStep << "fit\t" + toString(iter) + "\t" + toString(cutoff) + "\t" + toString(numFitBins) + "\t" + toString(cutoff) + "\t" << fittp << '\t' << fittn << '\t' << fitfp << '\t' << fitfn << '\t'; for (int i = 0; i < fitresults.size(); i++) { outStep << fitresults[i] << "\t"; } outStep << endl; }else { //print results for each iter??? if (printToFile) { ofstream outStep; if (printHeaders) { util.openOutputFile(outputName, outStep); outStep << "list\t\tstate\titer\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; printHeaders = false; }else { util.openOutputFileAppend(outputName, outStep); } outStep << toString(denovoIter+1) + "\tcombo\t" + toString(iter) + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t" << tp << '\t' << tn << '\t' << fp << '\t' << fn << '\t'; for (int i = 0; i < results.size(); i++) { outStep << results[i] << "\t"; } outStep << endl; outStep << toString(denovoIter+1) + "\tfit\t" + toString(iter) + "\t" + toString(cutoff) + "\t" + toString(numFitBins) + "\t" + toString(cutoff) + "\t" << fittp << '\t' << fittn << '\t' << fitfp << '\t' << fitfn << '\t'; for (int i = 0; i < fitresults.size(); i++) { outStep << fitresults[i] << "\t"; } outStep << endl; }else { m->mothurOut(toString(denovoIter+1) + "\t" + "combo\t" + toString(iter) + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(cutoff) + "\t" + toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); } m->mothurOutEndLine(); m->mothurOut(toString(denovoIter+1) + "\t" +"fit\t" + toString(iter) + "\t" + toString(cutoff) + "\t" + toString(numFitBins) + "\t"+ toString(cutoff) + "\t" + toString(fittp) + "\t" + toString(fittn) + "\t" + toString(fitfp) + "\t" + toString(fitfn) + "\t"); for (int i = 0; i < fitresults.size(); i++) { m->mothurOut(toString(fitresults[i]) + "\t"); } m->mothurOutEndLine(); } } } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "outputSteps"); exit(1); } } //********************************************************************************************************************** void ClusterFitCommand::createReferenceNameCount() { try { if (refcountfile != "") { refNameOrCount = "count"; } else if (refnamefile != "") { refNameOrCount = "name"; } else { //create count file current->setMothurCalling(true); //preserve current file names string currentFasta = current->getFastaFile(); string currentCount = current->getCountFile(); string options = "fasta=" + reffastafile + ", format=count"; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: unique.seqs(" + options + ")\n"); Command* deconvoluteCommand = new UniqueSeqsCommand(options); deconvoluteCommand->execute(); map > filenames = deconvoluteCommand->getOutputFiles(); refcountfile = filenames["count"][0]; refNameOrCount = "count"; //reset current filenames current->setFastaFile(currentFasta); current->setCountFile(currentCount); delete deconvoluteCommand; m->mothurOut("/******************************************/\n"); } } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "createReferenceNameCount"); exit(1); } } //********************************************************************************************************************** string ClusterFitCommand::calcDists() { try { //preserve current file names string currentFasta = current->getFastaFile(); string currentCount = 
current->getCountFile(); if (columnfile == "") { //calc user distances string currentColumn = current->getColumnFile(); string options = "fasta=" + fastafile + ", cutoff=" + toString(cutoff); current->setMothurCalling(true); //calc dists for fastafile m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: dist.seqs(" + options + ")\n"); Command* distCommand = new DistanceCommand(options); distCommand->execute(); map > filenames = distCommand->getOutputFiles(); distfile = filenames["column"][0]; columnfile = distfile; current->setColumnFile(currentColumn); delete distCommand; m->mothurOut("/******************************************/\n"); } map > filenames; int refAlignLength = util.getAlignmentLength(reffastafile); int alignLength = util.getAlignmentLength(fastafile); if (refAlignLength == alignLength) { string currentColumn = current->getColumnFile(); string options = "fitcalc=t, fasta=" + reffastafile + ", oldfasta=" + fastafile + ", cutoff=" + toString(cutoff) + ", column=" + distfile; //dists between reffasta and fastafile m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: dist.seqs(" + options + ")\n"); DistanceCommand* distCommand = new DistanceCommand(options); distCommand->execute(); filenames = distCommand->getOutputFiles(); comboDistFile = filenames["column"][0]; current->setColumnFile(currentColumn); delete distCommand; m->mothurOut("/******************************************/\n"); current->setMothurCalling(false); }else { //filter each file to improve distance calc time string options = "fasta=" + reffastafile + ", vertical=t"; m->mothurOut("\nRunning vertical filter to improve distance calculation time\n\n"); //filter reffasta m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: filter.seqs(" + options + ")\n"); Command* filterCommand = new FilterSeqsCommand(options); filterCommand->execute(); map > filenames = filterCommand->getOutputFiles(); string filteredRef = filenames["fasta"][0]; delete filterCommand; m->mothurOut("/******************************************/\n"); options = "fasta=" + fastafile + ", reference=" + filteredRef; //align fasta to refFasta m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: align.seqs(" + options + ")\n"); Command* alignCommand = new AlignCommand(options); alignCommand->execute(); filenames = alignCommand->getOutputFiles(); string alignedFasta = filenames["fasta"][0]; delete alignCommand; m->mothurOut("/******************************************/\n"); string currentColumn = current->getColumnFile(); options = "fitcalc=t, fasta=" + filteredRef + ", oldfasta=" + alignedFasta + ", cutoff=" + toString(cutoff) + ", column=" + distfile; //dists between reffasta and fastafile m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: dist.seqs(" + options + ")\n"); Command* distCommand = new DistanceCommand(options); distCommand->execute(); filenames = distCommand->getOutputFiles(); comboDistFile = filenames["column"][0]; current->setColumnFile(currentColumn); delete distCommand; m->mothurOut("/******************************************/\n"); current->setMothurCalling(false); } //reset current filenames current->setFastaFile(currentFasta); current->setCountFile(currentCount); return comboDistFile; } catch(exception& e) { m->errorOut(e, "ClusterFitCommand", "calcDists"); exit(1); } } 
//********************************************************************************************************************** mothur-1.48.0/source/commands/clusterfitcommand.hpp000077500000000000000000000054611424121717000224670ustar00rootroot00000000000000// // clusterfitcommand.hpp // Mothur // // Created by Sarah Westcott on 1/22/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef clusterfitcommand_hpp #define clusterfitcommand_hpp #include "command.hpp" #include "listvector.hpp" #include "cluster.hpp" #include "counttable.h" #include "optifitcluster.hpp" #include "optirefmatrix.hpp" #include "calculator.h" #include "distancecommand.h" #include "aligncommand.h" #include "filterseqscommand.h" #include "uniqueseqscommand.h" #include "listseqscommand.h" #include "getdistscommand.h" #include "getseqscommand.h" class ClusterFitCommand : public Command { public: ClusterFitCommand(string); ~ClusterFitCommand(); vector setParameters(); string getCommandName() { return "cluster.fit"; } string getCommandCategory() { return "Clustering"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "\nhttp://www.mothur.org/wiki/Cluster.fit"; } string getDescription() { return "fit your sequences into existing OTUs"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, sim, print_start, selfReference, printref, createAccnos; string refdistfile, reffastafile, refnamefile, refcountfile, reflistfile, refNameOrCount; string namefile, refformat, distfile, countfile, fastafile, columnfile, nameOrCount, accnosfile; string comboDistFile; string method, fileroot, tag, inputDir, metric, initialize, metricName, refWeight; double cutoff, stableMetric; float adjust, fitPercent; int precision, length, maxIters, processors, denovoIters; vector outputNames, listFiles; unsigned long loops; ListVector* runUserRefOptiCluster(OptiData*&, ClusterMetric*&, map&, string, vector, vector > ); string runRefOptiCluster(OptiData*&, ClusterMetric*&, ListVector*&, map&, string); string runDenovoOptiCluster(OptiData*&, ClusterMetric*&, map&, string); ListVector* clusterRefs(OptiData*& refsMatrix, ClusterMetric*&); void createReferenceNameCount(); string calcDists(); void runSensSpec(OptiData*& matrix, ClusterMetric*& userMetric, ListVector*& list, map& counts, ofstream&); string compareSensSpec(OptiData*& matrix, ClusterMetric*& userMetric, ofstream& sensSpecFile); //string runSensSpec(string distFileName, string dupsFile, string dupsFormat, ClusterMetric*&, string); void outputSteps(string outputName, bool& printHeaders, double tp, double tn, double fp, double fn, vector results, long long numBins, double fittp, double fittn, double fitfp, double fitfn, vector fitresults, long long numFitBins, int, bool, int); }; #endif /* clusterfitcommand_hpp */ mothur-1.48.0/source/commands/clusterfragmentscommand.cpp000077500000000000000000000364371424121717000236750ustar00rootroot00000000000000/* * ryanscommand.cpp * Mothur * * Created by westcott on 9/23/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "clusterfragmentscommand.h" #include "needlemanoverlap.hpp" //********************************************************************************************************************** //sort by unaligned inline bool comparePriority(seqRNode first, seqRNode second) { bool better = false; if (first.length > second.length) { better = true; }else if (first.length == second.length) { if (first.numIdentical > second.numIdentical) { better = true; } } return better; } //********************************************************************************************************************** vector ClusterFragmentsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pdiffs); CommandParameter ppercent("percent", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppercent); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ClusterFragmentsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClusterFragmentsCommand::getHelpString(){ try { string helpString = ""; helpString += "The cluster.fragments command groups sequences that are part of a larger sequence.\n"; helpString += "The cluster.fragments command outputs a new fasta and name or count file.\n"; helpString += "The cluster.fragments command parameters are fasta, name, count, diffs and percent. The fasta parameter is required, unless you have a valid current file. \n"; helpString += "The names parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n"; helpString += "The diffs parameter allows you to set the number of differences allowed, default=0. \n"; helpString += "The percent parameter allows you to set percentage of differences allowed, default=0. percent=2 means if the number of difference is less than or equal to two percent of the length of the fragment, then cluster.\n"; helpString += "You may use diffs and percent at the same time to say something like: If the number or differences is greater than 1 or more than 2% of the fragment length, don't merge. 
\n"; helpString += "The cluster.fragments command should be in the following format: \n"; helpString += "cluster.fragments(fasta=yourFastaFile, names=yourNamesFile) \n"; helpString += "Example cluster.fragments(fasta=amazon.fasta).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "ClusterFragmentsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClusterFragmentsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],fragclust.fasta"; } else if (type == "name") { pattern = "[filename],fragclust.names"; } else if (type == "count") { pattern = "[filename],fragclust.count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ClusterFragmentsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ClusterFragmentsCommand::ClusterFragmentsCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { fastafile = ""; abort = true; } else { current->setFastaFile(fastafile); } if (outputdir == ""){ outputdir = util.hasPath(fastafile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
namefile = validParameter.validFile(parameters, "name"); if (namefile == "not found") { namefile = ""; } else if (namefile == "not open") { namefile = ""; abort = true; } else { util.readNames(namefile, names, sizes); current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { ct.readTable(countfile, true, false); current->setCountFile(countfile); } if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.fragments command you must enter ONLY ONE of the following: count or name.\n"); abort = true; } string temp; temp = validParameter.valid(parameters, "diffs"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, diffs); temp = validParameter.valid(parameters, "percent"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, percent); } } catch(exception& e) { m->errorOut(e, "ClusterFragmentsCommand", "ClusterFragmentsCommand"); exit(1); } } //********************************************************************************************************************** int ClusterFragmentsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); //reads fasta file and return number of seqs int numSeqs = readFASTA(); //fills alignSeqs and makes all seqs active if (m->getControl_pressed()) { return 0; } if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct.\n"); return 0; } //sort seqs by length of unaligned sequence sort(alignSeqs.begin(), alignSeqs.end(), comparePriority); int count = 0; //think about running through twice... for (int i = 0; i < numSeqs; i++) { if (alignSeqs[i].active) { //this sequence has not been merged yet string iBases = alignSeqs[i].seq.getUnaligned(); //try to merge it with all smaller seqs for (int j = i+1; j < numSeqs; j++) { if (m->getControl_pressed()) { return 0; } if (alignSeqs[j].active) { //this sequence has not been merged yet string jBases = alignSeqs[j].seq.getUnaligned(); if (isFragment(iBases, jBases)) { if (countfile != "") { ct.mergeCounts(alignSeqs[i].names, alignSeqs[j].names); }else { //merge alignSeqs[i].names += ',' + alignSeqs[j].names; alignSeqs[i].numIdentical += alignSeqs[j].numIdentical; } alignSeqs[j].active = 0; alignSeqs[j].numIdentical = 0; count++; } }//end if j active }//end if i != j //remove from active list alignSeqs[i].active = 0; }//end if active i if(i % 100 == 0) { m->mothurOutJustToScreen(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)+"\n"); } } if(numSeqs % 100 != 0) { m->mothurOutJustToScreen(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)+"\n"); } string fileroot = outputdir + util.getRootName(util.getSimpleName(fastafile)); map variables; variables["[filename]"] = fileroot; string newFastaFile = getOutputFileName("fasta", variables); string newNamesFile = getOutputFileName("name", variables); if (countfile != "") { newNamesFile = getOutputFileName("count", variables); } if (m->getControl_pressed()) { return 0; } m->mothurOut("\nTotal number of sequences before cluster.fragments was " + toString(alignSeqs.size()) + ".\n"); m->mothurOut("cluster.fragments removed " + toString(count) + " sequences.\n\n"); printData(newFastaFile, newNamesFile); m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to cluster " + toString(numSeqs) + " sequences.\n"); if (m->getControl_pressed()) { 
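//user cancelled: remove the partial fasta and name/count output files before returning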
util.mothurRemove(newFastaFile); util.mothurRemove(newNamesFile); return 0; } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(newFastaFile); m->mothurOutEndLine(); m->mothurOut(newNamesFile); m->mothurOutEndLine(); outputNames.push_back(newFastaFile); outputNames.push_back(newNamesFile); outputTypes["fasta"].push_back(newFastaFile); outputTypes["name"].push_back(newNamesFile); m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "ClusterFragmentsCommand", "execute"); exit(1); } } //*************************************************************************************************************** bool ClusterFragmentsCommand::isFragment(string seq1, string seq2){ try { bool fragment = false; //exact match int pos = seq1.find(seq2); if (pos != string::npos) { return true; } //no match, no diffs wanted else if ((diffs == 0) && (percent == 0)) { return false; } else { //try aligning and see if you can find it //find number of acceptable differences for this sequence fragment int totalDiffs = 0; if (diffs == 0) { //you didnt set diffs you want a percentage totalDiffs = floor((seq2.length() * (percent / 100.0))); }else if (percent == 0) { //you didn't set percent you want diffs totalDiffs = diffs; }else if ((percent != 0) && (diffs != 0)) { //you want both, set total diffs to smaller of 2 totalDiffs = diffs; int percentDiff = floor((seq2.length() * (percent / 100.0))); if (percentDiff < totalDiffs) { totalDiffs = percentDiff; } } Alignment* alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (seq1.length()+totalDiffs+1)); //use needleman to align alignment->align(seq2, seq1); string tempSeq2 = alignment->getSeqAAln(); string temp = alignment->getSeqBAln(); delete alignment; //chop gap ends int startPos = 0; int endPos = tempSeq2.length()-1; for (int i = 0; i < tempSeq2.length(); i++) { if (isalpha(tempSeq2[i])) { startPos = i; break; } } for (int i = tempSeq2.length()-1; i >= 0; i--) { if (isalpha(tempSeq2[i])) { endPos = i; break; } } //count number of diffs int numDiffs = 0; for (int i = startPos; i <= endPos; i++) { if (tempSeq2[i] != temp[i]) { numDiffs++; } } if (numDiffs <= totalDiffs) { fragment = true; } } return fragment; } catch(exception& e) { m->errorOut(e, "ClusterFragmentsCommand", "isFragment"); exit(1); } } /**************************************************************************************************/ int ClusterFragmentsCommand::readFASTA(){ try { ifstream inFasta; util.openInputFile(fastafile, inFasta); while (!inFasta.eof()) { if (m->getControl_pressed()) { inFasta.close(); return 0; } Sequence seq(inFasta); gobble(inFasta); if (seq.getName() != "") { //can get "" if commented line is at end of fasta file if (namefile != "") { itSize = sizes.find(seq.getName()); if (itSize == sizes.end()) { m->mothurOut(seq.getName() + " is not in your names file, please correct.\n"); exit(1); } else{ seqRNode tempNode(itSize->second, seq, 
names[seq.getName()], seq.getUnaligned().length()); alignSeqs.push_back(tempNode); } }else if(countfile != "") { seqRNode tempNode(ct.getNumSeqs(seq.getName()), seq, seq.getName(), seq.getUnaligned().length()); alignSeqs.push_back(tempNode); }else { //no names file, you are identical to yourself seqRNode tempNode(1, seq, seq.getName(), seq.getUnaligned().length()); alignSeqs.push_back(tempNode); } } } inFasta.close(); return alignSeqs.size(); } catch(exception& e) { m->errorOut(e, "ClusterFragmentsCommand", "readFASTA"); exit(1); } } /**************************************************************************************************/ void ClusterFragmentsCommand::printData(string newfasta, string newname){ try { ofstream outFasta; ofstream outNames; util.openOutputFile(newfasta, outFasta); if (countfile == "") { util.openOutputFile(newname, outNames); } for (int i = 0; i < alignSeqs.size(); i++) { if (alignSeqs[i].numIdentical != 0) { alignSeqs[i].seq.printSequence(outFasta); if (countfile == "") { outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; } } } outFasta.close(); if (countfile == "") { outNames.close(); } else { ct.printTable(newname); } } catch(exception& e) { m->errorOut(e, "ClusterFragmentsCommand", "printData"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/clusterfragmentscommand.h000077500000000000000000000035561424121717000233360ustar00rootroot00000000000000#ifndef CLUSTERFRAGMENTSCOMMAND_H #define CLUSTERFRAGMENTSCOMMAND_H /* * clusterfragmentscommand.h * Mothur * * Created by westcott on 9/23/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "sequence.hpp" #include "counttable.h" /************************************************************/ struct seqRNode { int numIdentical; int length; Sequence seq; string names; bool active; seqRNode() = default; seqRNode(int n, Sequence s, string nm, int l) : numIdentical(n), seq(s), names(nm), active(1), length(l) {} ~seqRNode() = default; }; /************************************************************/ class ClusterFragmentsCommand : public Command { public: ClusterFragmentsCommand(string); ~ClusterFragmentsCommand() = default; vector setParameters(); string getCommandName() { return "cluster.fragments"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Cluster.fragments"; } string getDescription() { return "creates a namesfile with sequences that are a fragment of a larger sequence"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: CountTable ct; bool abort; string fastafile, namefile, countfile; int diffs, percent; vector alignSeqs; map names; //represents the names file first column maps to second column map sizes; //this map a seq name to the number of identical seqs in the names file map::iterator itSize; vector outputNames; int readFASTA(); void readNameFile(); void printData(string, string); //fasta filename, names file name bool isFragment(string, string); }; /************************************************************/ #endif mothur-1.48.0/source/commands/clustersplitcommand.cpp000066400000000000000000002315371424121717000230350ustar00rootroot00000000000000/* * clustersplitcommand.cpp * Mothur * * Created by westcott on 5/19/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "clustersplitcommand.h" #include "systemcommand.h" #include "sensspeccommand.h" #include "mcc.hpp" #include "sensitivity.hpp" #include "specificity.hpp" #include "fdr.hpp" #include "npv.hpp" #include "ppv.hpp" #include "f1score.hpp" #include "tp.hpp" #include "fp.hpp" #include "fpfn.hpp" #include "tptn.hpp" #include "tn.hpp" #include "fn.hpp" #include "accuracy.hpp" //********************************************************************************************************************** vector ClusterSplitCommand::setParameters(){ try { CommandParameter pfile("file", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "none","",false,false,true); parameters.push_back(pfile); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FastaTaxName","",false,false,true); parameters.push_back(ptaxonomy); CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName","list",false,false,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName-FastaTaxName","rabund-sabund",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "","",false,false,true); parameters.push_back(pcount); CommandParameter ptaxlevel("taxlevel", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(ptaxlevel); CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pshowabund); CommandParameter prunspenspec("runsensspec", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(prunspenspec); CommandParameter pcluster("cluster", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcluster); CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptiming); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pcutoff("cutoff", "Number", "", "0.03", "", "", "","",false,false,true); parameters.push_back(pcutoff); CommandParameter pmetriccutoff("delta", "Number", "", "0.0001", "", "", "","",false,false,true); parameters.push_back(pmetriccutoff); CommandParameter piters("iters", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(piters); CommandParameter pinitialize("initialize", "Multiple", "oneotu-singleton", "singleton", "", "", "","",false,false,true); parameters.push_back(pinitialize); CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted-agc-dgc-opti", "opti", "", "", "","",false,false,true); parameters.push_back(pmethod); CommandParameter pmetric("metric", "Multiple", "mcc-sens-spec-tptn-fpfn-tp-tn-fp-fn-f1score-accuracy-ppv-npv-fdr", "mcc", "", "", "","",false,false,true); parameters.push_back(pmetric); CommandParameter pdist("dist", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdist); CommandParameter pislist("islist", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pislist); CommandParameter pclassic("classic", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pclassic); CommandParameter pvsearchlocation("vsearch", "String", "", "", "", "", "","",false,false); parameters.push_back(pvsearchlocation); CommandParameter 
pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["rabund"] = tempOutNames; outputTypes["sabund"] = tempOutNames; outputTypes["column"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["file"] = tempOutNames; outputTypes["sensspec"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ClusterSplitCommand::getHelpString(){ try { string helpString = ""; helpString += "The cluster.split command parameter options are file, fasta, name, count, cutoff, precision, method, taxonomy, taxlevel, showabund, timing, cluster, iters, delta, initialize, dist, processors, runsensspec. Fasta or file are required.\n"; helpString += "The cluster.split command splits your files by classification using a fasta file to generate distance matrices for each taxonomic group. \n"; helpString += "The file option allows you to enter your file containing your list of column and names/count files as well as the singleton file. This file is mothur generated, when you run cluster.split() with the cluster=f parameter. This can be helpful when you have a large dataset that you may be able to use all your processors for the splitting step, but have to reduce them for the cluster step due to RAM constraints. For example: cluster.split(fasta=yourFasta, taxonomy=yourTax, count=yourCount, taxlevel=3, cluster=f, processors=8) then cluster.split(file=yourFile, processors=4). This allows your to maximize your processors during the splitting step. Also, if you are unsure if the cluster step will have RAM issue with multiple processors, you can avoid running the first part of the command multiple times.\n"; helpString += "The fasta parameter allows you to enter your aligned fasta file. \n"; helpString += "The name parameter allows you to enter your name file. \n"; helpString += "The count parameter allows you to enter your count file.\n"; helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences. This is required unless you are running the command with the file option. \n"; helpString += "The cluster parameter allows you to indicate whether you want to run the clustering or just split the dataset into taxanomic matrices, default=t"; helpString += "The dist parameter allows you to indicate whether you want a column formatted distance matrix outputted along with the list file. Default=F."; helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 0.03. \n"; helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n"; helpString += "The iters parameter allow you to set the maxiters for the opticluster method. \n"; helpString += "The metric parameter allows to select the metric in the opticluster method. 
Options are Matthews correlation coefficient (mcc), sensitivity (sens), specificity (spec), true positives + true negatives (tptn), false positives + false negatives (fpfn), true positives (tp), true negative (tn), false positive (fp), false negative (fn), f1score (f1score), accuracy (accuracy), positive predictive value (ppv), negative predictive value (npv), false discovery rate (fdr). Default=mcc.\n"; helpString += "The delta parameter allows you to set the stable value for the metric in the opticluster method. Default=0.0001.\n"; helpString += "The initialize parameter allows you to select the initial randomization for the opticluster method. Options are singleton, meaning each sequence is randomly assigned to its own OTU, or oneotu, meaning all sequences are assigned to one otu. Default=singleton.\n"; helpString += "The runsensspec parameter allows you to run the sens.spec command on the completed list file. Default=true.\n"; helpString += "The method parameter allows you to enter your clustering method. Options are furthest, nearest, average, weighted, agc, dgc and opti. Default=opti. The agc and dgc methods require a fasta file.\n"; helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the dataset, default=3.\n"; helpString += "The classic parameter allows you to indicate that you want to run your files with cluster.classic. Default=f.\n"; helpString += "The processors parameter allows you to specify the number of processors to use. The default is all available.\n"; helpString += "The vsearch parameter allows you to specify the name and location of your vsearch executable if using the agc or dgc clustering methods. By default mothur will look in your path, mothur's executable location, and mothur's tools location. You can set the vsearch location as follows: vsearch=/usr/bin/vsearch.\n"; helpString += "The cluster.split command should be in the following format: \n"; helpString += "cluster.split(fasta=yourFastaFile, count=yourCountFile, method=yourMethod, cutoff=yourCutoff, taxonomy=yourTaxonomyfile, taxlevel=yourtaxlevel) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ClusterSplitCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; } else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; } else if (type == "sensspec") { pattern = "[filename],[clustertag],sensspec"; } else if (type == "column") { pattern = "[filename],dist"; } else if (type == "file") { pattern = "[filename],file"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** string ClusterSplitCommand::getCommonQuestions(){ try { vector<string> questions, issues, qanswers, ianswers, howtos, hanswers; string issue = "Cluster.split crashes after merging individual list files. 
What do I do?"; issues.push_back(issue); string ianswer = "\tAfter merging the split list files, mothur runs the sens.spec command on the entire dataset. The entire dataset's distance matrix may be too large to fit in memory, which causes the crash. You can skip this step by setting the runsensspec parameter to false. Skipping the sens.spec analysis does not effect the OTU assignment, and you can run the sens.spec analysis separately using the sens.spec command. \n"; ianswers.push_back(ianswer); issue = "Cluster.split crashes while reading the split distance matrices. What should I do?"; issues.push_back(issue); ianswer = "\tThe command is crashing because the distance matrices are too large to fit into memory. Why do I have such a large distance matrix? This is most often caused by poor overlap of your reads. When reads have poor overlap, it greatly increases your error rate. Also, sequences that should cluster together don't because the errors appear to be genetic differences when in fact they are not. The quality of the data you are processing can not be overstressed. Error filled reads produce error filled results. To take a step back, if you look through our MiSeq SOP, you’ll see that we go to great pains to only work with the unique sequences to limit the number of sequences we have to align, screen for chimeras, classify, etc. We all know that 20 million reads will never make it through the pipeline without setting your computer on fire. Returning to the question at hand, you can imagine that if the reads do not fully overlap then any error in the 5’ end of the first read will be uncorrected by the 3’ end of the second read. If we assume for now that the errors are random, then every error will generate a new unique sequence. Granted, this happens less than 1% of the time, but multiply that by 20 million reads at whatever length you choose and you’ve got a big number. Viola, a bunch of unique reads and a ginormous distance matrix. \n"; ianswers.push_back(ianswer); string howto = "How do I cluster my sequences into OTUs at distance 0.03?"; howtos.push_back(howto); string hanswer = "\tBy default the cluster.split command will use the opti method to cluster to 0.03. To find OTUs at a different distance set the cutoff parameter. ie. cutoff=0.01 will assemble OTUs for distance 0.01.\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen. 
ClusterSplitCommand::ClusterSplitCommand(string option) : Command() { try{ format = ""; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters file = validParameter.validFile(parameters, "file"); if (file == "not open") { file = ""; abort = true; } else if (file == "not found") { file = ""; } else { distfile = file; type = ""; } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; namefile = "";} else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); type = "name"; } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = "";} else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); type = "count"; } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { distfile = fastafile; current->setFastaFile(fastafile); } taxFile = validParameter.validFile(parameters, "taxonomy"); if (taxFile == "not open") { taxFile = ""; abort = true; } else if (taxFile == "not found") { taxFile = ""; } else { current->setTaxonomyFile(taxFile); } if ((fastafile == "") && (file == "")) { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: No valid current files. When executing a cluster.split command you must enter a file file or fastafile.\n"); abort = true; } } else if ((fastafile != "") && (file != "")) { m->mothurOut("[ERROR]: When executing a cluster.split command you must enter ONLY ONE of the following: file or fasta.\n"); abort = true; } if ((countfile != "") && (namefile != "")) { m->mothurOut("[ERROR]: When executing a cluster.split command you must enter ONLY ONE of the following: count or name.\n"); abort = true; } if (file != "") { if ((namefile == "") && (countfile == "")) { m->mothurOut("\n[WARNING]: When using the file option, it is recommended you include the name or count file. 
Doing so will ensure the OTUs are printed by OTU size reflecting the redundant reads, instead of just the unique reads.\n"); } } if (fastafile != "") { if (taxFile == "") { taxFile = current->getTaxonomyFile(); if (taxFile != "") { m->mothurOut("Using " + taxFile + " as input file for the taxonomy parameter.\n"); } else { m->mothurOut("[ERROR]: You need to provide a taxonomy file if you are using a fasta file to generate the split.\n"); abort = true; } } if ((namefile == "") && (countfile == "")) { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); type = "name"; } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); type = "count"; } else { m->mothurOut("[ERROR]: You need to provide a namefile or countfile.\n"); abort = true; } } } } string temp; temp = validParameter.valid(parameters, "precision"); if (temp == "not found") { temp = "100"; } //saves precision length for formatting below length = temp.length(); util.mothurConvert(temp, precision); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "classic"); if (temp == "not found") { temp = "F"; } classic = util.isTrue(temp); temp = validParameter.valid(parameters, "runsensspec"); if (temp == "not found") { temp = "T"; } runsensSpec = util.isTrue(temp); temp = validParameter.valid(parameters, "taxlevel"); if (temp == "not found") { temp = "3"; } util.mothurConvert(temp, taxLevelCutoff); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, maxIters); temp = validParameter.valid(parameters, "delta"); if (temp == "not found") { temp = "0.0001"; } util.mothurConvert(temp, stableMetric); metricName = validParameter.valid(parameters, "metric"); if (metricName == "not found") { metricName = "mcc"; } if ((metricName == "mcc") || (metricName == "sens") || (metricName == "spec") || (metricName == "tptn") || (metricName == "tp") || (metricName == "tn") || (metricName == "fp") || (metricName == "fn") || (metricName == "f1score") || (metricName == "accuracy") || (metricName == "ppv") || (metricName == "npv") || (metricName == "fdr") || (metricName == "fpfn") ){ } else { m->mothurOut("[ERROR]: Not a valid metric. Valid metrics are mcc, sens, spec, tp, tn, fp, fn, tptn, fpfn, f1score, accuracy, ppv, npv, fdr.\n"); abort = true; } initialize = validParameter.valid(parameters, "initialize"); if (initialize == "not found") { initialize = "singleton"; } if ((initialize == "singleton") || (initialize == "oneotu")){ } else { m->mothurOut("[ERROR]: Not a valid initialization. 
Valid initializations are singleton and oneotu.\n"); abort = true; } method = validParameter.valid(parameters, "method"); if (method == "not found") { method = "opti"; } vector versionOutputs; bool foundTool = false; string programName = "vsearch"; programName += EXECUTABLE_EXT; vsearchLocation = validParameter.validPath(parameters, "vsearch"); if (vsearchLocation == "not found") { vsearchLocation = ""; if ((method == "agc") || (method == "dgc")) { foundTool = util.findTool(programName, vsearchLocation, versionOutputs, current->getLocations()); } } else { if ((method == "agc") || (method == "dgc")) { //test to make sure vsearch exists ifstream in; vsearchLocation = util.getFullPathName(vsearchLocation); bool ableToOpen = util.openInputFile(vsearchLocation, in, "no error"); in.close(); if(!ableToOpen) { m->mothurOut(vsearchLocation + " file does not exist or cannot be opened, ignoring.\n"); vsearchLocation = ""; programName = util.getSimpleName(vsearchLocation); vsearchLocation = ""; foundTool = util.findTool(programName, vsearchLocation, versionOutputs, current->getLocations()); } } } if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted") || (method == "agc") || (method == "dgc") || (method == "opti")) { } else { m->mothurOut("[ERROR]: Not a valid clustering method. Valid clustering algorithms are furthest, nearest, average, weighted, agc, dgc and opti.\n"); abort = true; } if ((method == "agc") || (method == "dgc")) { if (fastafile == "") { m->mothurOut("[ERROR]: You must provide a fasta file when using the agc or dgc clustering methods, aborting\n."); abort = true;} if (classic) { m->mothurOut("[ERROR]: You cannot use cluster.classic with the agc or dgc clustering methods, aborting\n."); abort = true; } if (!foundTool) { abort = true; } } cutoffNotSet = false; temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { cutoffNotSet = true; if ((method == "opti") || (method == "agc") || (method == "dgc")) { temp = "0.03"; }else { temp = "0.15"; } } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "showabund"); if (temp == "not found") { temp = "T"; } showabund = util.isTrue(temp); temp = validParameter.valid(parameters, "cluster"); if (temp == "not found") { temp = "T"; } runCluster = util.isTrue(temp); temp = validParameter.valid(parameters, "islist"); if (temp == "not found") { temp = "F"; } isList = util.isTrue(temp); temp = validParameter.valid(parameters, "dist"); if (temp == "not found") { temp = "F"; } makeDist = util.isTrue(temp); if (method == "opti") { makeDist = runsensSpec; } if (classic && makeDist) { m->mothurOut("[ERROR]: You cannot use the dist parameter with the classic parameter. 
Mothur will ignore the dist parameter.\n"); makeDist = false; } timing = validParameter.valid(parameters, "timing"); if (timing == "not found") { timing = "F"; } } } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand"); exit(1); } } //********************************************************************************************************************** int ClusterSplitCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } time_t estart; vector listFileNames; vector< map > distName; set labels; string singletonName = ""; double saveCutoff = cutoff; if (file != "") { deleteFiles = false; estart = time(nullptr); singletonName = readFile(distName); if (isList) { //set list file as new current listfile string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] + "\n"); } m->mothurOutEndLine(); return 0; } }else { //splitting estart = time(nullptr); bool usingVsearchToCLuster = false; if ((method == "agc") || (method == "dgc")) { usingVsearchToCLuster = true; if (cutoffNotSet) { m->mothurOut("\nYou did not set a cutoff, using 0.03.\n"); cutoff = 0.03; } } m->mothurOut("Splitting the file...\n"); current->setMothurCalling(true); //split matrix into non-overlapping groups SplitMatrix* split = new SplitMatrix(fastafile, namefile, countfile, taxFile, taxLevelCutoff, cutoff, processors, classic, outputdir, usingVsearchToCLuster); if (fastafile != "") { current->setFastaFile(fastafile); } if (m->getControl_pressed()) { delete split; return 0; } singletonName = split->getSingletonNames(); distName = split->getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size delete split; current->setMothurCalling(false); if (m->getDebug()) { m->mothurOut("[DEBUG]: distName.size() = " + toString(distName.size()) + ".\n"); } m->mothurOut("It took " + toString(time(nullptr) - estart) + " seconds to split the distance file.\n"); //output a merged distance file if (makeDist) { createMergedDistanceFile(distName); } if (m->getControl_pressed()) { return 0; } estart = time(nullptr); if (!runCluster) { string filename = printFile(singletonName, distName); m->mothurOutEndLine(); m->mothurOut("Output File Names:\n\n"); m->mothurOut(filename); m->mothurOutEndLine(); for (int i = 0; i < distName.size(); i++) { m->mothurOut(distName[i].begin()->first); m->mothurOutEndLine(); m->mothurOut(distName[i].begin()->second); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } deleteFiles = true; } //****************** break up files between processes and cluster each file set ******************************// listFileNames = createProcesses(distName, labels); if (deleteFiles) { //delete the temp files now that we are done for (int i = 0; i < distName.size(); i++) { string thisNamefile = distName[i].begin()->second; string thisDistFile = distName[i].begin()->first; util.mothurRemove(thisNamefile); util.mothurRemove(thisDistFile); } } if (m->getControl_pressed()) { for (int i = 0; i < listFileNames.size(); i++) { util.mothurRemove(listFileNames[i]); } return 0; } if (!util.isEqual(saveCutoff, cutoff)) { m->mothurOut("\nCutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); } m->mothurOut("It took " + toString(time(nullptr) - estart) 
+ " seconds to cluster\n"); //****************** merge list file and create rabund and sabund files ******************************// estart = time(nullptr); m->mothurOut("Merging the clustered files...\n"); ListVector* listSingle; map labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins if (m->getControl_pressed()) { if (listSingle != nullptr) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } mergeLists(listFileNames, labelBins, listSingle); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //delete after all are complete incase a crash happens if (!deleteFiles) { for (int i = 0; i < distName.size(); i++) { util.mothurRemove(distName[i].begin()->first); util.mothurRemove(distName[i].begin()->second); } } m->mothurOut("It took " + toString(time(nullptr) - estart) + " seconds to merge.\n"); if ((method == "opti") && (runsensSpec)) { runSensSpec(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set list file as new current listfile string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setColumnFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "execute"); exit(1); } } //********************************************************************************************************************** map ClusterSplitCommand::completeListFile(vector listNames, string singleton, set& userLabels, ListVector*& listSingle){ try { map labelBin; vector orderFloat; int numSingleBins; //read in singletons if (singleton != "none") { listSingle = new ListVector(); if (type == "count") { CountTable ct; ct.readTable(singleton, false, false); vector singletonSeqNames = ct.getNamesOfSeqs(); for (int i = 0; i < singletonSeqNames.size(); i++) { listSingle->push_back(singletonSeqNames[i]); } }else if (type == "name") { map singletonSeqNames; util.readNames(singleton, singletonSeqNames); for (map::iterator it = singletonSeqNames.begin(); it != singletonSeqNames.end(); it++) { listSingle->push_back(it->second); } } util.mothurRemove(singleton); numSingleBins = listSingle->getNumBins(); }else{ listSingle = nullptr; numSingleBins = 0; } //go through users set and make them floats so we can sort them for(set::iterator it = userLabels.begin(); it != userLabels.end(); ++it) { double temp = -10.0; if ((*it != "unique") && (convertTestFloat(*it, temp) )) 
{ util.mothurConvert(*it, temp); } else if (*it == "unique") { temp = -1.0; } if ((temp < cutoff) || util.isEqual(cutoff, temp)) { orderFloat.push_back(temp); labelBin[temp] = numSingleBins; //initialize numbins } } //sort order sort(orderFloat.begin(), orderFloat.end()); userLabels.clear(); //get the list info from each file for (int k = 0; k < listNames.size(); k++) { if (m->getControl_pressed()) { if (listSingle != nullptr) { delete listSingle; listSingle = nullptr; util.mothurRemove(singleton); } for (int i = 0; i < listNames.size(); i++) { util.mothurRemove(listNames[i]); } return labelBin; } InputData* input = new InputData(listNames[k], "list", nullVector); ListVector* list = input->getListVector(); string lastLabel = list->getLabel(); string filledInList = listNames[k] + "filledInTemp"; ofstream outFilled; util.openOutputFile(filledInList, outFilled); bool printHeaders = true; //for each label needed for(int l = 0; l < orderFloat.size(); l++){ string thisLabel; if (util.isEqual(orderFloat[l],-1)) { thisLabel = "unique"; } else { thisLabel = toString(orderFloat[l], length-1); } //this file has reached the end if (list == nullptr) { list = input->getListVector(lastLabel, true); }else{ //do you have the distance, or do you need to fill in float labelFloat; if (list->getLabel() == "unique") { labelFloat = -1.0; } else { convert(list->getLabel(), labelFloat); } //check for missing labels if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one //if its bigger get last label, otherwise keep it delete list; list = input->getListVector(lastLabel, true); //get last list vector to use, you actually want to move back in the file } lastLabel = list->getLabel(); } //print to new file list->setLabel(thisLabel); list->setPrintedLabels(printHeaders); list->print(outFilled, true); printHeaders = false; //update labelBin labelBin[orderFloat[l]] += list->getNumBins(); delete list; list = input->getListVector(); } if (list != nullptr) { delete list; } delete input; outFilled.close(); util.mothurRemove(listNames[k]); rename(filledInList.c_str(), listNames[k].c_str()); } return labelBin; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "completeListFile"); exit(1); } } //********************************************************************************************************************** int ClusterSplitCommand::mergeLists(vector listNames, map userLabels, ListVector* listSingle){ try { if (outputdir == "") { outputdir += util.hasPath(distfile); } fileroot = outputdir + util.getRootName(util.getSimpleName(distfile)); map variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; string sabundFileName = getOutputFileName("sabund", variables); string rabundFileName = getOutputFileName("rabund", variables); string listFileName = getOutputFileName("list", variables); map counts; ofstream outList, outRabund, outSabund; if (countfile == "") { util.openOutputFile(sabundFileName, outSabund); util.openOutputFile(rabundFileName, outRabund); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); }else { CountTable ct; ct.readTable(countfile, false, false); counts = ct.getNameMap(); } util.openOutputFile(listFileName, outList); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); bool printHeaders = true; //for each label needed for(map::iterator itLabel = userLabels.begin(); itLabel != 
userLabels.end(); itLabel++) { string thisLabel; if (util.isEqual(itLabel->first,-1)) { thisLabel = "unique"; } else { thisLabel = toString(itLabel->first, length-1); } //outList << thisLabel << '\t' << itLabel->second << '\t'; RAbundVector* rabund = nullptr; ListVector completeList; completeList.setLabel(thisLabel); if (countfile == "") { rabund = new RAbundVector(); rabund->setLabel(thisLabel); } //add in singletons if (listSingle != nullptr) { for (int j = 0; j < listSingle->getNumBins(); j++) { //outList << listSingle->get(j) << '\t'; completeList.push_back(listSingle->get(j)); if (countfile == "") { rabund->push_back(util.getNumNames(listSingle->get(j))); } } } //get the list info from each file for (int k = 0; k < listNames.size(); k++) { if (m->getControl_pressed()) { if (listSingle != nullptr) { delete listSingle; } for (int i = 0; i < listNames.size(); i++) { util.mothurRemove(listNames[i]); } if (rabund != nullptr) { delete rabund; } return 0; } InputData* input = new InputData(listNames[k], "list", nullVector); ListVector* list = input->getListVector(thisLabel); //this file has reached the end if (list == nullptr) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine(); } else { for (int j = 0; j < list->getNumBins(); j++) { completeList.push_back(list->get(j)); if (countfile == "") { rabund->push_back(util.getNumNames(list->get(j))); } } delete list; } delete input; } if (countfile == "") { SAbundVector sabund = rabund->getSAbundVector(); sabund.print(outSabund); rabund->print(outRabund); } completeList.setPrintedLabels(printHeaders); if (countfile != "") { completeList.print(outList, counts); printHeaders = false; } else { completeList.print(outList); printHeaders = false; } if (rabund != nullptr) { delete rabund; } } outList.close(); if (countfile == "") { outRabund.close(); outSabund.close(); } if (listSingle != nullptr) { delete listSingle; } for (int i = 0; i < listNames.size(); i++) { util.mothurRemove(listNames[i]); } return 0; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "mergeLists"); exit(1); } } /**************************************************************************************************/ struct clusterData { MothurOut* m; Utils util; int count, precision, length, maxIters; //numSingletons, bool showabund, classic, useName, useCount, deleteFiles, cutoffNotSet; double cutoff, stableMetric; ofstream outList, outRabund, outSabund; string tag, method, vsearchLocation, metricName, initialize, outputDir, type; vector< map > distNames; set labels; vector listFileNames; clusterData(){} clusterData(bool showab, bool cla, bool df, vector< map > dN, bool cns, double cu, int prec, int len, string meth, string opd, string vl, string ty) { showabund = showab; distNames = dN; cutoff = cu; classic = cla; method = meth; precision = prec; length = len; outputDir = opd; vsearchLocation = vl; deleteFiles = df; cutoffNotSet = cns; m = MothurOut::getInstance(); count = 0; type = ty; useName = false; useCount = false; //numSingletons = 0; } void setOptiOptions(string metn, double stabMet, string init, int mxi ) { metricName = metn; stableMetric = stabMet; maxIters = mxi; initialize = init; } void setNamesCount(string cnf) { useName = false; useCount = false; if (type == "name") { useName = true; } if (type == "count") { useCount = true; } } }; //********************************************************************************************************************** int createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& 
rabund, clusterData* params){ try { rabund->setLabel(list->getLabel()); for(int i = 0; i < list->getNumBins(); i++) { if (params->m->getControl_pressed()) { break; } vector binNames; string bin = list->get(i); params->util.splitAtComma(bin, binNames); int total = 0; for (int j = 0; j < binNames.size(); j++) { total += ct->getNumSeqs(binNames[j]); } rabund->push_back(total); } return 0; } catch(exception& e) { params->m->errorOut(e, "ClusterSplitCommand", "createRabund"); exit(1); } } //********************************************************************************************************************** string clusterClassicFile(string thisDistFile, string thisNamefile, double& smallestCutoff, clusterData* params){ try { string listFileName = ""; ListVector* list = nullptr; ListVector oldList; RAbundVector* rabund = nullptr; params->m->mothurOut("\nReading " + thisDistFile + "\n"); //reads phylip file storing data in 2D vector, also fills list and rabund bool sim = false; ClusterClassic cluster(params->cutoff, params->method, sim); NameAssignment* nameMap = nullptr; CountTable* ct = nullptr; if(params->useName){ nameMap = new NameAssignment(thisNamefile); nameMap->readMap(); cluster.readPhylipFile(thisDistFile, nameMap); }else if (params->useCount) { ct = new CountTable(); ct->readTable(thisNamefile, false, false); cluster.readPhylipFile(thisDistFile, ct); } params->tag = cluster.getTag(); if (params->m->getControl_pressed()) { if(params->useName){ delete nameMap; } else if (params->useCount) { delete ct; } return listFileName; } list = cluster.getListVector(); rabund = cluster.getRAbundVector(); string thisOutputDir = params->outputDir; if (params->outputDir == "") { thisOutputDir += params->util.hasPath(thisDistFile); } string fileroot = thisOutputDir + params->util.getRootName(params->util.getSimpleName(thisDistFile)); listFileName = fileroot+ params->tag + ".list"; ofstream listFile; params->util.openOutputFile(fileroot+ params->tag + ".list", listFile); float previousDist = 0.00000; float rndPreviousDist = 0.00000; bool printHeaders = true; oldList = *list; params->m->mothurOut("\nClustering " + thisDistFile + "\n"); while ((cluster.getSmallDist() < params->cutoff) && (cluster.getNSeqs() > 1)){ if (params->m->getControl_pressed()) { delete list; delete rabund; listFile.close(); if(params->useName){ delete nameMap; } else if (params->useCount) { delete ct; } return listFileName; } cluster.update(params->cutoff); float dist = cluster.getSmallDist(); float rndDist = params->util.ceilDist(dist, params->precision); if(previousDist <= 0.0000 && !params->util.isEqual(dist, previousDist)){ oldList.setLabel("unique"); oldList.setPrintedLabels(printHeaders); oldList.print(listFile); printHeaders = false; if (params->labels.count("unique") == 0) { params->labels.insert("unique"); } } else if(!params->util.isEqual(rndDist, rndPreviousDist)){ oldList.setLabel(toString(rndPreviousDist, params->length-1)); oldList.setPrintedLabels(printHeaders); oldList.print(listFile); printHeaders = false; if (params->labels.count(toString(rndPreviousDist, params->length-1)) == 0) { params->labels.insert(toString(rndPreviousDist, params->length-1)); } } previousDist = dist; rndPreviousDist = rndDist; oldList = *list; } if(previousDist <= 0.0000){ oldList.setLabel("unique"); oldList.setPrintedLabels(printHeaders); oldList.print(listFile); printHeaders = false; if (params->labels.count("unique") == 0) { params->labels.insert("unique"); } } else if(rndPreviousDistcutoff){ oldList.setLabel(toString(rndPreviousDist, 
params->length-1)); oldList.setPrintedLabels(printHeaders); oldList.print(listFile); printHeaders = false; if (params->labels.count(toString(rndPreviousDist, params->length-1)) == 0) { params->labels.insert(toString(rndPreviousDist, params->length-1)); } } listFile.close(); delete list; delete rabund; if(params->useName) { delete nameMap; } else if (params->useCount) { delete ct; } if (params->deleteFiles) { params->util.mothurRemove(thisDistFile); params->util.mothurRemove(thisNamefile); } return listFileName; } catch(exception& e) { params->m->errorOut(e, "ClusterSplitCommand", "clusterClassicFile"); exit(1); } } //********************************************************************************************************************** string runOptiCluster(string thisDistFile, string thisNamefile, double& smallestCutoff, clusterData* params){ try { if (params->cutoffNotSet) { params->m->mothurOut("\nYou did not set a cutoff, using 0.03.\n"); params->cutoff = 0.03; } string nameOrCount = params->type; OptiMatrix matrix(thisDistFile, thisNamefile, nameOrCount, "column", params->cutoff, false); ClusterMetric* metric = nullptr; if (params->metricName == "mcc") { metric = new MCC(); } else if (params->metricName == "sens") { metric = new Sensitivity(); } else if (params->metricName == "spec") { metric = new Specificity(); } else if (params->metricName == "tptn") { metric = new TPTN(); } else if (params->metricName == "tp") { metric = new TP(); } else if (params->metricName == "tn") { metric = new TN(); } else if (params->metricName == "fp") { metric = new FP(); } else if (params->metricName == "fn") { metric = new FN(); } else if (params->metricName == "f1score") { metric = new F1Score(); } else if (params->metricName == "accuracy") { metric = new Accuracy(); } else if (params->metricName == "ppv") { metric = new PPV(); } else if (params->metricName == "npv") { metric = new NPV(); } else if (params->metricName == "fdr") { metric = new FDR(); } else if (params->metricName == "fpfn") { metric = new FPFN(); } OptiCluster cluster(&matrix, metric, 0); params->tag = cluster.getTag(); params->m->mothurOut("\nClustering " + thisDistFile + "\n"); string thisOutputDir = params->outputDir; if (params->outputDir == "") { thisOutputDir += params->util.hasPath(thisDistFile); } string fileroot = thisOutputDir + params->util.getRootName(params->util.getSimpleName(thisDistFile)); string listFileName = fileroot+ params->tag + ".list"; int iters = 0; double listVectorMetric = 0; //worst state double delta = 1; cluster.initialize(listVectorMetric, true, params->initialize); while ((delta > params->stableMetric) && (iters < params->maxIters)) { if (params->m->getControl_pressed()) { if (params->deleteFiles) { params->util.mothurRemove(thisDistFile); params->util.mothurRemove(thisNamefile); } return listFileName; } double oldMetric = listVectorMetric; cluster.update(listVectorMetric); delta = abs(oldMetric - listVectorMetric); iters++; } if (params->m->getControl_pressed()) { delete metric; metric = nullptr; return 0; } ListVector* list = cluster.getList(); list->setLabel(toString(smallestCutoff)); //params->cutoff = params->util.ceilDist(params->cutoff, params->precision); params->labels.insert(toString(smallestCutoff)); ofstream listFile; params->util.openOutputFile(listFileName, listFile); list->print(listFile); listFile.close(); if (params->deleteFiles) { params->util.mothurRemove(thisDistFile); params->util.mothurRemove(thisNamefile); } double tp, tn, fp, fn; 
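//For reference, the standard definitions of a few of the metrics printed below (a sketch of the
//usual formulas, not necessarily the exact code path inside cluster.getStats()):
//  sensitivity = tp / (tp + fn)        specificity = tn / (tn + fp)
//  mcc = (tp*tn - fp*fn) / sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))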
params->m->mothurOut("\ntp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); vector results = cluster.getStats(tp, tn, fp, fn); params->m->mothurOut(toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); for (int i = 0; i < results.size(); i++) { params->m->mothurOut(toString(results[i]) + "\t"); } params->m->mothurOut("\n\n"); return listFileName; } catch(exception& e) { params->m->errorOut(e, "ClusterSplitCommand", "runOptiCluster"); exit(1); } } //********************************************************************************************************************** int vsearchDriver(string inputFile, string ucClusteredFile, string logfile, double cutoff, clusterData* params){ try { //vsearch --maxaccepts 16 --usersort --id 0.97 --minseqlength 30 --wordlength 8 --uc $ROOT.clustered.uc --cluster_smallmem $ROOT.sorted.fna --maxrejects 64 --strand both --log $ROOT.clustered.log --sizeorder ucClusteredFile = params->util.getFullPathName(ucClusteredFile); inputFile = params->util.getFullPathName(inputFile); logfile = params->util.getFullPathName(logfile); //to allow for spaces in the path ucClusteredFile = "\"" + ucClusteredFile + "\""; inputFile = "\"" + inputFile + "\""; logfile = "\"" + logfile + "\""; vector cPara; string vsearchCommand = params->vsearchLocation; vsearchCommand = "\"" + vsearchCommand + "\" "; vector vsearchParameters; vsearchParameters.push_back(params->util.mothurConvert(vsearchCommand)); //--maxaccepts=16 vsearchParameters.push_back(params->util.mothurConvert("--maxaccepts=16")); //--threads=1 string processorsString = "--threads=1"; vsearchParameters.push_back(params->util.mothurConvert(processorsString)); //--usersort vsearchParameters.push_back(params->util.mothurConvert("--usersort")); //--id=0.97 cutoff = abs(1.0 - cutoff); string cutoffString = toString(cutoff); if (cutoffString.length() > 4) { cutoffString = cutoffString.substr(0, 4); } else if (cutoffString.length() < 4) { for (int i = cutoffString.length(); i < 4; i++) { cutoffString += "0"; } } cutoffString = "--id=" + cutoffString; vsearchParameters.push_back(params->util.mothurConvert(cutoffString)); //--minseqlength=30 vsearchParameters.push_back(params->util.mothurConvert("--minseqlength=30")); //--wordlength=8 vsearchParameters.push_back(params->util.mothurConvert("--wordlength=8")); //--uc=$ROOT.clustered.uc string tempIn = "--uc=" + ucClusteredFile; vsearchParameters.push_back(params->util.mothurConvert(tempIn)); //--cluster_smallmem $ROOT.sorted.fna string tempSorted = "--cluster_smallmem=" + inputFile; vsearchParameters.push_back(params->util.mothurConvert(tempSorted)); //--maxrejects=64 vsearchParameters.push_back(params->util.mothurConvert("--maxrejects=64")); //--strand=both vsearchParameters.push_back(params->util.mothurConvert("--strand=both")); //--log=$ROOT.clustered.log string tempLog = "--log=" + logfile; vsearchParameters.push_back(params->util.mothurConvert(tempLog)); if (params->method == "agc") { //--sizeorder vsearchParameters.push_back(params->util.mothurConvert("--sizeorder")); } if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: "); for(int i = 0; i < vsearchParameters.size(); i++) { params->m->mothurOut(toString(vsearchParameters[i]) + "\t"); } params->m->mothurOut("\n"); } string commandString = ""; for (int i = 0; i < vsearchParameters.size(); i++) { commandString += toString(vsearchParameters[i]) + " "; } #if defined NON_WINDOWS #else commandString = "\"" + commandString + "\""; #endif if 
(params->m->getDebug()) { params->m->mothurOut("[DEBUG]: vsearch cluster command = " + commandString + ".\n"); } system(commandString.c_str()); //free memory for(int i = 0; i < vsearchParameters.size(); i++) { delete vsearchParameters[i]; } //remove "" from filenames ucClusteredFile = ucClusteredFile.substr(1, ucClusteredFile.length()-2); inputFile = inputFile.substr(1, inputFile.length()-2); logfile = logfile.substr(1, logfile.length()-2); return 0; } catch(exception& e) { params->m->errorOut(e, "ClusterSplitCommand", "vsearchDriver"); exit(1); } } //********************************************************************************************************************** string runVsearchCluster(string thisDistFile, string thisNamefile, double& smallestCutoff, clusterData* params){ try { params->m->mothurOut("\nClustering " + thisDistFile + "\n"); string vsearchFastafile = ""; VsearchFileParser* vParse; if (params->useName) { vParse = new VsearchFileParser(thisDistFile, thisNamefile, "name"); } else if (params->useCount) { vParse = new VsearchFileParser(thisDistFile, thisNamefile, "count"); } else { params->m->mothurOut("[ERROR]: Opps, should never get here. ClusterSplitCommand::runVsearchCluster() \n"); params->m->setControl_pressed(true); return ""; } if (params->m->getControl_pressed()) { delete vParse; return ""; } vsearchFastafile = vParse->getVsearchFile(); if (params->cutoff > 1.0) { params->m->mothurOut("You did not set a cutoff, using 0.03.\n"); params->cutoff = 0.03; } //Run vsearch string ucVsearchFile = params->util.getSimpleName(vsearchFastafile) + ".clustered.uc"; string logfile = params->util.getSimpleName(vsearchFastafile) + ".clustered.log"; vsearchDriver(vsearchFastafile, ucVsearchFile, logfile, smallestCutoff, params); if (params->m->getControl_pressed()) { params->util.mothurRemove(ucVsearchFile); params->util.mothurRemove(logfile); params->util.mothurRemove(vsearchFastafile); return ""; } string thisOutputDir = params->outputDir; if (params->outputDir == "") { thisOutputDir += params->util.hasPath(thisDistFile); } params->tag = params->method; string listFileName = thisOutputDir + params->util.getRootName(params->util.getSimpleName(thisDistFile)) + params->tag + ".list"; //Convert outputted *.uc file into a list file map counts; ListVector list = vParse->createListFile(ucVsearchFile, vParse->getNumBins(logfile), toString(params->cutoff), counts); ofstream out; params->util.openOutputFile(listFileName, out); list.DataVector::printHeaders(out); if (params->useCount) { list.print(out, counts); } else { list.print(out); } delete vParse; //remove temp files params->util.mothurRemove(ucVsearchFile); params->util.mothurRemove(logfile); params->util.mothurRemove(vsearchFastafile); if (params->deleteFiles) { params->util.mothurRemove(thisDistFile); params->util.mothurRemove(thisNamefile); } params->labels.insert(toString(params->cutoff)); return listFileName; } catch(exception& e) { params->m->errorOut(e, "ClusterSplitCommand", "runVsearchCluster"); exit(1); } } //********************************************************************************************************************** string clusterFile(string thisDistFile, string thisNamefile, double& smallestCutoff, clusterData* params){ try { string listFileName = ""; if ((params->method == "agc") || (params->method == "dgc")) { listFileName = runVsearchCluster(thisDistFile, thisNamefile, smallestCutoff, params); } else if (params->method == "opti") { listFileName = runOptiCluster(thisDistFile, thisNamefile, smallestCutoff, 
params); } else { Cluster* cluster = nullptr; SparseDistanceMatrix* matrix = nullptr; ListVector* list = nullptr; ListVector oldList; RAbundVector* rabund = nullptr; if (params->m->getControl_pressed()) { return listFileName; } params->m->mothurOut("\nReading " + thisDistFile + "\n"); ReadMatrix* read = new ReadColumnMatrix(thisDistFile); read->setCutoff(params->cutoff); NameAssignment* nameMap = nullptr; CountTable* ct = nullptr; if(params->useName){ nameMap = new NameAssignment(thisNamefile); nameMap->readMap(); read->read(nameMap); }else if (params->useCount) { ct = new CountTable(); ct->readTable(thisNamefile, false, false); read->read(ct); }else { read->read(nameMap); } list = read->getListVector(); matrix = read->getDMatrix(); if(params->useCount) { rabund = new RAbundVector(); createRabund(ct, list, rabund, params); //creates an rabund that includes the counts for the unique list delete ct; }else { rabund = new RAbundVector(list->getRAbundVector()); } delete read; read = nullptr; if (params->useName) { delete nameMap; nameMap = nullptr; } params->m->mothurOut("\nClustering " + thisDistFile + "\n"); //create cluster float adjust = -1.0; if (params->method == "furthest") { cluster = new CompleteLinkage(rabund, list, matrix, params->cutoff, params->method, adjust); } else if(params->method == "nearest"){ cluster = new SingleLinkage(rabund, list, matrix, params->cutoff, params->method, adjust); } else if(params->method == "average"){ cluster = new AverageLinkage(rabund, list, matrix, params->cutoff, params->method, adjust); } params->tag = cluster->getTag(); string thisOutputDir = params->outputDir; if (params->outputDir == "") { thisOutputDir += params->util.hasPath(thisDistFile); } string fileroot = thisOutputDir + params->util.getRootName(params->util.getSimpleName(thisDistFile)); listFileName = fileroot+ params->tag + ".list"; ofstream listFile; params->util.openOutputFile(listFileName, listFile); float previousDist = 0.00000; float rndPreviousDist = 0.00000; bool printHeaders = true; oldList = *list; double saveCutoff = params->cutoff; while (matrix->getSmallDist() < params->cutoff && matrix->getNNodes() > 0){ if (params->m->getControl_pressed()) { //clean up delete matrix; delete list; delete cluster; delete rabund; listFile.close(); params->util.mothurRemove(listFileName); return listFileName; } cluster->update(saveCutoff); float dist = matrix->getSmallDist(); float rndDist = params->util.ceilDist(dist, params->precision); if(previousDist <= 0.0000 && !params->util.isEqual(dist, previousDist)){ oldList.setLabel("unique"); oldList.setPrintedLabels(printHeaders); oldList.print(listFile); printHeaders = false; if (params->labels.count("unique") == 0) { params->labels.insert("unique"); } } else if(!params->util.isEqual(rndDist, rndPreviousDist)){ oldList.setPrintedLabels(printHeaders); oldList.setLabel(toString(rndPreviousDist, params->length-1)); oldList.setPrintedLabels(printHeaders); oldList.print(listFile); printHeaders = false; if (params->labels.count(toString(rndPreviousDist, params->length-1)) == 0) { params->labels.insert(toString(rndPreviousDist, params->length-1)); } } previousDist = dist; rndPreviousDist = rndDist; oldList = *list; } if(previousDist <= 0.0000){ oldList.setLabel("unique"); oldList.setPrintedLabels(printHeaders); oldList.print(listFile); printHeaders = false; if (params->labels.count("unique") == 0) { params->labels.insert("unique"); } } else if(rndPreviousDistcutoff){ oldList.setLabel(toString(rndPreviousDist, params->length-1)); 
oldList.setPrintedLabels(printHeaders); oldList.print(listFile); printHeaders = false; if (params->labels.count(toString(rndPreviousDist, params->length-1)) == 0) { params->labels.insert(toString(rndPreviousDist, params->length-1)); } } delete matrix; delete list; delete cluster; delete rabund; matrix = nullptr; list = nullptr; cluster = nullptr; rabund = nullptr; listFile.close(); if (params->m->getControl_pressed()) { //clean up params->util.mothurRemove(listFileName); return listFileName; } if (params->deleteFiles) { params->util.mothurRemove(thisDistFile); params->util.mothurRemove(thisNamefile); } if (!params->util.isEqual(saveCutoff, params->cutoff)) { saveCutoff = params->util.ceilDist(saveCutoff, params->precision); params->m->mothurOut("Cutoff was " + toString(params->cutoff) + " changed cutoff to " + toString(saveCutoff) + "\n"); } if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff; } } return listFileName; } catch(exception& e) { params->m->errorOut(e, "ClusterSplitCommand", "clusterFile"); exit(1); } } //********************************************************************************************************************** void cluster(clusterData* params){ try { vector listFileNames; double smallestCutoff = params->cutoff; //cluster each distance file for (int i = 0; i < params->distNames.size(); i++) { string thisNamefile = params->distNames[i].begin()->second; string thisDistFile = params->distNames[i].begin()->first; params->setNamesCount(thisNamefile); string listFileName = ""; if (params->classic) { listFileName = clusterClassicFile(thisDistFile, thisNamefile, smallestCutoff, params); } else { listFileName = clusterFile(thisDistFile, thisNamefile, smallestCutoff, params); } if (params->m->getControl_pressed()) { //clean up for (int i = 0; i < listFileNames.size(); i++) { params->util.mothurRemove(listFileNames[i]); } params->listFileNames.clear(); break; } params->listFileNames.push_back(listFileName); } params->cutoff = smallestCutoff; } catch(exception& e) { params->m->errorOut(e, "ClusterSplitCommand", "cluster"); exit(1); } } //********************************************************************************************************************** void printData(ListVector* oldList, clusterData* params){ try { string label = oldList->getLabel(); RAbundVector oldRAbund = oldList->getRAbundVector(); oldRAbund.setLabel(label); if (params->showabund) { oldRAbund.getSAbundVector().print(cout); } oldRAbund.print(params->outRabund); oldRAbund.getSAbundVector().print(params->outSabund); oldList->print(params->outList, true); } catch(exception& e) { params->m->errorOut(e, "ClusterSplitCommand", "printData"); exit(1); } } //********************************************************************************************************************** vector ClusterSplitCommand::createProcesses(vector< map > distName, set& labels){ try { //sanity check if (processors > distName.size()) { processors = distName.size(); } deleteFiles = false; //so if we need to recalc the processors the files are still there vector listFiles; vector < vector < map > > dividedNames; //distNames[1] = vector of filenames for process 1... 
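//Rough illustration of the round-robin division below, using hypothetical file names: with
//processors=3 and size-sorted files d1..d7, the first pass gives worker0={d1,d4,d7},
//worker1={d2,d5}, worker2={d3,d6}; every worker except the last then has its list reversed
//(worker0={d7,d4,d1}, worker1={d5,d2}) so large and small matrices are interleaved across workers.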
dividedNames.resize(processors); //for each file group figure out which process will complete it //want to divide the load intelligently so the big files are spread between processes for (int i = 0; i < distName.size(); i++) { int processToAssign = (i+1) % processors; if (processToAssign == 0) { processToAssign = processors; } dividedNames[(processToAssign-1)].push_back(distName[i]); if ((processToAssign-1) == 1) { m->mothurOut(distName[i].begin()->first + "\n"); } } //now lets reverse the order of ever other process, so we balance big files running with little ones for (int i = 0; i < processors; i++) { int remainder = ((i+1) % processors); if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); } } if (m->getControl_pressed()) { return listFiles; } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { clusterData* dataBundle = new clusterData(showabund, classic, deleteFiles, dividedNames[i+1], cutoffNotSet, cutoff, precision, length, method, outputdir, vsearchLocation, type); dataBundle->setOptiOptions(metricName, stableMetric, initialize, maxIters); data.push_back(dataBundle); workerThreads.push_back(new std::thread(cluster, dataBundle)); } clusterData* dataBundle = new clusterData(showabund, classic, deleteFiles, dividedNames[0], cutoffNotSet, cutoff, precision, length, method, outputdir, vsearchLocation, type); dataBundle->setOptiOptions(metricName, stableMetric, initialize, maxIters); cluster(dataBundle); listFiles = dataBundle->listFileNames; tag = dataBundle->tag; cutoff = dataBundle->cutoff; labels = dataBundle->labels; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); listFiles.insert(listFiles.end(), data[i]->listFileNames.begin(), data[i]->listFileNames.end()); labels.insert(data[i]->labels.begin(), data[i]->labels.end()); if (data[i]->cutoff < cutoff) { cutoff = data[i]->cutoff; } delete data[i]; delete workerThreads[i]; } delete dataBundle; deleteFiles = true; return listFiles; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** int ClusterSplitCommand::createMergedDistanceFile(vector< map > distNames) { try{ string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); string outputFileName = getOutputFileName("column", variables); util.mothurRemove(outputFileName); for (int i = 0; i < distNames.size(); i++) { if (m->getControl_pressed()) { return 0; } string thisDistFile = distNames[i].begin()->first; util.appendFiles(thisDistFile, outputFileName); } outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile"); exit(1); } } //********************************************************************************************************************** int ClusterSplitCommand::runSensSpec() { try{ string listFile = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { listFile = (itTypes->second)[0]; } } string columnFile = ""; if (makeDist) { itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { columnFile = (itTypes->second)[0]; } 
} } string inputString = "cutoff=" + toString(cutoff) + ", list=" + listFile; if (columnFile != "") { inputString += ", column=" + columnFile; } else { m->mothurOut("[WARNING]: Cannot run sens.spec analysis without a column file, skipping."); return 0; } if (namefile != "") { inputString += ", name=" + namefile; } else if (countfile != "") { inputString += ", count=" + countfile; } else { m->mothurOut("[WARNING]: Cannot run sens.spec analysis without a name or count file, skipping."); return 0; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: sens.spec(" + inputString + ")\n"); current->setMothurCalling(true); Command* sensspecCommand = new SensSpecCommand(inputString); sensspecCommand->execute(); map > filenames = sensspecCommand->getOutputFiles(); delete sensspecCommand; current->setMothurCalling(false); string outputFileName = filenames["sensspec"][0]; outputTypes["sensspec"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("/******************************************/\n"); m->mothurOut("Done.\n\n\n"); ifstream in; util.openInputFile(outputFileName, in); while(!in.eof()){ if (m->getControl_pressed()) { break; } m->mothurOut(util.getline(in)+"\n"); gobble(in); } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "runSensSpec"); exit(1); } } //********************************************************************************************************************** string ClusterSplitCommand::printFile(string singleton, vector< map >& distName){ try { ofstream out; map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = util.hasPath(distfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(distfile)); string outputFileName = getOutputFileName("file", variables); util.openOutputFile(outputFileName, out); outputTypes["file"].push_back(outputFileName); outputNames.push_back(outputFileName); current->setFileFile(outputFileName); out << singleton << endl; if (namefile != "") { out << "name" << endl; } else if (countfile != "") { out << "count" << endl; } else { out << "unknown" << endl; } for (int i = 0; i < distName.size(); i++) { out << distName[i].begin()->first << '\t' << distName[i].begin()->second << endl; } out.close(); return outputFileName; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "printFile"); exit(1); } } //********************************************************************************************************************** string ClusterSplitCommand::readFile(vector< map >& distName){ try { string singleton, thiscolumn, thisname; ifstream in; util.openInputFile(file, in); in >> singleton; gobble(in); string path = util.hasPath(singleton); if (path == "") { singleton = inputDir + singleton; } in >> type; gobble(in); if (type == "name") { } else if (type == "count") { } else { m->mothurOut("[ERROR]: unknown file type. Are the files in column 2 of the file name files or count files? 
Please change unknown to name or count.\n"); m->setControl_pressed(true); } if (isList) { vector listFileNames; string thisListFileName = ""; set listLabels; while(!in.eof()) { if (m->getControl_pressed()) { break; } in >> thisListFileName; gobble(in); string path = util.hasPath(thisListFileName); if (path == "") { thisListFileName = inputDir + thisListFileName; } getLabels(thisListFileName, listLabels); listFileNames.push_back(thisListFileName); } ListVector* listSingle; map labelBins = completeListFile(listFileNames, singleton, listLabels, listSingle); mergeLists(listFileNames, labelBins, listSingle); }else { while(!in.eof()) { if (m->getControl_pressed()) { break; } in >> thiscolumn; gobble(in); in >> thisname; gobble(in); map temp; temp[thiscolumn] = thisname; distName.push_back(temp); } } in.close(); return singleton; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "readFile"); exit(1); } } //********************************************************************************************************************** int ClusterSplitCommand::getLabels(string file, set& listLabels){ try { ifstream in; util.openInputFile(file, in); //read headers util.getline(in); gobble(in); string label; while(!in.eof()) { if (m->getControl_pressed()) { break; } in >> label; util.getline(in); gobble(in); listLabels.insert(label); } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "ClusterSplitCommand", "getLabels"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/clustersplitcommand.h000077500000000000000000000045201424121717000224730ustar00rootroot00000000000000#ifndef CLUSTERSPLITCOMMAND_H #define CLUSTERSPLITCOMMAND_H /* * clustersplitcommand.h * Mothur * * Created by westcott on 5/19/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "rabundvector.hpp" #include "sabundvector.hpp" #include "listvector.hpp" #include "cluster.hpp" #include "sparsedistancematrix.h" #include "readcluster.h" #include "splitmatrix.h" #include "readphylip.h" #include "readcolumn.h" #include "readmatrix.hpp" #include "inputdata.h" #include "clustercommand.h" #include "clusterclassic.h" #include "vsearchfileparser.h" #include "opticluster.h" #include "calculator.h" class ClusterSplitCommand : public Command { public: ClusterSplitCommand(string); ~ClusterSplitCommand() = default; vector setParameters(); string getCommandName() { return "cluster.split"; } string getCommandCategory() { return "Clustering"; } string getHelpString(); string getOutputPattern(string); string getCommonQuestions(); string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol 77:3219. 
\nhttp://www.mothur.org/wiki/Cluster.split"; } string getDescription() { return "splits your sequences by distance or taxonomy then clusters into OTUs"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames; string file, method, fileroot, tag, namefile, countfile, distfile, format, timing, taxFile, fastafile, inputDir, vsearchLocation, metricName, initialize, type; double cutoff, splitcutoff, stableMetric; int precision, length, processors, taxLevelCutoff, maxIters, numSingletons; bool abort, classic, runCluster, deleteFiles, isList, cutoffNotSet, makeDist, runsensSpec, showabund; void printData(ListVector*); vector createProcesses(vector< map >, set&); int mergeLists(vector, map, ListVector*); map completeListFile(vector, string, set&, ListVector*&); int createMergedDistanceFile(vector< map >); string readFile(vector< map >&); string printFile(string, vector< map >&); int getLabels(string, set& listLabels); int runSensSpec(); }; #endif mothur-1.48.0/source/commands/collectcommand.cpp000077500000000000000000000655161424121717000217320ustar00rootroot00000000000000/* * collectcommand.cpp * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "collectcommand.h" #include "ace.h" #include "sobs.h" #include "nseqs.h" #include "chao1.h" #include "bootstrap.h" #include "simpson.h" #include "simpsoneven.h" #include "invsimpson.h" #include "npshannon.h" #include "shannon.h" #include "smithwilson.h" #include "heip.h" #include "shannoneven.h" #include "jackknife.h" #include "geom.h" #include "qstat.h" #include "logsd.h" #include "bergerparker.h" #include "bstick.h" #include "goodscoverage.h" #include "efron.h" #include "boneh.h" #include "solow.h" #include "shen.h" #include "coverage.h" #include "shannonrange.h" //********************************************************************************************************************** vector CollectCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(plist); CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(psabund); CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson-shannonrange", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha); CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); 
parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["sobs"] = tempOutNames; outputTypes["chao"] = tempOutNames; outputTypes["nseqs"] = tempOutNames; outputTypes["coverage"] = tempOutNames; outputTypes["ace"] = tempOutNames; outputTypes["jack"] = tempOutNames; outputTypes["shannon"] = tempOutNames; outputTypes["shannoneven"] = tempOutNames; outputTypes["shannonrange"] = tempOutNames; outputTypes["npshannon"] = tempOutNames; outputTypes["heip"] = tempOutNames; outputTypes["smithwilson"] = tempOutNames; outputTypes["simpson"] = tempOutNames; outputTypes["simpsoneven"] = tempOutNames; outputTypes["invsimpson"] = tempOutNames; outputTypes["bootstrap"] = tempOutNames; outputTypes["geometric"] = tempOutNames; outputTypes["qstat"] = tempOutNames; outputTypes["logseries"] = tempOutNames; outputTypes["bergerparker"] = tempOutNames; outputTypes["bstick"] = tempOutNames; outputTypes["goodscoverage"] = tempOutNames; outputTypes["efron"] = tempOutNames; outputTypes["boneh"] = tempOutNames; outputTypes["solow"] = tempOutNames; outputTypes["shen"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "CollectCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string CollectCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; helpString += "The collect.single command parameters are list, sabund, rabund, shared, label, freq, calc, alpha and abund. list, sabund, rabund or shared is required unless you have a valid current file. \n"; helpString += "The collect.single command should be in the following format: \n"; helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. 
\n"; helpString += "collect.single(label=yourLabel, freq=yourFreq, calc=yourEstimators).\n"; helpString += "Example collect(label=unique-.01-.03, freq=10, calc=sobs-chao-ace-jack).\n"; helpString += "The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson.\n"; helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n"; helpString += validCalculator.printCalc("single"); helpString += "The label parameter is used to analyze specific labels in your input.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "CollectCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string CollectCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "sobs") { pattern = "[filename],sobs"; } else if (type == "chao") { pattern = "[filename],chao"; } else if (type == "nseqs") { pattern = "[filename],nseqs"; } else if (type == "coverage") { pattern = "[filename],coverage"; } else if (type == "ace") { pattern = "[filename],ace"; } else if (type == "jack") { pattern = "[filename],jack"; } else if (type == "shannon") { pattern = "[filename],shannon"; } else if (type == "shannoneven") { pattern = "[filename],shannoneven"; } else if (type == "shannonrange"){ pattern = "[filename],shannonrange"; } else if (type == "npshannon") { pattern = "[filename],npshannon"; } else if (type == "heip") { pattern = "[filename],heip"; } else if (type == "smithwilson") { pattern = "[filename],smithwilson"; } else if (type == "simpson") { pattern = "[filename],simpson"; } else if (type == "simpsoneven") { pattern = "[filename],simpsoneven"; } else if (type == "invsimpson") { pattern = "[filename],invsimpson"; } else if (type == "bootstrap") { pattern = "[filename],bootstrap"; } else if (type == "geometric") { pattern = "[filename],geometric"; } else if (type == "qstat") { pattern = "[filename],qstat"; } else if (type == "logseries") { pattern = "[filename],logseries"; } else if (type == "bergerparker") { pattern = "[filename],bergerparker"; } else if (type == "bstick") { pattern = "[filename],bstick"; } else if (type == "goodscoverage") { pattern = "[filename],goodscoverage"; } else if (type == "efron") { pattern = "[filename],efron"; } else if (type == "boneh") { pattern = "[filename],boneh"; } else if (type == "solow") { pattern = "[filename],solow"; } else if (type == "shen") { pattern = "[filename],shen"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "CollectCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** CollectCommand::CollectCommand(string option) : Command() { try { allLines = true; //allow user to run help if(option == "help") { help(); calledHelp = true; abort = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; inputfile = listfile; 
current->setListFile(listfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { sabundfile = ""; abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } else { format = "sabund"; inputfile = sabundfile; current->setSabundFile(sabundfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { rabundfile = ""; abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } else { format = "rabund"; inputfile = rabundfile; current->setRabundFile(rabundfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { format = "sharedfile"; inputfile = sharedfile; current->setSharedFile(sharedfile); } if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { //is there are current file available for any of these? //give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { rabundfile = current->getRabundFile(); if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter.\n"); } else { sabundfile = current->getSabundFile(); if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command.\n"); abort = true; } } } } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } //NOTE: if you add new calc options, don't forget to add them to the parameter initialize in setParameters or the gui won't be able to use them calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } else { if (calc == "default") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } string temp; temp = validParameter.valid(parameters, "freq"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, freq); temp = validParameter.valid(parameters, "alpha"); if (temp == "not found") { temp = "1"; } util.mothurConvert(temp, alpha); if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. 
Valid values are 0, 1 and 2.\n"); abort=true; } temp = validParameter.valid(parameters, "abund"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, abund); temp = validParameter.valid(parameters, "size"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, size); } } catch(exception& e) { m->errorOut(e, "CollectCommand", "CollectCommand"); exit(1); } } //********************************************************************************************************************** int CollectCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if ((format != "sharedfile")) { inputFileNames.push_back(inputfile); } else { inputFileNames = parseSharedFile(sharedfile); format = "rabund"; } for (int p = 0; p < inputFileNames.size(); p++) { if (m->getControl_pressed()) { break; } if (outputdir == "") { outputdir += util.hasPath(inputFileNames[p]); } string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(inputFileNames[p])); map variables; variables["[filename]"] = fileNameRoot; if (inputFileNames.size() > 1) { m->mothurOut("\nProcessing group " + groups[p] + "\n\n"); } fillCDisplays(variables); //adds a display for each calc //if the users entered no valid calculators don't execute command if (cDisplays.size() == 0) { return 0; } InputData input(inputFileNames[p], format, nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; OrderVector* order = util.getNextOrder(input, allLines, userLabels, processedLabels, lastLabel); while (order != nullptr) { if (m->getControl_pressed()) { delete order; break; } Collect cCurve(order, cDisplays); cCurve.getCurve(freq); delete order; order = util.getNextOrder(input, allLines, userLabels, processedLabels, lastLabel); } //delete displays for(int i=0;i 1) { for (int p = 0; p < inputFileNames.size(); p++) { util.mothurRemove(inputFileNames[p]); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "CollectCommand", "execute"); exit(1); } } //********************************************************************************************************************** void CollectCommand::fillCDisplays(map variables) { try { ValidCalculators validCalculator; for (int i=0; ierrorOut(e, "CollectCommand", "fillCDisplays"); exit(1); } } //********************************************************************************************************************** vector CollectCommand::parseSharedFile(string filename) { try { vector filenames; map files; map::iterator it3; InputData input(filename, "sharedfile", groups); SharedRAbundVectors* shared = input.getSharedRAbundVectors(); string sharedFileRoot = util.getRootName(filename); groups = shared->getNamesGroups(); //clears file before we start to write to it below for (int i=0; i lookup = shared->getSharedRAbundVectors(); for (int i = 0; i < lookup.size(); i++) { ofstream temp; string group = groups[i]; util.openOutputFileAppend(files[group], temp); lookup[i]->getRAbundVector().print(temp); temp.close(); } for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != nullptr) { delete lookup[i]; } lookup[i] = nullptr; } shared = input.getSharedRAbundVectors(); } return filenames; } catch(exception& e) { m->errorOut(e, "CollectCommand", "parseSharedFile"); exit(1); } } 
//********************************************************************************************************************** mothur-1.48.0/source/commands/collectcommand.h000077500000000000000000000044471424121717000213730ustar00rootroot00000000000000#ifndef COLLECTCOMMAND_H #define COLLECTCOMMAND_H /* * collectcommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "ordervector.hpp" #include "inputdata.h" #include "collect.h" #include "validcalculator.h" /*The collect() command: The collect command generates a collector's curve from the given file. The collect command outputs a file for each estimator you choose to use. The collect command parameters are label, freq, single, abund. No parameters are required. The collect command should be in the following format: collect(label=yourLabel, freq=yourFreq, single=yourEstimators, abund=yourAbund). example collect(label=unique-.01-.03, freq=10, single=collect-chao-ace-jack). The default values for freq is 100, for abund is 10, and single are collect-chao-ace-jack-bootstrap-shannon-npshannon-simpson. The valid single estimators are: collect-chao-ace-jack-bootstrap-shannon-npshannon-simpson. The label parameter is used to analyze specific labels in your input. */ class CollectCommand : public Command { public: CollectCommand(string); ~CollectCommand(){} vector setParameters(); string getCommandName() { return "collect.single"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getCitation() { return "Schloss PD, Handelsman J (2006). Introducing SONS, A tool that compares the membership of microbial communities. Appl Environ Microbiol 72: 6773-9. \nhttp://www.mothur.org/wiki/Collect.single"; } string getHelpString(); string getOutputPattern(string); string getDescription() { return "generates collector's curves using calculators, that describe the richness, diversity, and other features of individual samples"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector cDisplays; int abund, size, alpha; float freq; vector outputNames, groups; bool abort, allLines; set labels; //holds labels to be used string label, calc, sharedfile, listfile, rabundfile, sabundfile, format, inputfile; vector Estimators; vector inputFileNames; vector parseSharedFile(string); void fillCDisplays(map); }; #endif mothur-1.48.0/source/commands/collectsharedcommand.cpp000077500000000000000000000734221424121717000231140ustar00rootroot00000000000000/* * collectsharedcommand.cpp * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "collectsharedcommand.h" #include "sharedsobscollectsummary.h" #include "sharedchao1.h" #include "sharedace.h" #include "sharedjabund.h" #include "sharedsorabund.h" #include "sharedjclass.h" #include "sharedsorclass.h" #include "sharedjest.h" #include "sharedsorest.h" #include "sharedthetayc.h" #include "sharedthetan.h" #include "sharedkstest.h" #include "whittaker.h" #include "sharednseqs.h" #include "sharedochiai.h" #include "sharedanderbergs.h" #include "sharedkulczynski.h" #include "sharedkulczynskicody.h" #include "sharedlennon.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" //#include "sharedjackknife.h" #include "whittaker.h" #include "odum.h" #include "canberra.h" #include "structeuclidean.h" #include "structchord.h" #include "hellinger.h" #include "manhattan.h" #include "structpearson.h" #include "soergel.h" #include "spearman.h" #include "structkulczynski.h" #include "structchi2.h" #include "speciesprofile.h" #include "hamming.h" #include "gower.h" #include "memchi2.h" #include "memchord.h" #include "memeuclidean.h" #include "mempearson.h" #include "sharedjsd.h" #include "sharedrjsd.h" //********************************************************************************************************************** vector CollectSharedCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["sharedchao"] = tempOutNames; outputTypes["sharedsobs"] = tempOutNames; outputTypes["sharedace"] = tempOutNames; outputTypes["jabund"] = tempOutNames; outputTypes["sorabund"] = tempOutNames; outputTypes["jclass"] = tempOutNames; outputTypes["sorclass"] = tempOutNames; outputTypes["jest"] = tempOutNames; outputTypes["sorest"] = tempOutNames; outputTypes["thetayc"] = tempOutNames; outputTypes["thetan"] = tempOutNames; outputTypes["kstest"] = tempOutNames; outputTypes["whittaker"] = tempOutNames; outputTypes["sharednseqs"] = tempOutNames; outputTypes["ochiai"] = tempOutNames; outputTypes["anderberg"] = tempOutNames; outputTypes["kulczynski"] = 
tempOutNames; outputTypes["kulczynskicody"] = tempOutNames; outputTypes["lennon"] = tempOutNames; outputTypes["morisitahorn"] = tempOutNames; outputTypes["braycurtis"] = tempOutNames; outputTypes["odum"] = tempOutNames; outputTypes["canberra"] = tempOutNames; outputTypes["structeuclidean"] = tempOutNames; outputTypes["structchord"] = tempOutNames; outputTypes["hellinger"] = tempOutNames; outputTypes["manhattan"] = tempOutNames; outputTypes["structpearson"] = tempOutNames; outputTypes["soergel"] = tempOutNames; outputTypes["spearman"] = tempOutNames; outputTypes["structkulczynski"] = tempOutNames; outputTypes["structchi2"] = tempOutNames; outputTypes["speciesprofile"] = tempOutNames; outputTypes["hamming"] = tempOutNames; outputTypes["gower"] = tempOutNames; outputTypes["memchi2"] = tempOutNames; outputTypes["memchord"] = tempOutNames; outputTypes["memeuclidean"] = tempOutNames; outputTypes["mempearson"] = tempOutNames; outputTypes["jsd"] = tempOutNames; outputTypes["rjsd"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "CollectSharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string CollectSharedCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; helpString += "The collect.shared command parameters are shared, label, freq, calc and groups. shared is required if there is no current sharedfile. \n"; helpString += "The collect.shared command should be in the following format: \n"; helpString += "collect.shared(label=yourLabel, freq=yourFreq, calc=yourEstimators, groups=yourGroups).\n"; helpString += "Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n"; helpString += "The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n"; helpString += "The default value for groups is all the groups in your groupfile.\n"; helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n"; helpString += validCalculator.printCalc("shared"); helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n"; helpString += "If you use sharedchao and run into memory issues, set all to false. \n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. 
You must enter at least 2 valid groups.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "CollectSharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string CollectSharedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "sharedchao") { pattern = "[filename],shared.chao"; } else if (type == "sharedsobs") { pattern = "[filename],shared.sobs"; } else if (type == "sharedace") { pattern = "[filename],shared.ace"; } else if (type == "jabund") { pattern = "[filename],jabund"; } else if (type == "sorabund") { pattern = "[filename],sorabund"; } else if (type == "jclass") { pattern = "[filename],jclass"; } else if (type == "sorclass") { pattern = "[filename],sorclass"; } else if (type == "jest") { pattern = "[filename],jest"; } else if (type == "sorest") { pattern = "[filename],sorest"; } else if (type == "thetayc") { pattern = "[filename],thetayc"; } else if (type == "thetan") { pattern = "[filename],thetan"; } else if (type == "kstest") { pattern = "[filename],kstest"; } else if (type == "whittaker") { pattern = "[filename],whittaker"; } else if (type == "sharednseqs") { pattern = "[filename],shared.nseqs"; } else if (type == "ochiai") { pattern = "[filename],ochiai"; } else if (type == "anderberg") { pattern = "[filename],anderberg"; } else if (type == "kulczynski") { pattern = "[filename],kulczynski"; } else if (type == "kulczynskicody") { pattern = "[filename],kulczynskicody"; } else if (type == "lennon") { pattern = "[filename],lennon"; } else if (type == "morisitahorn") { pattern = "[filename],morisitahorn"; } else if (type == "braycurtis") { pattern = "[filename],braycurtis"; } else if (type == "odum") { pattern = "[filename],odum"; } else if (type == "canberra") { pattern = "[filename],canberra"; } else if (type == "structeuclidean") { pattern = "[filename],structeuclidean"; } else if (type == "structchord") { pattern = "[filename],structchord"; } else if (type == "hellinger") { pattern = "[filename],hellinger"; } else if (type == "manhattan") { pattern = "[filename],manhattan"; } else if (type == "structpearson") { pattern = "[filename],structpearson"; } else if (type == "soergel") { pattern = "[filename],soergel"; } else if (type == "spearman") { pattern = "[filename],spearman"; } else if (type == "structkulczynski") { pattern = "[filename],structkulczynski";} else if (type == "structchi2") { pattern = "[filename],structchi2"; } else if (type == "speciesprofile") { pattern = "[filename],speciesprofile"; } else if (type == "hamming") { pattern = "[filename],hamming"; } else if (type == "gower") { pattern = "[filename],gower"; } else if (type == "memchi2") { pattern = "[filename],memchi2"; } else if (type == "memchord") { pattern = "[filename],memchord"; } else if (type == "memeuclidean") { pattern = "[filename],memeuclidean"; } else if (type == "mempearson") { pattern = "[filename],mempearson"; } else if (type == "jsd") { pattern = "[filename],jsd"; } else if (type == "rjsd") { pattern = "[filename],rjsd"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "CollectSharedCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** CollectSharedCommand::CollectSharedCommand(string option) : Command() { try { 
allLines = true; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters=parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking.. label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } else { if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string temp; temp = validParameter.valid(parameters, "freq"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, freq); temp = validParameter.valid(parameters, "all"); if (temp == "not found") { temp = "false"; } all = util.isTrue(temp); if (!abort) { string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(sharedfile)); map variables; variables["[filename]"] = fileNameRoot; ValidCalculators validCalculator; for (int i=0; ierrorOut(e, "CollectSharedCommand", "CollectSharedCommand"); exit(1); } } //********************************************************************************************************************** CollectSharedCommand::~CollectSharedCommand(){} //********************************************************************************************************************** int CollectSharedCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //if the users entered no valid calculators don't execute command if (cDisplays.size() == 0) { return 0; } for(int i=0;isetAll(all); } InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedOrderVector* order = util.getNextSharedOrder(input, allLines, userLabels, processedLabels, lastLabel); while (order != nullptr) { if (m->getControl_pressed()) { delete order; break; } util.mothurRandomShuffle(*order); Collect cCurve(order, cDisplays); 
cCurve.getSharedCurve(freq); delete order; order = util.getNextSharedOrder(input, allLines, userLabels, processedLabels, lastLabel); } for(int i=0;igetControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "CollectSharedCommand", "execute"); exit(1); } } /***********************************************************/ mothur-1.48.0/source/commands/collectsharedcommand.h000077500000000000000000000025721424121717000225570ustar00rootroot00000000000000#ifndef COLLECTSHAREDCOMMAND_H #define COLLECTSHAREDCOMMAND_H /* * collectsharedcommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "sharedordervector.h" #include "inputdata.h" #include "collect.h" #include "display.h" #include "validcalculator.h" class CollectSharedCommand : public Command { public: CollectSharedCommand(string); ~CollectSharedCommand(); vector setParameters(); string getCommandName() { return "collect.shared"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Handelsman J (2006). Introducing SONS, A tool that compares the membership of microbial communities. Appl Environ Microbiol 72: 6773-9. \nhttp://www.mothur.org/wiki/Collect.shared"; } string getDescription() { return "generates collector's curves for calculators, which describe the similarity between communities or their shared richness"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector cDisplays; float freq; bool abort, allLines, all; set labels; //holds labels to be used string label, calc, groups, sharedfile; vector Estimators, Groups, outputNames; }; #endif mothur-1.48.0/source/commands/command.hpp000077500000000000000000000162051424121717000203600ustar00rootroot00000000000000#ifndef COMMAND_HPP #define COMMAND_HPP //test2 /* * command.h * nast * * Created by Pat Schloss on 10/23/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ /*This class is a parent to all the command classes. */ #include "mothur.h" #include "optionparser.h" #include "validparameter.h" #include "mothurout.h" #include "commandparameter.h" #include "currentfile.h" #include "utils.hpp" class Command { public: Command() { m = MothurOut::getInstance(); current = CurrentFile::getInstance(); inputdirs = current->getInputDir(); outputdir = current->getOutputDir(); } //needed by gui virtual string getCommandName() = 0; virtual string getCommandCategory() = 0; virtual string getHelpString() = 0; virtual string getCitation() = 0; virtual string getDescription() = 0; virtual string getCommonQuestions() { return "Common Questions for commands."; } virtual string getCommandParameters() { vector parameterNames = setParameters(); sort(parameterNames.begin(), parameterNames.end()); string results = util.getStringFromVector(parameterNames, ", "); return results; } virtual map > getOutputFiles() { return outputTypes; } string getOutputFileName(string type, map variableParts) { //uses the pattern to create an output filename for a given type and input file name. 
try { string filename = ""; map >::iterator it; //is this a type this command creates it = outputTypes.find(type); if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } else { string patternTemp = getOutputPattern(type); vector patterns; util.splitAtDash(patternTemp, patterns); //find pattern to use based on number of variables passed in string pattern = ""; bool foundPattern = false; vector numVariablesPerPattern; for (int i = 0; i < patterns.size(); i++) { int numVariables = 0; for (int j = 0; j < patterns[i].length(); j++) { if (patterns[i][j] == '[') { numVariables++; } } numVariablesPerPattern.push_back(numVariables); if (numVariables == variableParts.size()) { pattern = patterns[i]; foundPattern = true; break; } } //if you didn't find an exact match do we have something that might work if (!foundPattern) { for (int i = 0; i < numVariablesPerPattern.size(); i++) { if (numVariablesPerPattern[i] < variableParts.size()) { pattern = patterns[i]; foundPattern = true; break; } } if (!foundPattern) { m->mothurOut("[ERROR]: Not enough variable pieces for " + type + ".\n"); m->setControl_pressed(true); } } if (pattern != "") { int numVariables = 0; for (int i = 0; i < pattern.length(); i++) { if (pattern[i] == '[') { numVariables++; } } vector pieces; util.splitAtComma(pattern, pieces); for (int i = 0; i < pieces.size(); i++) { if (pieces[i][0] == '[') { map::iterator it = variableParts.find(pieces[i]); if (it == variableParts.end()) { m->mothurOut("[ERROR]: Did not provide variable for " + pieces[i] + ".\n"); m->setControl_pressed(true); }else { if (it->second != "") { if (it->first == "[filename]") { filename += it->second; } else if (it->first == "[extension]") { if (filename.length() > 0) { //rip off last "." filename = filename.substr(0, filename.length()-1); } filename += it->second + "."; }else if ((it->first == "[group]") || (it->first == "[tag]")) { string group = it->second; for (int j = 0; j < group.length(); j++) { if (group[j] == '-') { group[j] = '_'; } } filename += group + "."; }else { filename += it->second + "."; } } } }else { filename += pieces[i] + "."; } } if (filename.length() > 0) { //rip off last "." filename = filename.substr(0, filename.length()-1); } } } return filename; } catch(exception& e) { m->errorOut(e, "command", "getOutputFileName"); exit(1); } } virtual string getOutputPattern(string) = 0; //pass in type, returns something like: [filename],align or [filename],[distance],subsample.shared strings in [] means its a variable. This is used by the gui to predict output file names. use variable keywords: [filename], [distance], [group], [extension], [tag] virtual vector setParameters() = 0; //to fill parameters virtual vector getParameters() { return parameters; } virtual int execute() = 0; virtual void help() = 0; void citation() { m->mothurOut("\n"+getCitation()+"\n"); } void commonQuestions() { m->mothurOut("\n"+getCommonQuestions()+"\n"); } virtual ~Command() { } protected: MothurOut* m; Utils util; CurrentFile* current; bool calledHelp; string outputdir; vector inputdirs; map > outputTypes; vector parameters; map >::iterator itTypes; }; #endif mothur-1.48.0/source/commands/consensusseqscommand.cpp000077500000000000000000000672311424121717000232150ustar00rootroot00000000000000/* * consensusseqscommand.cpp * Mothur * * Created by westcott on 11/23/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "consensusseqscommand.h" #include "sequence.hpp" #include "inputdata.h" //********************************************************************************************************************** vector ConsensusSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","fasta-name",false,false,true); parameters.push_back(plist); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pcutoff); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ConsensusSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ConsensusSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The consensus.seqs command can be used in 2 ways: create a consensus sequence from a fastafile, or with a listfile create a consensus sequence for each otu. Sequences must be aligned.\n"; helpString += "The consensus.seqs command parameters are fasta, list, name, count, cutoff and label.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The list parameter allows you to enter a your list file. \n"; helpString += "The name parameter allows you to enter a names file associated with the fasta file. \n"; helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n"; helpString += "The cutoff parameter allows you set a percentage of sequences that support the base. 
For example: cutoff=97 would only return a sequence that only showed ambiguities for bases that were not supported by at least 97% of sequences.\n"; helpString += "The consensus.seqs command should be in the following format: \n"; helpString += "consensus.seqs(fasta=yourFastaFile, list=yourListFile) \n"; helpString += "Example: consensus.seqs(fasta=abrecovery.align, list=abrecovery.fn.list) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ConsensusSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ConsensusSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],cons.fasta-[filename],[tag],cons.fasta"; } else if (type == "name") { pattern = "[filename],cons.names-[filename],[tag],cons.names"; } else if (type == "count") { pattern = "[filename],cons.count_table-[filename],[tag],cons.count_table"; } else if (type == "summary") { pattern = "[filename],cons.summary-[filename],[tag],cons.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ConsensusSeqsCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ConsensusSeqsCommand::ConsensusSeqsCommand(string option) : Command() { try { allLines = true; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else { current->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name.\n"); abort = true; } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } string temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, cutoff); if (outputdir == ""){ outputdir = util.hasPath(fastafile); } } } catch(exception& e) { m->errorOut(e, 
"ConsensusSeqsCommand", "ConsensusSeqsCommand"); exit(1); } } //*************************************************************************************************************** int ConsensusSeqsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); readFasta(); if (m->getControl_pressed()) { return 0; } if (namefile != "") { readNames(); } if (countfile != "") { ct.readTable(countfile, true, false); } if (m->getControl_pressed()) { return 0; } if (listfile == "") { ofstream outSummary; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outputSummaryFile = getOutputFileName("summary", variables); util.openOutputFile(outputSummaryFile, outSummary); outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint); outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile); outSummary << "PositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl; ofstream outFasta; string outputFastaFile = getOutputFileName("fasta", variables); util.openOutputFile(outputFastaFile, outFasta); outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile); vector< vector > percentages; percentages.resize(5); for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); } string consSeq = ""; int thisCount; //get counts for (int j = 0; j < seqLength; j++) { if (m->getControl_pressed()) { outSummary.close(); outFasta.close(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } vector counts; counts.resize(5, 0); //A,T,G,C,Gap int numDots = 0; thisCount = 0; for (map::iterator it = fastaMap.begin(); it != fastaMap.end(); it++) { string thisSeq = it->second; int size = 0; if (countfile != "") { size = ct.getNumSeqs(it->first); } else { map::iterator itCount = nameFileMap.find(it->first); if (itCount != nameFileMap.end()) { size = itCount->second; }else { m->mothurOut("[ERROR]: file mismatch, aborting.\n"); m->setControl_pressed(true); break; } } for (int k = 0; k < size; k++) { if (thisSeq[j] == '.') { numDots++; } char base = toupper(thisSeq[j]); if (base == 'A') { counts[0]++; } else if (base == 'T') { counts[1]++; } else if (base == 'G') { counts[2]++; } else if (base == 'C') { counts[3]++; } else { counts[4]++; } thisCount++; } } char conBase = '.'; if (numDots != thisCount) { conBase = getBase(counts, thisCount); } consSeq += conBase; percentages[0][j] = counts[0] / (float) thisCount; percentages[1][j] = counts[1] / (float) thisCount; percentages[2][j] = counts[2] / (float) thisCount; percentages[3][j] = counts[3] / (float) thisCount; percentages[4][j] = counts[4] / (float) thisCount; } for (int j = 0; j < seqLength; j++) { outSummary << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << thisCount << '\t' << consSeq[j] << endl; } outFasta << ">conseq" << endl << consSeq << endl; outSummary.close(); outFasta.close(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } }else { InputData input(listfile, "list", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } 
processList(list); delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } } m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to find the consensus sequences."); m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ConsensusSeqsCommand", "execute"); exit(1); } } //*************************************************************************************************************** int ConsensusSeqsCommand::processList(ListVector*& list){ try{ ofstream outSummary; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = list->getLabel(); string outputSummaryFile = getOutputFileName("summary", variables); util.openOutputFile(outputSummaryFile, outSummary); outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint); outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile); ofstream outName; string outputNameFile = getOutputFileName("name",variables); util.openOutputFile(outputNameFile, outName); outputNames.push_back(outputNameFile); outputTypes["name"].push_back(outputNameFile); ofstream outFasta; string outputFastaFile = getOutputFileName("fasta",variables); util.openOutputFile(outputFastaFile, outFasta); outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile); outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl; string snumBins = toString(list->getNumBins()); vector binLabels = list->getLabels(); for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { outSummary.close(); outName.close(); outFasta.close(); return 0; } string bin = list->get(i); string consSeq = getConsSeq(bin, outSummary, i); outFasta << ">" << binLabels[i] << endl << consSeq << endl; outName << binLabels[i] << '\t' << binLabels[i] << "," << bin << endl; } outSummary.close(); outName.close(); outFasta.close(); return 0; } catch(exception& e) { m->errorOut(e, "ConsensusSeqsCommand", "processList"); exit(1); } } //*************************************************************************************************************** string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, int binNumber){ try{ string consSeq = ""; bool error = false; int totalSize=0; vector binNames; util.splitAtComma(bin, binNames); vector< vector > percentages; percentages.resize(5); for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); } if (countfile != "") { //get counts for (int j = 0; j < seqLength; j++) { if (m->getControl_pressed()) { return consSeq; } vector counts; counts.resize(5, 0); //A,T,G,C,Gap int numDots = 0; totalSize = 0; for (int i = 0; i < binNames.size(); i++) { if (m->getControl_pressed()) { return consSeq; } string thisSeq = ""; map::iterator itFasta = fastaMap.find(binNames[i]); if (itFasta != fastaMap.end()) { thisSeq = itFasta->second; }else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct.\n"); m->setControl_pressed(true); } int size = ct.getNumSeqs(binNames[i]); if (size != 0) { for (int k = 0; k < size; k++) { if (thisSeq[j] == '.') { numDots++; } char base = toupper(thisSeq[j]); if (base == 'A') { counts[0]++; } else if (base == 'T') { counts[1]++; } else if (base == 'G') { counts[2]++; } else if (base == 'C') { counts[3]++; } 
else { counts[4]++; } totalSize++; } }else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your count file, please correct.\n"); m->setControl_pressed(true); } } char conBase = '.'; if (numDots != totalSize) { conBase = getBase(counts, totalSize); } consSeq += conBase; percentages[0][j] = counts[0] / (float) totalSize; percentages[1][j] = counts[1] / (float) totalSize; percentages[2][j] = counts[2] / (float) totalSize; percentages[3][j] = counts[3] / (float) totalSize; percentages[4][j] = counts[4] / (float) totalSize; } }else { //get sequence strings for each name in the bin vector seqs; for (int i = 0; i < binNames.size(); i++) { map::iterator it; it = nameMap.find(binNames[i]); if (it == nameMap.end()) { if (namefile == "") { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct.\n"); error = true; } else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta or name file, please correct.\n"); error = true; } break; }else { //add sequence string to seqs vector to process below map::iterator itFasta = fastaMap.find(it->second); if (itFasta != fastaMap.end()) { string seq = itFasta->second; seqs.push_back(seq); }else { m->mothurOut("[ERROR]: file mismatch, aborting. \n"); } } } if (error) { m->setControl_pressed(true); return consSeq; } totalSize = seqs.size(); //get counts for (int j = 0; j < seqLength; j++) { if (m->getControl_pressed()) { return consSeq; } vector counts; counts.resize(5, 0); //A,T,G,C,Gap int numDots = 0; for (int i = 0; i < seqs.size(); i++) { if (seqs[i][j] == '.') { numDots++; } char base = toupper(seqs[i][j]); if (base == 'A') { counts[0]++; } else if (base == 'T') { counts[1]++; } else if (base == 'G') { counts[2]++; } else if (base == 'C') { counts[3]++; } else { counts[4]++; } } char conBase = '.'; if (numDots != seqs.size()) { conBase = getBase(counts, seqs.size()); } consSeq += conBase; percentages[0][j] = counts[0] / (float) seqs.size(); percentages[1][j] = counts[1] / (float) seqs.size(); percentages[2][j] = counts[2] / (float) seqs.size(); percentages[3][j] = counts[3] / (float) seqs.size(); percentages[4][j] = counts[4] / (float) seqs.size(); } } for (int j = 0; j < seqLength; j++) { outSummary << (binNumber + 1) << '\t' << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << totalSize << '\t' << consSeq[j] << endl; } return consSeq; } catch(exception& e) { m->errorOut(e, "ConsensusSeqsCommand", "getConsSeq"); exit(1); } } //*************************************************************************************************************** char ConsensusSeqsCommand::getBase(vector counts, int size){ //A,T,G,C,Gap try{ /* A = adenine * C = cytosine * G = guanine * T = thymine * R = G A (purine) * Y = T C (pyrimidine) * K = G T (keto) * M = A C (amino) * S = G C (strong bonds) * W = A T (weak bonds) * B = G T C (all but A) * D = G A T (all but C) * H = A C T (all but G) * V = G C A (all but T) * N = A G C T (any) */ char conBase = 'N'; //zero out counts that don't make the cutoff float percentage = (100.0 - cutoff) / 100.0; for (int i = 0; i < counts.size(); i++) { float countPercentage = counts[i] / (float) size; if (countPercentage < percentage) { counts[i] = 0; } } //any if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'n'; } //any no gap else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) 
{ conBase = 'N'; } //all but T else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'v'; } //all but T no gap else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'V'; } //all but G else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'h'; } //all but G no gap else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'H'; } //all but C else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'd'; } //all but C no gap else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'D'; } //all but A else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'b'; } //all but A no gap else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'B'; } //W = A T (weak bonds) else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'w'; } //W = A T (weak bonds) no gap else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'W'; } //S = G C (strong bonds) else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 's'; } //S = G C (strong bonds) no gap else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'S'; } //M = A C (amino) else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'm'; } //M = A C (amino) no gap else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'M'; } //K = G T (keto) else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'k'; } //K = G T (keto) no gap else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'K'; } //Y = T C (pyrimidine) else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'y'; } //Y = T C (pyrimidine) no gap else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'Y'; } //R = G A (purine) else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'r'; } //R = G A (purine) no gap else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'R'; } //only A else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'a'; } //only A no gap else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'A'; } //only T else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 't'; } //only T no gap else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'T'; } //only G else if ((counts[0] == 0) && 
(counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'g'; } //only G no gap else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'G'; } //only C else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'c'; } //only C no gap else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'C'; } //only gap else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = '-'; } //cutoff removed all counts else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'N'; } else{ m->mothurOut("[ERROR]: cannot find consensus base.\n"); } return conBase; } catch(exception& e) { m->errorOut(e, "ConsensusSeqsCommand", "getBase"); exit(1); } } //*************************************************************************************************************** int ConsensusSeqsCommand::readFasta(){ try{ ifstream in; util.openInputFile(fastafile, in); seqLength = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); string name = seq.getName(); if (name != "") { fastaMap[name] = seq.getAligned(); nameMap[name] = name; //set nameMap incase no names file nameFileMap[name] = 1; if (seqLength == 0) { seqLength = seq.getAligned().length(); } else if (seqLength != seq.getAligned().length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct.\n"); m->setControl_pressed(true); break; } } } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "ConsensusSeqsCommand", "readFasta"); exit(1); } } //*************************************************************************************************************** int ConsensusSeqsCommand::readNames(){ try{ map temp; map::iterator it; bool error = false; util.readNames(namefile, temp); //use central buffered read for (map::iterator itTemp = temp.begin(); itTemp != temp.end(); itTemp++) { string thisname, repnames; thisname = itTemp->first; repnames = itTemp->second; it = nameMap.find(thisname); if (it != nameMap.end()) { //then this sequence was in the fastafile nameFileMap[thisname] = util.getNumNames(repnames); //for later when outputting the new namesFile if the list file is unique vector splitRepNames; util.splitAtComma(repnames, splitRepNames); for (int i = 0; i < splitRepNames.size(); i++) { nameMap[splitRepNames[i]] = thisname; } }else{ m->mothurOut("[ERROR]: " + thisname + " is not in the fasta file, please correct.\n"); error = true; } } if (error) { m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "ConsensusSeqsCommand", "readNames"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/consensusseqscommand.h000077500000000000000000000024461424121717000226570ustar00rootroot00000000000000#ifndef CONSENSUSSEQSCOMMAND_H #define CONSENSUSSEQSCOMMAND_H //test /* * consensusseqscommand.h * Mothur * * Created by westcott on 11/23/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" #include "listvector.hpp" #include "counttable.h" class ConsensusSeqsCommand : public Command { public: ConsensusSeqsCommand(string); ~ConsensusSeqsCommand(){} vector setParameters(); string getCommandName() { return "consensus.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Consensus.seqs"; } string getDescription() { return "create a consensus sequence for each OTU or for a fasta file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: CountTable ct; bool abort, allLines; string fastafile, listfile, namefile, countfile, label; set labels; vector outputNames; map fastaMap; map nameMap; map nameFileMap; int seqLength; float cutoff; int readFasta(); int readNames(); int processList(ListVector*&); string getConsSeq(string, ofstream&, int); char getBase(vector, int); }; #endif mothur-1.48.0/source/commands/cooccurrencecommand.cpp000077500000000000000000000510641424121717000227500ustar00rootroot00000000000000/* * cooccurrencecommand.cpp * Mothur * * Created by kiverson on 1/2/12. * Copyright 2012 Schloss Lab. All rights reserved. * */ #include "cooccurrencecommand.h" //********************************************************************************************************************** vector CooccurrenceCommand::setParameters() { try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared); CommandParameter pmetric("metric", "Multiple", "cscore-checker-combo-vratio", "cscore", "", "", "","",false,false); parameters.push_back(pmetric); CommandParameter pmatrix("matrixmodel", "Multiple", "sim1-sim2-sim3-sim4-sim5-sim6-sim7-sim8-sim9", "sim2", "", "", "","",false,false); parameters.push_back(pmatrix); CommandParameter pruns("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(pruns); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); abort = false; calledHelp = false; vector tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "CooccurrenceCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string CooccurrenceCommand::getHelpString(){ try { string helpString = "The cooccurrence command calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance."; helpString += "The cooccurrence command parameters are shared, metric, matrixmodel, iters, label and groups."; helpString += "The matrixmodel parameter options are sim1, sim2, sim3, sim4, sim5, sim6, sim7, sim8 and sim9. 
Default=sim2"; helpString += "The metric parameter options are cscore, checker, combo and vratio. Default=cscore"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n"; helpString += "The cooccurrence command should be in the following format: \n"; helpString += "cooccurrence(shared=yourSharedFile) \n"; helpString += "Example cooccurrence(shared=final.an.shared).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "CooccurrenceCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string CooccurrenceCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],cooccurence.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "CooccurrenceCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** CooccurrenceCommand::CooccurrenceCommand(string option) : Command() { try { allLines = true; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } //get shared file sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } metric = validParameter.valid(parameters, "metric"); if (metric == "not found") { metric = "cscore"; } if ((metric != "cscore") && (metric != "checker") && (metric != "combo") && (metric != "vratio")) { m->mothurOut("[ERROR]: " + metric + " is not a valid metric option for the cooccurrence command. Choices are cscore, checker, combo, vratio.\n"); abort = true; } matrix = validParameter.valid(parameters, "matrixmodel"); if (matrix == "not found") { matrix = "sim2"; } if ((matrix != "sim1") && (matrix != "sim2") && (matrix != "sim3") && (matrix != "sim4") && (matrix != "sim5" ) && (matrix != "sim6" ) && (matrix != "sim7" ) && (matrix != "sim8" ) && (matrix != "sim9" )) { m->mothurOut("[ERROR]: " + matrix + " is not a valid matrix option for the cooccurrence command. 
Choices are sim1, sim2, sim3, sim4, sim5, sim6, sim7, sim8, sim9.\n"); abort = true; } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, runs); } } catch(exception& e) { m->errorOut(e, "CooccurrenceCommand", "CooccurrenceCommand"); exit(1); } } //********************************************************************************************************************** int CooccurrenceCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); ofstream out; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); string outputFileName = getOutputFileName("summary", variables); util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); out << "metric\tlabel\tScore\tzScore\tstandardDeviation\tnp_Pvalue\n"; while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } getCooccurrence(lookup, out); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return 0; } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(outputFileName); m->mothurOutEndLine(); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "CooccurrenceCommand", "execute"); exit(1); } } //********************************************************************************************************************** int CooccurrenceCommand::getCooccurrence(SharedRAbundVectors*& thisLookUp, ofstream& out){ try { int numOTUS = thisLookUp->getNumBins(); if(numOTUS < 2) { m->mothurOut("Not enough OTUs for co-occurrence analysis, skipping\n"); return 0; } vector< vector > co_matrix; co_matrix.resize(thisLookUp->getNumBins()); for (int i = 0; i < thisLookUp->getNumBins(); i++) { co_matrix[i].resize((thisLookUp->size()), 0); } vector columntotal; columntotal.resize(thisLookUp->size(), 0); vector rowtotal; rowtotal.resize(numOTUS, 0); for (int j = 0; j < thisLookUp->getNumBins(); j++) { //cols of original shared file vector abunds = thisLookUp->getOTU(j); for (int i = 0; i < abunds.size(); i++) { //nrows in the shared file if (m->getControl_pressed()) { return 0; } int abund = abunds[i]; if(abund > 0) { co_matrix[j][i] = 1; rowtotal[j]++; columntotal[i]++; } } } //nrows is ncols of inital matrix. All the functions need this value. They assume the transposition has already taken place and nrows and ncols refer to that matrix. //comatrix and initmatrix are still vectors of vectors of ints as in the original script. The abundancevector is only what was read in ie not a co-occurrence matrix! 
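// Orientation note for the code below: co_matrix has one row per OTU and one column per group,
// with a 1 wherever that OTU was observed in that group; rowtotal/columntotal hold the matching
// occurrence counts. The metrics (cscore, checker, vratio, combo) are computed by TrialSwap2 on
// this presence-absence matrix. For reference, the checkerboard C-score is conventionally the
// mean over OTU pairs (i,j) of (r_i - S_ij)*(r_j - S_ij), with r the row totals and S_ij the
// number of groups the pair shares (e.g. two OTUs each found in one distinct group share no
// groups and contribute (1)*(1) = 1); see trialswap2.cpp for the exact form used here.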
int nrows = numOTUS;//rows of inital matrix int ncols = thisLookUp->size();//groups double initscore = 0.0; vector stats; vector probabilityMatrix; probabilityMatrix.resize(ncols * nrows, 0); vector > nullmatrix(nrows, vector(ncols, 0)); TrialSwap2 trial; int n = accumulate( columntotal.begin(), columntotal.end(), 0 ); //============================================================ //generate a probability matrix. Only do this once. float start = 0.0; if (matrix == "sim1") { for(int i=0;imothurOut("[ERROR]: No model selected! \n"); m->setControl_pressed(true); } //co_matrix is the transposed shared file, initmatrix is the original shared file if (metric == "cscore") { initscore = trial.calc_c_score(co_matrix, rowtotal, ncols, nrows); } else if (metric == "checker") { initscore = trial.calc_checker(co_matrix, rowtotal, ncols, nrows); } else if (metric == "vratio") { initscore = trial.calc_vratio(nrows, ncols, rowtotal, columntotal); } else if (metric == "combo") { initscore = trial.calc_combo(nrows, ncols, co_matrix); } else { m->mothurOut("[ERROR]: No metric selected!\n"); m->setControl_pressed(true); return 1; } m->mothurOut("Initial c score: " + toString(initscore)); m->mothurOutEndLine(); double previous; double current; double randnum; int count; //burn-in for sim9 if(matrix == "sim9") { for(int i=0;i<10000;i++) trial.swap_checkerboards (co_matrix, ncols, nrows); } //populate null matrix from probability matrix, do this a lot. for(int k=0;k(ncols, 0)); if(matrix == "sim1" || matrix == "sim6" || matrix == "sim8" || matrix == "sim7") { count = 0; while(count < n) { if (m->getControl_pressed()) { return 0; } nextnum2: previous = 0.0; randnum = util.getRandomDouble0to1(); for(int i=0;i previous) { nullmatrix[i][j] = 1; count++; if (count > n) break; else goto nextnum2; } previous = current; } } } } else if (matrix == "sim2") { for(int i=0;igetControl_pressed()) { return 0; } randnum = util.getRandomDouble0to1(); for(int j=0;j previous && nullmatrix[i][j] != 1) { nullmatrix[i][j] = 1; count++; previous = 0.0; break; } previous = current; } } } } else if(matrix == "sim3" || matrix == "sim5") { //columns for(int j=0;jgetControl_pressed()) { return 0; } randnum = util.getRandomDouble0to1(); for(int i=0;i previous && nullmatrix[i][j] != 1) { nullmatrix[i][j] = 1; count++; previous = 0.0; break; } previous = current; } } } } //swap_checkerboards takes the original matrix and swaps checkerboards else if(matrix == "sim9") { trial.swap_checkerboards (co_matrix, ncols, nrows); nullmatrix = co_matrix; } else { m->mothurOut("[ERROR]: No null model selected!\n\n"); m->setControl_pressed(true); return 1; } //run metric on null matrix and add score to the stats vector if (metric == "cscore"){ stats.push_back(trial.calc_c_score(nullmatrix, rowtotal, ncols, nrows)); } else if (metric == "checker") { stats.push_back(trial.calc_checker(nullmatrix, rowtotal, ncols, nrows)); } else if (metric == "vratio") { stats.push_back(trial.calc_vratio(nrows, ncols, rowtotal, columntotal)); } else if (metric == "combo") { stats.push_back(trial.calc_combo(nrows, ncols, nullmatrix)); } else { m->mothurOut("[ERROR]: No metric selected!\n\n"); m->setControl_pressed(true); return 1; } } double total = 0.0; for (int i=0; imothurOutEndLine(); m->mothurOut("average metric score: " + toString(nullMean)); m->mothurOutEndLine(); //calc_p_value is not a statistical p-value, it's just the average that are either > or < the initscore. 
//All it does is show what is expected in a competitively structured community //zscore is output so p-value can be looked up in a ztable double pvalue = 0.0; if (metric == "cscore" || metric == "checker") { pvalue = trial.calc_pvalue_greaterthan (stats, initscore); } else{ pvalue = trial.calc_pvalue_lessthan (stats, initscore); } double sd = trial.getSD(runs, stats, nullMean); double zscore = trial.get_zscore(sd, nullMean, initscore); m->mothurOut("zscore: " + toString(zscore)); m->mothurOutEndLine(); m->mothurOut("standard deviation: " + toString(sd)); m->mothurOutEndLine(); m->mothurOut("non-parametric p-value: " + toString(pvalue)); m->mothurOutEndLine(); out << metric << '\t' << thisLookUp->getLabel() << '\t' << nullMean << '\t' << zscore << '\t' << sd << '\t' << pvalue << endl; return 0; } catch(exception& e) { m->errorOut(e, "CooccurrenceCommand", "Cooccurrence"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/cooccurrencecommand.h000077500000000000000000000024421424121717000224110ustar00rootroot00000000000000#ifndef COOCCURRENCECOMMAND_H #define COOCCURRENCECOMMAND_H /* * COOCCURRENCE.h * Mothur * * Created by westcott on 11/10/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "trialswap2.h" #include "inputdata.h" class CooccurrenceCommand : public Command { public: CooccurrenceCommand(string); ~CooccurrenceCommand(){} vector setParameters(); string getCommandName() { return "cooccurrence"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Ulrich W & Gotelli NJ (2010). Null model analysis of species associations using abundance data. Ecology 91:3384.\nhttp://www.mothur.org/wiki/Cooccurrence"; } string getDescription() { return "calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance."; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string metric, matrix; string label, sharedfile, groups; bool abort, allLines; set labels; vector outputNames, Groups; int runs; int getCooccurrence(SharedRAbundVectors*&, ofstream&); }; #endif mothur-1.48.0/source/commands/corraxescommand.cpp000066400000000000000000000743111424121717000221210ustar00rootroot00000000000000/* * corraxescommand.cpp * Mothur * * Created by westcott on 12/22/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "corraxescommand.h" #include "linearalgebra.h" //********************************************************************************************************************** vector CorrAxesCommand::setParameters(){ try { CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none","corraxes",false,true,true); parameters.push_back(paxes); CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false,true); parameters.push_back(prelabund); CommandParameter pmetadata("metadata", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","",false,false); parameters.push_back(pmetadata); CommandParameter pnumaxes("numaxes", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pnumaxes); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "","",false,false); parameters.push_back(pmethod); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["corraxes"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string CorrAxesCommand::getHelpString(){ try { string helpString = ""; helpString += "The corr.axes command reads a shared, relabund or metadata file as well as an axes file and calculates the correlation coefficient.\n"; helpString += "The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label. The shared, relabund or metadata and axes parameters are required. If shared is given the relative abundance is calculated.\n"; helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n"; helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n"; helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n"; helpString += "The numaxes parameter allows you to select the number of axes you would like to use. 
Default=3.\n"; helpString += "The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n"; helpString += "Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n"; helpString += "The corr.axes command outputs a .corr.axes file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string CorrAxesCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "corraxes") { pattern = "[filename],[tag],corr.axes"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** CorrAxesCommand::CorrAxesCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; axesfile = validParameter.validFile(parameters, "axes"); if (axesfile == "not open") { abort = true; } else if (axesfile == "not found") { axesfile = ""; m->mothurOut("axes is a required parameter for the corr.axes command.\n"); abort = true; } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { inputFileName = sharedfile; current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { inputFileName = relabundfile; current->setRelAbundFile(relabundfile); } metadatafile = validParameter.validFile(parameters, "metadata"); if (metadatafile == "not open") { abort = true; } else if (metadatafile == "not found") { metadatafile = ""; } else { inputFileName = metadatafile; } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; pickedGroups = false; } else { pickedGroups = true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } if (outputdir == ""){ outputdir = util.hasPath(inputFileName); } label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile.\n"); label=""; } if ((relabundfile == "") && (sharedfile == "") && (metadatafile == "")) { //is there are current file available for any of these? 
//give priority to shared, then relabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("You must provide either a shared, relabund, or metadata file.\n"); abort = true; } } } if (metadatafile != "") { if ((relabundfile != "") || (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file.\n"); abort = true; } }else { if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared, relabund or metadata file.\n"); abort = true; } } string temp; temp = validParameter.valid(parameters, "numaxes"); if (temp == "not found"){ temp = "3"; } util.mothurConvert(temp, numaxes); method = validParameter.valid(parameters, "method"); if (method == "not found"){ method = "pearson"; } if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "CorrAxesCommand"); exit(1); } } //********************************************************************************************************************** int CorrAxesCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } /*************************************************************************************/ // use smart distancing to get right sharedRabund and convert to relabund if needed // /************************************************************************************/ if (sharedfile != "") { InputData* input = new InputData(sharedfile, "sharedfile", Groups); getSharedFloat(input); delete input; if (m->getControl_pressed()) { delete lookupFloat; return 0; } if (lookupFloat == nullptr) { m->mothurOut("[ERROR] reading relabund file.\n"); return 0; } }else if (relabundfile != "") { InputData* input = new InputData(relabundfile, "relabund", Groups); getSharedFloat(input); delete input; if (m->getControl_pressed()) { delete lookupFloat; return 0; } if (lookupFloat == nullptr) { m->mothurOut("[ERROR] reading relabund file.\n"); return 0; } }else if (metadatafile != "") { getMetadata(); //reads metadata file and store in lookupFloat, saves column headings in metadataLabels for later if (m->getControl_pressed()) { delete lookupFloat; return 0; } if (lookupFloat == nullptr) { m->mothurOut("[ERROR] reading metadata file.\n"); return 0; } }else { m->mothurOut("[ERROR]: no file given.\n"); return 0; } if (m->getControl_pressed()) { delete lookupFloat; return 0; } //this is for a sanity check to make sure the axes file and shared file match vector lookupGroups = lookupFloat->getNamesGroups(); for (int i = 0; i < lookupGroups.size(); i++) { names.insert(lookupGroups[i]); } /*************************************************************************************/ // read axes file and check for file mismatches with shared or relabund file // /************************************************************************************/ //read axes file map > axes = readAxes(); if (m->getControl_pressed()) { delete lookupFloat; return 0; } //sanity check, the read only adds groups that 
are in the shared or relabund file, but we want to make sure the axes file isn't missing anyone if (axes.size() != lookupGroups.size()) { map >::iterator it; for (int i = 0; i < lookupGroups.size(); i++) { it = axes.find(lookupGroups[i]); if (it == axes.end()) { m->mothurOut(lookupGroups[i] + " is in your shared of relabund file but not in your axes file, please correct.\n"); } } m->setControl_pressed(true); } if (m->getControl_pressed()) { delete lookupFloat; return 0; } /*************************************************************************************/ // calc the r values // /************************************************************************************/ map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileName)); variables["[tag]"] = method; string outputFileName = getOutputFileName("corraxes", variables); outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); //output headings if (metadatafile == "") { out << "OTU"; } else { out << "Feature"; } for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1) << "\tp-value"; } out << "\tlength" << endl; if (method == "pearson") { calcPearson(axes, out); } else if (method == "spearman") { calcSpearman(axes, out); } else if (method == "kendall") { calcKendall(axes, out); } else { m->mothurOut("[ERROR]: Invalid method.\n"); } out.close(); delete lookupFloat; if (m->getControl_pressed()) { return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "execute"); exit(1); } } //********************************************************************************************************************** int CorrAxesCommand::calcPearson(map >& axes, ofstream& out) { try { LinearAlgebra linear; vector lookupGroups = lookupFloat->getNamesGroups(); //find average of each axis - X vector averageAxes; averageAxes.resize(numaxes, 0.0); for (map >::iterator it = axes.begin(); it != axes.end(); it++) { vector temp = it->second; for (int i = 0; i < temp.size(); i++) { averageAxes[i] += temp[i]; } } for (int i = 0; i < averageAxes.size(); i++) { averageAxes[i] = averageAxes[i] / (float) axes.size(); } vector currentLabels = lookupFloat->getOTUNames(); //for each otu for (int i = 0; i < lookupFloat->getNumBins(); i++) { if (metadatafile == "") { out << currentLabels[i]; } else { out << metadataLabels[i]; } //find the averages this otu - Y float sumOtu = lookupFloat->getOTUTotal(i); float Ybar = sumOtu / (float) lookupFloat->size(); vector rValues(averageAxes.size()); //find r value for each axis for (int k = 0; k < averageAxes.size(); k++) { double r = 0.0; double numerator = 0.0; double denomTerm1 = 0.0; double denomTerm2 = 0.0; for (int j = 0; j < lookupFloat->size(); j++) { float Yi = lookupFloat->get(i, lookupGroups[j]); float Xi = axes[lookupGroups[j]][k]; numerator += ((Xi - averageAxes[k]) * (Yi - Ybar)); denomTerm1 += ((Xi - averageAxes[k]) * (Xi - averageAxes[k])); denomTerm2 += ((Yi - Ybar) * (Yi - Ybar)); } double denom = (sqrt(denomTerm1) * sqrt(denomTerm2)); r = numerator / denom; if (isnan(r) || isinf(r)) { r = 0.0; } rValues[k] = r; out << '\t' << r; double sig = linear.calcPearsonSig(lookupFloat->size(), r); out << '\t' << sig; } double sum = 0; for(int k=0;kerrorOut(e, 
"CorrAxesCommand", "calcPearson"); exit(1); } } //********************************************************************************************************************** int CorrAxesCommand::calcSpearman(map >& axes, ofstream& out) { try { LinearAlgebra linear; vector lookupGroups = lookupFloat->getNamesGroups(); vector sf; //format data vector< map > tableX; tableX.resize(numaxes); map::iterator itTable; vector< vector > scores; scores.resize(numaxes); for (map >::iterator it = axes.begin(); it != axes.end(); it++) { vector temp = it->second; for (int i = 0; i < temp.size(); i++) { spearmanRank member(it->first, temp[i]); scores[i].push_back(member); //count number of repeats itTable = tableX[i].find(temp[i]); if (itTable == tableX[i].end()) { tableX[i][temp[i]] = 1; }else { tableX[i][temp[i]]++; } } } //calc LX //for each axis vector Lx; Lx.resize(numaxes, 0.0); for (int i = 0; i < numaxes; i++) { for (itTable = tableX[i].begin(); itTable != tableX[i].end(); itTable++) { double tx = (double) itTable->second; Lx[i] += ((pow(tx, 3.0) - tx) / 12.0); } } //sort each axis for (int i = 0; i < numaxes; i++) { sort(scores[i].begin(), scores[i].end(), compareSpearman); } //find ranks of xi in each axis map > rankAxes; for (int i = 0; i < numaxes; i++) { vector ties; int rankTotal = 0; double sfTemp = 0.0; for (int j = 0; j < scores[i].size(); j++) { rankTotal += (j+1); ties.push_back(scores[i][j]); if (j != (scores[i].size()-1)) { // you are not the last so you can look ahead if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankAxes[ties[k].name].push_back(thisrank); } int t = ties.size(); sfTemp += (t*t*t-t); ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankAxes[ties[k].name].push_back(thisrank); } } } sf.push_back(sfTemp); } vector currentLabels = lookupFloat->getOTUNames(); //for each otu for (int i = 0; i < lookupFloat->getNumBins(); i++) { if (metadatafile == "") { out << currentLabels[i]; } else { out << metadataLabels[i]; } //find the ranks of this otu - Y vector otuScores; map tableY; for (int j = 0; j < lookupFloat->size(); j++) { spearmanRank member(lookupGroups[j], lookupFloat->get(i, lookupGroups[j])); otuScores.push_back(member); itTable = tableY.find(member.score); if (itTable == tableY.end()) { tableY[member.score] = 1; }else { tableY[member.score]++; } } //calc Ly double Ly = 0.0; for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) { double ty = (double) itTable->second; Ly += ((pow(ty, 3.0) - ty) / 12.0); } sort(otuScores.begin(), otuScores.end(), compareSpearman); double sg = 0.0; map rankOtus; vector ties; int rankTotal = 0; for (int j = 0; j < otuScores.size(); j++) { rankTotal += (j+1); ties.push_back(otuScores[j]); if (j != (otuScores.size()-1)) { // you are not the last so you can look ahead if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankOtus[ties[k].name] = thisrank; } int t = ties.size(); sg += (t*t*t-t); ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankOtus[ties[k].name] = thisrank; } } } vector pValues(numaxes); //calc spearman ranks for each axis for this otu 
for (int j = 0; j < numaxes; j++) { double di = 0.0; for (int k = 0; k < lookupFloat->size(); k++) { float xi = rankAxes[lookupGroups[k]][j]; float yi = rankOtus[lookupGroups[k]]; di += ((xi - yi) * (xi - yi)); } double p = 0.0; double n = (double) lookupFloat->size(); double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx[j]; double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly; p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2))); if (isnan(p) || isinf(p)) { p = 0.0; } out << '\t' << p; pValues[j] = p; double sig = linear.calcSpearmanSig(n, sf[j], sg, di); out << '\t' << sig; } double sum = 0; for(int k=0;kerrorOut(e, "CorrAxesCommand", "calcSpearman"); exit(1); } } //********************************************************************************************************************** int CorrAxesCommand::calcKendall(map >& axes, ofstream& out) { try { LinearAlgebra linear; vector lookupGroups = lookupFloat->getNamesGroups(); //format data vector< vector > scores; scores.resize(numaxes); for (map >::iterator it = axes.begin(); it != axes.end(); it++) { vector temp = it->second; for (int i = 0; i < temp.size(); i++) { spearmanRank member(it->first, temp[i]); scores[i].push_back(member); } } //sort each axis for (int i = 0; i < numaxes; i++) { sort(scores[i].begin(), scores[i].end(), compareSpearman); } //convert scores to ranks of xi in each axis for (int i = 0; i < numaxes; i++) { vector ties; int rankTotal = 0; for (int j = 0; j < scores[i].size(); j++) { rankTotal += (j+1); ties.push_back(&(scores[i][j])); if (j != scores[i].size()-1) { // you are not the last so you can look ahead if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); (*ties[k]).score = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); (*ties[k]).score = thisrank; } } } } vector currentLabels = lookupFloat->getOTUNames(); //for each otu for (int i = 0; i < lookupFloat->getNumBins(); i++) { if (metadatafile == "") { out << currentLabels[i]; } else { out << metadataLabels[i]; } //find the ranks of this otu - Y vector otuScores; for (int j = 0; j < lookupFloat->size(); j++) { spearmanRank member(lookupGroups[j], lookupFloat->get(i, lookupGroups[j])); otuScores.push_back(member); } sort(otuScores.begin(), otuScores.end(), compareSpearman); map rankOtus; vector ties; int rankTotal = 0; for (int j = 0; j < otuScores.size(); j++) { rankTotal += (j+1); ties.push_back(otuScores[j]); if (j != otuScores.size()-1) { // you are not the last so you can look ahead if (otuScores[j].score != otuScores[j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankOtus[ties[k].name] = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankOtus[ties[k].name] = thisrank; } } } vector pValues(numaxes); //calc spearman ranks for each axis for this otu for (int j = 0; j < numaxes; j++) { int numCoor = 0; int numDisCoor = 0; vector otus; vector otusTemp; for (int l = 0; l < scores[j].size(); l++) { spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]); otus.push_back(member); } int count = 0; for (int l = 0; l < scores[j].size(); l++) { int numWithHigherRank = 0; int numWithLowerRank = 0; float 
thisrank = otus[l].score; for (int u = l+1; u < scores[j].size(); u++) { if (otus[u].score > thisrank) { numWithHigherRank++; } else if (otus[u].score < thisrank) { numWithLowerRank++; } count++; } numCoor += numWithHigherRank; numDisCoor += numWithLowerRank; } double p = (numCoor - numDisCoor) / (float) count; if (isnan(p) || isinf(p)) { p = 0.0; } out << '\t' << p; pValues[j] = p; double sig = linear.calcKendallSig(scores[j].size(), p); out << '\t' << sig; } double sum = 0; for(int k=0;kerrorOut(e, "CorrAxesCommand", "calcKendall"); exit(1); } } //********************************************************************************************************************** int CorrAxesCommand::getSharedFloat(InputData* input){ try { lookupFloat = input->getSharedRAbundFloatVectors(); string lastLabel = lookupFloat->getLabel(); if (label == "") { label = lastLabel; return 0; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { return 0; } if(labels.count(lookupFloat->getLabel()) == 1){ processedLabels.insert(lookupFloat->getLabel()); userLabels.erase(lookupFloat->getLabel()); break; } if ((util.anyLabelsToProcess(lookupFloat->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookupFloat->getLabel(); delete lookupFloat; lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); processedLabels.insert(lookupFloat->getLabel()); userLabels.erase(lookupFloat->getLabel()); //restore real lastlabel to save below lookupFloat->setLabels(saveLabel); break; } lastLabel = lookupFloat->getLabel(); //get next line to process //prevent memory leak delete lookupFloat; lookupFloat = input->getSharedRAbundFloatVectors(); } if (m->getControl_pressed()) { return 0; } //output error messages about any remaining user labels bool needToRun = false; for (set::iterator it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; } else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete lookupFloat; lookupFloat = input->getSharedRAbundFloatVectors(); } return 0; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "getSharedFloat"); exit(1); } } /*****************************************************************/ map > CorrAxesCommand::readAxes(){ try { map > axes; ifstream in; util.openInputFile(axesfile, in); string headerLine = util.getline(in); gobble(in); //count the number of axis you are reading bool done = false; int count = 0; while (!done) { int pos = headerLine.find("axis"); if (pos != string::npos) { count++; headerLine = headerLine.substr(pos+4); }else { done = true; } } if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". 
Using " + toString(count) + ".\n"); numaxes = count; } while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return axes; } string group = ""; in >> group; gobble(in); vector thisGroupsAxes; for (int i = 0; i < count; i++) { float temp = 0.0; in >> temp; //only save the axis we want if (i < numaxes) { thisGroupsAxes.push_back(temp); } } //save group if its one the user selected if (names.count(group) != 0) { map >::iterator it = axes.find(group); if (it == axes.end()) { axes[group] = thisGroupsAxes; }else { m->mothurOut(group + " is already in your axes file, using first definition.\n"); } } gobble(in); } in.close(); return axes; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "readAxes"); exit(1); } } /*****************************************************************/ int CorrAxesCommand::getMetadata(){ try { vector groupNames; ifstream in; util.openInputFile(metadatafile, in); string headerLine = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpace(headerLine); //save names of columns you are reading for (int i = 1; i < pieces.size(); i++) { metadataLabels.push_back(pieces[i]); } //read rest of file lookupFloat = new SharedRAbundFloatVectors(); lookupFloat->setOTUNames(metadataLabels); while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return 0; } string metadataLine = util.getline(in); gobble(in); vector metaPieces = util.splitWhiteSpace(metadataLine); string group = metaPieces[0]; groupNames.push_back(group); if (pieces.size() != metaPieces.size()) { //sanity check m->mothurOut("[ERROR]: Your metadata file seems to be mismatched. I read " + toString(pieces.size()) + " column headers, but found " + toString(metaPieces.size()) + " columns for the " + group + " group. Quitting.\n"); m->setControl_pressed(true); in.close(); return 0; } SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector(); tempLookup->setLabel("1"); tempLookup->setGroup(group); for (int i = 1; i < metaPieces.size(); i++) { float temp = 0.0; util.mothurConvert(metaPieces[i], temp); tempLookup->push_back(temp); } lookupFloat->push_back(tempLookup); } in.close(); lookupFloat->setLabels("1"); lookupFloat->eliminateZeroOTUS(); metadataLabels = lookupFloat->getOTUNames(); return 0; } catch(exception& e) { m->errorOut(e, "CorrAxesCommand", "getMetadata"); exit(1); } } /*****************************************************************/ mothur-1.48.0/source/commands/corraxescommand.h000077500000000000000000000032231424121717000215630ustar00rootroot00000000000000#ifndef CORRAXESCOMMAND_H #define CORRAXESCOMMAND_H /* * corraxescommand.h * Mothur * * Created by westcott on 12/22/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "sharedrabundvectors.hpp" #include "sharedrabundfloatvectors.hpp" #include "inputdata.h" class CorrAxesCommand : public Command { public: CorrAxesCommand(string); ~CorrAxesCommand(){} vector setParameters(); string getCommandName() { return "corr.axes"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. 
\nhttp://www.mothur.org/wiki/Corr.axes"; } string getDescription() { return "calculate the correlation coefficient for each column in a shared/relabund file to the axes displayed in a pcoa file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string axesfile, sharedfile, relabundfile, metadatafile, groups, label, inputFileName, method; bool abort, pickedGroups; int numaxes; set names; vector outputNames, Groups; SharedRAbundFloatVectors* lookupFloat; vector metadataLabels; int getSharedFloat(InputData*); int getMetadata(); map > readAxes(); int calcPearson(map >&, ofstream&); int calcSpearman(map >&, ofstream&); int calcKendall(map >&, ofstream&); }; #endif mothur-1.48.0/source/commands/countgroupscommand.cpp000077500000000000000000000265201424121717000226650ustar00rootroot00000000000000/* * countgroupscommand.cpp * Mothur * * Created by westcott on 8/9/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "countgroupscommand.h" #include "inputdata.h" //********************************************************************************************************************** vector CountGroupsCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none","summary",false,false,true); parameters.push_back(pshared); CommandParameter pgroup("group", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none","summary",false,false,true); parameters.push_back(pgroup); CommandParameter pcount("count", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none","summary",false,false,true); parameters.push_back(pcount); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "CountGroupsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string CountGroupsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],count.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** string CountGroupsCommand::getHelpString(){ try { string helpString = ""; helpString += "The count.groups command counts sequences from a specific group or set of groups from the following file types: group, count or shared file.\n"; helpString += "The count.groups command parameters are accnos, group, shared and groups. 
You must provide a group or shared file.\n"; helpString += "The accnos parameter allows you to provide a file containing the list of groups.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like. You can separate group names with dashes.\n"; helpString += "The count.groups command should be in the following format: count.groups(accnos=yourAccnos, group=yourGroupFile).\n"; helpString += "Example count.groups(accnos=amazon.accnos, group=amazon.groups).\n"; helpString += "or count.groups(groups=pasture, group=amazon.groups).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "CountGroupsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** CountGroupsCommand::CountGroupsCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = ""; } else { current->setAccnosFile(accnosfile); } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); CountTable ct; if (!ct.testGroups(countfile)) { m->mothurOut("[ERROR]: Your count file does not have any group information, aborting.\n"); abort=true; } } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if ((sharedfile == "") && (groupfile == "") && (countfile == "")) { //give priority to shared, then group sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required.\n"); abort = true; } } } } } } catch(exception& e) { m->errorOut(e, "CountGroupsCommand", "CountGroupsCommand"); exit(1); } } 
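//**********************************************************************************************************************
// A minimal sketch of the per-group tallying that execute() performs when a group file is supplied,
// assuming only the GroupMap calls already used there (readMap, getNamesOfGroups, getNumSeqs).
// It is an illustrative helper under those assumptions; nothing in this command calls it.
static long long tallyGroupTotals(string groupFileName, vector<string>& groups) {
    GroupMap groupMap(groupFileName);
    groupMap.readMap();                                                    //load sequence -> group assignments
    if (groups.size() == 0) { groups = groupMap.getNamesOfGroups(); }      //default to every group in the file
    long long total = 0;
    for (int i = 0; i < groups.size(); i++) {
        total += groupMap.getNumSeqs(groups[i]);                           //sequences assigned to this group
    }
    return total;                                                          //the value reported as "Total seqs:"
}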
//********************************************************************************************************************** int CountGroupsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //get groups you want to remove if (accnosfile != "") { util.readAccnos(accnosfile, Groups); } if (groupfile != "") { map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); string outputFileName = getOutputFileName("summary", variables); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); GroupMap groupMap(groupfile); groupMap.readMap(); vector nameGroups = groupMap.getNamesOfGroups(); if (Groups.size() == 0) { Groups = nameGroups; } int total = 0; for (int i = 0; i < Groups.size(); i++) { int num = groupMap.getNumSeqs(Groups[i]); total += num; m->mothurOut(Groups[i] + " contains " + toString(num) + ".\n"); out << Groups[i] << '\t' << num << endl; } out.close(); m->mothurOut("\nSize of smallest group: " + toString(groupMap.getNumSeqsSmallestGroup()) + ".\n"); m->mothurOut("\nTotal seqs: " + toString(total) + ".\n"); } if (m->getControl_pressed()) { return 0; } if (countfile != "") { map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); string outputFileName = getOutputFileName("summary", variables); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); CountTable ct; ct.readTable(countfile, true, false); vector nameGroups = ct.getNamesOfGroups(); if (Groups.size() == 0) { Groups = nameGroups; } int total = 0; for (int i = 0; i < Groups.size(); i++) { int num = ct.getGroupCount(Groups[i]); total += num; m->mothurOut(Groups[i] + " contains " + toString(num) + ".\n"); out << Groups[i] << '\t' << num << endl; } out.close(); m->mothurOut("\nSize of smallest group: " + toString(ct.getNumSeqsSmallestGroup()) + ".\n"); m->mothurOut("\nTotal seqs: " + toString(total) + ".\n"); } if (m->getControl_pressed()) { return 0; } if (sharedfile != "") { InputData input(sharedfile, "sharedfile", Groups); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); Groups = lookup->getNamesGroups(); map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile)); string outputFileName = getOutputFileName("summary", variables); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); int total = 0; vector groups = lookup->getNamesGroups(); for (int i = 0; i < groups.size(); i++) { int num = lookup->getNumSeqs(groups[i]); total += num; m->mothurOut(groups[i] + " contains " + toString(num) + ".\n"); out << groups[i] << '\t' << num << endl; } out.close(); m->mothurOut("\nSize of smallest group: " + toString(lookup->getNumSeqsSmallestGroup()) + ".\n"); delete lookup; m->mothurOut("\nTotal seqs: " + toString(total) + ".\n"); } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); 
return 0; } catch(exception& e) { m->errorOut(e, "CountGroupsCommand", "execute"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/countgroupscommand.h000077500000000000000000000016501424121717000223270ustar00rootroot00000000000000#ifndef COUNTGROUPSCOMMAND_H #define COUNTGROUPSCOMMAND_H /* * countgroupscommand.h * Mothur * * Created by westcott on 8/9/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" class CountGroupsCommand : public Command { public: CountGroupsCommand(string); ~CountGroupsCommand(){} vector setParameters(); string getCommandName() { return "count.groups"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Count.groups"; } string getDescription() { return "counts the number of sequences in each group"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string sharedfile, groupfile, countfile, groups, accnosfile; bool abort; vector Groups; vector outputNames; }; #endif mothur-1.48.0/source/commands/countseqscommand.cpp000077500000000000000000000432601424121717000223210ustar00rootroot00000000000000/* * countseqscommand.cpp * Mothur * * Created by westcott on 6/1/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "countseqscommand.h" #include "counttable.h" #include "inputdata.h" //********************************************************************************************************************** vector CountSeqsCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "NameSHared-sharedGroup", "NameSHared", "none","count",false,false,true); parameters.push_back(pshared); CommandParameter pcount("count", "InputTypes", "", "", "NameSHared-sharedGroup", "NameSHared", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pname("name", "InputTypes", "", "", "NameSHared", "NameSHared", "none","count",false,false,true); parameters.push_back(pname); CommandParameter pgroup("group", "InputTypes", "", "", "sharedGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "CountSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string CountSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The count.seqs aka. make.table command reads a name or shared file and outputs a .count_table file. 
You may also provide a group with the names file to get the counts broken down by group.\n"; helpString += "You can also inflate or deflate an existing count table using the count and compress parameters. ie. count.seqs(count=current, compress=t)\n"; helpString += "The groups parameter allows you to indicate which groups you want to include in the counts, by default all groups in your groupfile are used.\n"; helpString += "The compress parameter allows you to indicate you want the count table printed in compressed format. Default=t.\n"; helpString += "When you use the groups parameter and a sequence does not represent any sequences from the groups you specify it is not included in the .count.summary file.\n"; helpString += "The count.seqs command should be in the following format: count.seqs(name=yourNameFile).\n"; helpString += "Example count.seqs(name=amazon.names) or make.table(name=amazon.names).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "CountSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string CountSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "count") { pattern = "[filename],count_table-[filename],[distance],count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "CountSeqsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** CountSeqsCommand::CountSeqsCommand(string option) : Command() { try { allLines = true; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found"){ namefile = ""; } else { current->setNameFile(namefile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found"){ sharedfile = ""; } else { current->setSharedFile(sharedfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found"){ countfile = ""; } else { current->setCountFile(countfile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } if ((namefile == "") && (sharedfile == "") && (countfile == "")) { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current namefile or sharedfile and the name or shared parameter is required, unless inflating or deflating an existing count file.\n"); abort = true; } } } groups = 
validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = "all"; } util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } string temp = validParameter.valid(parameters, "compress"); if (temp == "not found") { temp = "t"; } compress = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "CountSeqsCommand", "CountSeqsCommand"); exit(1); } } //********************************************************************************************************************** int CountSeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } map variables; if (countfile != "") { CountTable ct; ct.readTable(countfile, true, false, Groups); if (outputdir == "") { outputdir = util.hasPath(countfile); } variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); if (compress) { variables["[distance]"] = "sparse"; string outputFileName = getOutputFileName("count", variables); outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName); ct.printCompressedTable(outputFileName); }else { variables["[distance]"] = "full"; string outputFileName = getOutputFileName("count", variables); outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName); ct.printTable(outputFileName, false); } }else if (namefile != "") { if (outputdir == "") { outputdir = util.hasPath(namefile); } variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(namefile)); string outputFileName = getOutputFileName("count", variables); long start = time(nullptr); unsigned long long total = process(outputFileName); if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return 0; } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to create a table for " + toString(total) + " sequences.\n\n"); m->mothurOut("Total number of sequences: " + toString(total) + "\n"); }else { if (outputdir == "") { outputdir = util.hasPath(sharedfile); } variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); vector currentLabels = lookup->getOTUNames(); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } vector data = lookup->getSharedRAbundVectors(); processShared(data, variables, currentLabels); for(int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { string currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "CountSeqsCommand", "execute"); exit(1); } } //********************************************************************************************************************** unsigned long long CountSeqsCommand::processShared(vector& lookup, map variables, vector currentLabels){ try { variables["[distance]"] = 
lookup[0]->getLabel(); string outputFileName = getOutputFileName("count", variables); outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName); CountTable ct; for (int i = 0; i < lookup.size(); i++) { ct.addGroup(lookup[i]->getGroup()); } for (int j = 0; j < lookup[0]->getNumBins(); j++) { if (m->getControl_pressed()) { break; } vector outputs; for (int i = 0; i < lookup.size(); i++) { outputs.push_back(lookup[i]->get(j)); } ct.push_back(currentLabels[j], outputs); } if (compress) { ct.printCompressedTable(outputFileName); }else { ct.printTable(outputFileName); } return 0; } catch(exception& e) { m->errorOut(e, "CountSeqsCommand", "processShared"); exit(1); } } //********************************************************************************************************************** unsigned long long CountSeqsCommand::process(string outputFileName){ try { CountTable ct; ct.createTable(namefile, groupfile, Groups); if (compress) { ct.printCompressedTable(outputFileName); }else { ct.printTable(outputFileName); } outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName); return ct.getNumSeqs(); } catch(exception& e) { m->errorOut(e, "CountSeqsCommand", "process"); exit(1); } } /**************************************************************************************************/ map CountSeqsCommand::processNameFile(string name) { try { map indexToNames; ofstream out; util.openOutputFile(name, out); ifstream in; util.openInputFile(namefile, in); string rest = ""; char buffer[4096]; bool pairDone = false; bool columnOne = true; string firstCol, secondCol; int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } in.read(buffer, 4096); vector pieces = util.splitWhiteSpace(rest, buffer, in.gcount()); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { firstCol = pieces[i]; columnOne=false; } else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkName(firstCol); util.checkName(secondCol); //parse names into vector vector theseNames; util.splitAtComma(secondCol, theseNames); for (int i = 0; i < theseNames.size(); i++) { out << theseNames[i] << '\t' << count << endl; } indexToNames[count] = firstCol; pairDone = false; count++; } } } in.close(); if (rest != "") { vector pieces = util.splitWhiteSpace(rest); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { firstCol = pieces[i]; columnOne=false; } else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkName(firstCol); util.checkName(secondCol); //parse names into vector vector theseNames; util.splitAtComma(secondCol, theseNames); for (int i = 0; i < theseNames.size(); i++) { out << theseNames[i] << '\t' << count << endl; } indexToNames[count] = firstCol; pairDone = false; count++; } } } out.close(); return indexToNames; } catch(exception& e) { m->errorOut(e, "CountSeqsCommand", "processNameFile"); exit(1); } } /**************************************************************************************************/ map CountSeqsCommand::getGroupNames(string filename, set& namesOfGroups) { try { map indexToGroups; map groupIndex; map::iterator it; ofstream out; util.openOutputFile(filename, out); ifstream in; util.openInputFile(groupfile, in); string rest = ""; char buffer[4096]; bool pairDone = false; bool columnOne = true; string firstCol, secondCol; int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } in.read(buffer, 4096); vector pieces = util.splitWhiteSpace(rest, buffer, 
in.gcount()); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { firstCol = pieces[i]; columnOne=false; } else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkName(firstCol); it = groupIndex.find(secondCol); if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above groupIndex[secondCol] = count; count++; } out << firstCol << '\t' << groupIndex[secondCol] << endl; namesOfGroups.insert(secondCol); pairDone = false; } } } in.close(); if (rest != "") { vector pieces = util.splitWhiteSpace(rest); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { firstCol = pieces[i]; columnOne=false; } else { secondCol = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkName(firstCol); it = groupIndex.find(secondCol); if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above groupIndex[secondCol] = count; count++; } out << firstCol << '\t' << groupIndex[secondCol] << endl; namesOfGroups.insert(secondCol); pairDone = false; } } } out.close(); for (it = groupIndex.begin(); it != groupIndex.end(); it++) { indexToGroups[it->second] = it->first; } return indexToGroups; } catch(exception& e) { m->errorOut(e, "CountSeqsCommand", "getGroupNames"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/countseqscommand.h000077500000000000000000000023441424121717000217640ustar00rootroot00000000000000#ifndef COuNTSEQSCOMMAND_H #define COuNTSEQSCOMMAND_H /* * countseqscommand.h * Mothur * * Created by westcott on 6/1/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "groupmap.h" class CountSeqsCommand : public Command { public: CountSeqsCommand(string); ~CountSeqsCommand(){} vector setParameters(); string getCommandName() { return "count.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Count.seqs"; } string getDescription() { return "makes a count file from a names or shared file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string namefile, groupfile, groups, sharedfile, countfile; bool abort, allLines, compress; vector Groups, outputNames; set labels; unsigned long long process(string); map processNameFile(string); map getGroupNames(string, set&); unsigned long long processShared(vector& lookup, map variables, vector); }; #endif mothur-1.48.0/source/commands/createdatabasecommand.cpp000077500000000000000000001115541424121717000232270ustar00rootroot00000000000000// // createdatabasecommand.cpp // Mothur // // Created by Sarah Westcott on 3/28/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
// #include "createdatabasecommand.h" #include "inputdata.h" //********************************************************************************************************************** vector CreateDatabaseCommand::setParameters(){ try { CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none","database",false,false,true); parameters.push_back(pfasta); CommandParameter pname("repname", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pconstaxonomy); CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist); CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(prelabund); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["database"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string CreateDatabaseCommand::getHelpString(){ try { string helpString = ""; helpString += "The create.database command reads a list, shared or relabund file, *.cons.taxonomy, and optional *.rep.fasta, *.rep.names, groupfile, or count file and creates a database file.\n"; helpString += "The create.database command parameters are repfasta, list, shared, repname, constaxonomy, group, count and label. List or shared and constaxonomy are required.\n"; helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n"; helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n"; helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. 
\n"; helpString += "The constaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n"; helpString += "The group file is optional and will just give you the abundance breakdown by group.\n"; helpString += "The label parameter allows you to specify a label to be used from your listfile.\n"; helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n"; helpString += "The create.database command should be in the following format: \n"; helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n"; helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, repname=final.an.0.03.rep.names, list=final.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string CreateDatabaseCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "database") { pattern = "[filename],database"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** CreateDatabaseCommand::CreateDatabaseCommand(string option) : Command() { try{ //allow user to run help if (option == "help") { help(); abort = true; calledHelp = true; }else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not found") { listfile = ""; } else if (listfile == "not open") { listfile = ""; abort = true; } else { current->setListFile(listfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not found") { sharedfile = ""; } else if (sharedfile == "not open") { sharedfile = ""; abort = true; } else { current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not found") { relabundfile = ""; } else if (relabundfile == "not open") { relabundfile = ""; abort = true; } else { current->setRelAbundFile(relabundfile); } if ((sharedfile == "") && (listfile == "") && (relabundfile == "")) { //is there are current file available for either of these? //give priority to list, then shared, then relabund listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("[ERROR]: No valid current files. 
You must provide a shared, list or relabund file before you can use the create.database command.\n"); abort = true; } } } } else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("When executing a create.database command you must enter ONLY ONE of the following: relabund, shared or list.\n"); abort = true; } if (sharedfile != "") { if (outputdir == "") { outputdir = util.hasPath(sharedfile); } } else if (listfile != ""){ if (outputdir == "") { outputdir = util.hasPath(listfile); } } else { if (outputdir == "") { outputdir = util.hasPath(relabundfile); } } contaxonomyfile = validParameter.validFile(parameters, "constaxonomy"); if (contaxonomyfile == "not found") { //if there is a current list file, use it contaxonomyfile = ""; m->mothurOut("The constaxonomy parameter is required, aborting.\n"); abort = true; } else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; } repfastafile = validParameter.validFile(parameters, "repfasta"); if (repfastafile == "not found") { repfastafile = ""; } else if (repfastafile == "not open") { repfastafile = ""; abort = true; } repnamesfile = validParameter.validFile(parameters, "repname"); if (repnamesfile == "not found") { repnamesfile = ""; } else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; } if ((repnamesfile != "") && (repfastafile == "")) { m->mothurOut("[ERROR]: You must provide a repfasta file if you are using a repnames file.\n"); abort = true; } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not found") { countfile = ""; } else if (countfile == "not open") { countfile = ""; abort = true; } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your file.\n");} } } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand"); exit(1); } } //********************************************************************************************************************** int CreateDatabaseCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //taxonomies holds the taxonomy info for each Otu //classifyOtuSizes holds the size info of each Otu to help with error checking vector taxonomies; vector otuLabels; vector classifyOtuSizes = readTax(taxonomies, otuLabels); if (m->getControl_pressed()) { return 0; } vector seqs; vector repOtusSizes; if (repfastafile != "") { repOtusSizes = readFasta(seqs); } if (m->getControl_pressed()) { return 0; } //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map. 
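//For example (an assumed illustration, not data from any particular run): a rep.names line such as
//    seqB    seqB,seqC,seqA
//is first stored as repNames["seqB,seqC,seqA"] = "seqB", then re-keyed below to
//repNames["seqA,seqB,seqC"] = "seqB", so that a list-file bin can later be matched to its
//representative simply by sorting and comma-joining its member names the same way.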
map repNames; map nameMap; int numUniqueNamesFile = 0; CountTable ct; if (repnamesfile != "") { numUniqueNamesFile = util.readNames(repnamesfile, repNames, 1); //the repnames file does not have the same order as the list file bins so we need to sort and reassemble for the search below map tempRepNames; for (map::iterator it = repNames.begin(); it != repNames.end();) { string bin = it->first; vector temp; util.splitAtChar(bin, temp, ','); sort(temp.begin(), temp.end()); bin = ""; for (int i = 0; i < temp.size()-1; i++) { bin += temp[i] + ','; } bin += temp[temp.size()-1]; tempRepNames[bin] = it->second; repNames.erase(it++); } repNames = tempRepNames; }else if (countfile != ""){ ct.readTable(countfile, true, false); numUniqueNamesFile = ct.getNumUniqueSeqs(); nameMap = ct.getNameMap(); } if (m->getControl_pressed()) { return 0; } if (repfastafile != "") { //are there the same number of otus in the fasta and name files if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->setControl_pressed(true); } //are there the same number of OTUs in the tax and fasta file if (classifyOtuSizes.size() != repOtusSizes.size()) { m->mothurOut("[ERROR]: you have " + toString(classifyOtuSizes.size()) + " taxonomies in your contaxonomy file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->setControl_pressed(true); } if (m->getControl_pressed()) { return 0; } //at this point we have the same number of OTUs. Are the sizes we have found so far accurate? for (int i = 0; i < classifyOtuSizes.size(); i++) { if (classifyOtuSizes[i] != repOtusSizes[i]) { m->mothurOut("[ERROR]: OTU size info does not match for bin " + toString(i+1) + ". The contaxonomy file indicated the OTU represented " + toString(classifyOtuSizes[i]) + " sequences, but the repfasta file had " + toString(repOtusSizes[i]) + ". These should match. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); } } } if (m->getControl_pressed()) { return 0; } map variables; if (listfile != "") { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); } else if (sharedfile != "") { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); } else { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(relabundfile)); } string outputFileName = getOutputFileName("database", variables); outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); string header = "OTUNumber\tAbundance"; if (listfile != "") { //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile ListVector* list = getList(); if (otuLabels.size() != list->getNumBins()) { m->mothurOut("[ERROR]: you have " + toString(otuLabels.size()) + " otus in your contaxonomy file, but your list file has " + toString(list->getNumBins()) + " otus. These should match. 
Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); } if (m->getControl_pressed()) { delete list; return 0; } GroupMap* groupmap = nullptr; if (groupfile != "") { groupmap = new GroupMap(groupfile); groupmap->readMap(); } if (m->getControl_pressed()) { delete list; if (groupfile != "") { delete groupmap; } return 0; } if (groupfile != "") { header = "OTUNumber"; for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += '\t' + (groupmap->getNamesOfGroups())[i]; } }else if (countfile != "") { if (ct.hasGroupInfo()) { header = "OTUNumber"; for (int i = 0; i < ct.getNamesOfGroups().size(); i++) { header += '\t' + (ct.getNamesOfGroups())[i]; } } } if (repfastafile != "") { header += "\trepSeqName\trepSeq"; } header += "\tOTUConTaxonomy"; out << header << endl; vector binLabels = list->getLabels(); for (int i = 0; i < list->getNumBins(); i++) { int index = findIndex(otuLabels, binLabels[i]); if (index == -1) { m->mothurOut("[ERROR]: " + binLabels[i] + " is not in your constaxonomy file, aborting.\n"); m->setControl_pressed(true); } if (m->getControl_pressed()) { break; } out << otuLabels[index]; vector binNames; string bin = list->get(i); util.splitAtComma(bin, binNames); string seqRepName = ""; int numSeqsRep = binNames.size(); if (repnamesfile != "") { sort(binNames.begin(), binNames.end()); bin = ""; for (int j = 0; j < binNames.size()-1; j++) { bin += binNames[j] + ','; } bin += binNames[binNames.size()-1]; map::iterator it = repNames.find(bin); if (it == repNames.end()) { m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); break; }else { seqRepName = it->second; numSeqsRep = binNames.size(); } //sanity check if (binNames.size() != classifyOtuSizes[index]) { m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); break; } }else if ((countfile != "") && (repfastafile != "")) { //find rep sequence in bin for (int j = 0; j < binNames.size(); j++) { map::iterator itNameMap = nameMap.find(binNames[j]); //if you are in the counttable you must be the rep. because get.oturep with a countfile only includes the rep sequences in the rep.count file. if (itNameMap != nameMap.end()) { seqRepName = itNameMap->first; numSeqsRep = itNameMap->second; j += binNames.size(); //exit loop } } if (seqRepName == "") { m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); break; } if (numSeqsRep != classifyOtuSizes[i]) { m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". 
Make sure you are using files for the same distance.\n"); m->setControl_pressed(true); break; } } //output abundances if (groupfile != "") { string groupAbunds = ""; map counts; //initialize counts to 0 for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; } //find abundances by group bool error = false; for (int j = 0; j < binNames.size(); j++) { string group = groupmap->getGroup(binNames[j]); if (group == "not found") { m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n"); error = true; }else { counts[group]++; } } //output counts for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << '\t' << counts[(groupmap->getNamesOfGroups())[j]]; } if (error) { m->setControl_pressed(true); } }else if ((countfile != "") && (repfastafile != "")) { if (ct.hasGroupInfo()) { vector groupCounts = ct.getGroupCounts(seqRepName); for (int j = 0; j < groupCounts.size(); j++) { out << '\t' << groupCounts[j]; } }else { out << '\t' << numSeqsRep; } }else if ((countfile != "") && (repfastafile == "")) { if (ct.hasGroupInfo()) { vector groupTotals; groupTotals.resize(ct.getNumGroups(), 0); for (int j = 0; j < binNames.size(); j++) { vector groupCounts = ct.getGroupCounts(binNames[j]); for (int k = 0; k < groupCounts.size(); k++) { groupTotals[k] += groupCounts[k]; } } for (int j = 0; j < groupTotals.size(); j++) { out << '\t' << groupTotals[j]; } }else { out << '\t' << numSeqsRep; } }else { out << '\t' << numSeqsRep; } //output repSeq if (repfastafile != "") { out << '\t' << seqRepName << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; } else { out << '\t' << taxonomies[index] << endl; } } delete list; if (groupfile != "") { delete groupmap; } }else if (sharedfile != "") { SharedRAbundVectors* lookup = getShared(); vector namesOfGroups = lookup->getNamesGroups(); header = "OTUNumber"; for (int i = 0; i < namesOfGroups.size(); i++) { header += '\t' + namesOfGroups[i]; } if (repfastafile != "") { header += "\trepSeqName\trepSeq"; } header += "\tOTUConTaxonomy"; out << header << endl; vector currentLabels = lookup->getOTUNames(); for (int h = 0; h < lookup->getNumBins(); h++) { if (m->getControl_pressed()) { break; } int index = findIndex(otuLabels, currentLabels[h]); if (index == -1) { m->mothurOut("[ERROR]: " + currentLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->setControl_pressed(true); } if (m->getControl_pressed()) { break; } out << otuLabels[index]; int totalAbund = 0; for (int i = 0; i < lookup->size(); i++) { int abund = lookup->get(h, namesOfGroups[i]); totalAbund += abund; out << '\t' << abund; } //output repSeq if (repfastafile != "") { out << '\t' << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; } else { out << '\t' << taxonomies[index] << endl; } } }else { //relabund SharedRAbundFloatVectors* lookup = getRelabund(); vector namesOfGroups = lookup->getNamesGroups(); header = "OTUNumber"; for (int i = 0; i < namesOfGroups.size(); i++) { header += '\t' + namesOfGroups[i]; } if (repfastafile != "") { header += "\trepSeqName\trepSeq"; } header += "\tOTUConTaxonomy"; out << header << endl; vector currentLabels = lookup->getOTUNames(); for (int h = 0; h < lookup->getNumBins(); h++) { if (m->getControl_pressed()) { break; } int index = findIndex(otuLabels, currentLabels[h]); if (index == -1) { m->mothurOut("[ERROR]: " + currentLabels[h] + " is not in your constaxonomy file, aborting.\n"); 
m->setControl_pressed(true); } if (m->getControl_pressed()) { break; } out << otuLabels[index]; float totalAbund = 0; for (int i = 0; i < lookup->size(); i++) { float abund = lookup->get(h, namesOfGroups[i]); totalAbund += abund; out << '\t' << abund; } //output repSeq if (repfastafile != "") { out << '\t' << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl; } else { out << '\t' << taxonomies[index] << endl; } } } out.close(); if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return 0; } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(outputFileName); m->mothurOutEndLine(); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "execute"); exit(1); } } //********************************************************************************************************************** int CreateDatabaseCommand::findIndex(vector& otuLabels, string label){ try { int index = -1; for (int i = 0; i < otuLabels.size(); i++) { if (util.isLabelEquivalent(otuLabels[i],label)) { index = i; break; } } return index; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "findIndex"); exit(1); } } //********************************************************************************************************************** vector CreateDatabaseCommand::readTax(vector& taxonomies, vector& otuLabels){ try { vector sizes; ifstream in; util.openInputFile(contaxonomyfile, in); //read headers util.getline(in); while (!in.eof()) { if (m->getControl_pressed()) { break; } string otu = ""; string tax = "unknown"; int size = 0; in >> otu >> size; gobble(in); tax = util.getline(in); gobble(in); sizes.push_back(size); taxonomies.push_back(tax); otuLabels.push_back(otu); } in.close(); return sizes; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "readTax"); exit(1); } } //********************************************************************************************************************** vector CreateDatabaseCommand::readFasta(vector& seqs){ try { vector sizes; ifstream in; util.openInputFile(repfastafile, in); set sanity; while (!in.eof()) { if (m->getControl_pressed()) { break; } string binInfo; Sequence seq(in, binInfo, true); gobble(in); //the binInfo should look like - binNumber|size ie. 1|200 if it is binNumber|size|group then the user gave us the wrong repfasta file vector info; util.splitAtChar(binInfo, info, '|'); //if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->setControl_pressed(true); break;} int size = 0; util.mothurConvert(info[1], size); int binNumber = 0; string temp = ""; for (int i = 0; i < info[0].size(); i++) { if (isspace(info[0][i])) {;}else{temp +=info[0][i]; } } util.mothurConvert(util.getSimpleLabel(temp), binNumber); set::iterator it = sanity.find(binNumber); if (it != sanity.end()) { m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. 
When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->setControl_pressed(true); break; }else { sanity.insert(binNumber); } sizes.push_back(size); seqs.push_back(seq); } in.close(); return sizes; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** ListVector* CreateDatabaseCommand::getList(){ try { InputData* input = new InputData(listfile, "list", nullVector); ListVector* list = input->getListVector(); string lastLabel = list->getLabel(); if (label == "") { label = lastLabel; delete input; return list; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((list != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { delete input; return list; } if(labels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((util.anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); delete list; list = input->getListVector(lastLabel); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); //restore real lastlabel to save below list->setLabel(saveLabel); break; } lastLabel = list->getLabel(); //get next line to process //prevent memory leak delete list; list = input->getListVector(); } if (m->getControl_pressed()) { delete input; return list; } //output error messages about any remaining user labels bool needToRun = false; for (set::iterator it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; } else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete list; list = input->getListVector(lastLabel); } delete input; return list; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "getList"); exit(1); } } //********************************************************************************************************************** SharedRAbundVectors* CreateDatabaseCommand::getShared(){ try { InputData input(sharedfile, "sharedfile", nullVector); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); string lastLabel = lookup->getLabel(); if (label == "") { label = lastLabel; return lookup; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
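//For example (hypothetical labels): if label=0.06 is requested but the list file only contains
//0.01, 0.03, 0.05 and 0.10, the loop below falls back to 0.05, the largest label in the file
//below the requested one, and warns the user that 0.06 is not present. getShared() and
//getRelabund() further down apply the same fallback.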
set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((lookup != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { return lookup; } if(labels.count(lookup->getLabel()) == 1){ processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); break; } if ((util.anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup->getLabel(); delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); //restore real lastlabel to save below lookup->setLabels(saveLabel); break; } lastLabel = lookup->getLabel(); //get next line to process //prevent memory leak delete lookup; lookup = input.getSharedRAbundVectors(); } if (m->getControl_pressed()) { return lookup; } //output error messages about any remaining user labels bool needToRun = false; for (set::iterator it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; } else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); } return lookup; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "getShared"); exit(1); } } //********************************************************************************************************************** SharedRAbundFloatVectors* CreateDatabaseCommand::getRelabund(){ try { InputData input(relabundfile, "relabund", nullVector); SharedRAbundFloatVectors* lookupFloat = input.getSharedRAbundFloatVectors(); string lastLabel = lookupFloat->getLabel(); if (label == "") { label = lastLabel; return lookupFloat; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { return 0; } if(labels.count(lookupFloat->getLabel()) == 1){ processedLabels.insert(lookupFloat->getLabel()); userLabels.erase(lookupFloat->getLabel()); break; } if ((util.anyLabelsToProcess(lookupFloat->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookupFloat->getLabel(); delete lookupFloat; lookupFloat = input.getSharedRAbundFloatVectors(lastLabel); processedLabels.insert(lookupFloat->getLabel()); userLabels.erase(lookupFloat->getLabel()); //restore real lastlabel to save below lookupFloat->setLabels(saveLabel); break; } lastLabel = lookupFloat->getLabel(); //get next line to process //prevent memory leak delete lookupFloat; lookupFloat = input.getSharedRAbundFloatVectors(); } if (m->getControl_pressed()) { return 0; } //output error messages about any remaining user labels bool needToRun = false; for (set::iterator it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; } else { m->mothurOut(". 
Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete lookupFloat; lookupFloat = input.getSharedRAbundFloatVectors(lastLabel); } return lookupFloat; } catch(exception& e) { m->errorOut(e, "CreateDatabaseCommand", "getRelabund"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/createdatabasecommand.h000077500000000000000000000026331424121717000226710ustar00rootroot00000000000000#ifndef Mothur_createdatabasecommand_h #define Mothur_createdatabasecommand_h // // createdatabasecommand.h // Mothur // // Created by Sarah Westcott on 3/28/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "command.hpp" #include "listvector.hpp" #include "sequence.hpp" #include "sharedrabundfloatvectors.hpp" class CreateDatabaseCommand : public Command { public: CreateDatabaseCommand(string); ~CreateDatabaseCommand(){} vector setParameters(); string getCommandName() { return "create.database"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Create.database"; } string getDescription() { return "creates database file that includes abundances across groups, representative sequences, and taxonomy for each OTU"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; string relabundfile, sharedfile, listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, countfile; vector outputNames; vector readFasta(vector&); vector readTax(vector&, vector&); ListVector* getList(); SharedRAbundVectors* getShared(); SharedRAbundFloatVectors* getRelabund(); int findIndex(vector&, string); }; #endif mothur-1.48.0/source/commands/degapseqscommand.cpp000077500000000000000000000253271424121717000222530ustar00rootroot00000000000000/* * degapseqscommand.cpp * Mothur * * Created by westcott on 6/21/10. * Copyright 2010 Schloss Lab. All rights reserved.
* */ #include "degapseqscommand.h" //********************************************************************************************************************** vector DegapSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "DegapSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string DegapSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The degap.seqs command reads a fastafile and removes all gap characters.\n"; helpString += "The degap.seqs command parameter are fasta and processors.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required unless you have a valid current fasta file. \n"; helpString += "The processors parameter allows you to enter the number of processors you would like to use. \n"; helpString += "The degap.seqs command should be in the following format: \n"; helpString += "degap.seqs(fasta=yourFastaFile) \n"; helpString += "Example: degap.seqs(fasta=abrecovery.align) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "DegapSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string DegapSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],ng.fasta"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "DegapSeqsCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** DegapSeqsCommand::DegapSeqsCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } string 
temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); if (outputdir == ""){ outputdir += util.hasPath(fastafile); } } } catch(exception& e) { m->errorOut(e, "DegapSeqsCommand", "DegapSeqsCommand"); exit(1); } } //*************************************************************************************************************** int DegapSeqsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Degapping sequences from " + fastafile + " ...\n" ); string tempOutputDir = outputdir; if (outputdir == "") { tempOutputDir = util.hasPath(fastafile); } map variables; variables["[filename]"] = tempOutputDir + util.getRootName(util.getSimpleName(fastafile)); string degapFile = getOutputFileName("fasta", variables); outputNames.push_back(degapFile); outputTypes["fasta"].push_back(degapFile); long start = time(nullptr); int numSeqs = createProcesses(fastafile, degapFile); m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to degap " + toString(numSeqs) + " sequences.\n\n"); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "DegapSeqsCommand", "execute"); exit(1); } } /**************************************************************************************************/ //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). 
struct degapData { string filename; int count; unsigned long long start; unsigned long long end; OutputWriter* threadWriter; MothurOut* m; Utils util; degapData(){} degapData(string f, unsigned long long st, unsigned long long en, OutputWriter* w) { //InputReader* i m = MothurOut::getInstance(); filename = f; threadWriter = w; start = st; end = en; count = 0; } }; //*************************************************************************************************************** void driverDegap(degapData* params){ try{ ifstream inFASTA; params->util.openInputFile(params->filename, inFASTA); inFASTA.seekg(params->start); if (params->start == 0) { params->util.zapGremlins(inFASTA); gobble(inFASTA); } while(!inFASTA.eof()){ if (params->m->getControl_pressed()) { break; } Sequence currSeq(inFASTA); gobble(inFASTA); if (currSeq.getName() != "") { params->threadWriter->write(">"+currSeq.getName()+"\n"+currSeq.getUnaligned()+"\n"); params->count++; } //report progress if((params->count) % 10000 == 0){ params->m->mothurOutJustToScreen(toString(params->count) + "\n"); } #if defined NON_WINDOWS unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->count == params->end) { break; } #endif } //report progress if((params->count) % 10000 != 0){ params->m->mothurOutJustToScreen(toString(params->count) + "\n"); } inFASTA.close(); } catch(exception& e) { params->m->errorOut(e, "DegapSeqsCommand", "driver"); exit(1); } } //*************************************************************************************************************** long long DegapSeqsCommand::createProcesses(string filename, string outputFileName){ try{ //create array of worker threads vector workerThreads; vector data; vector lines; long long num = 0; vector positions; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else if (processors == 1) { lines.push_back(linePair(0, -1)); }//forces it to read whole file else { positions = util.setFilePosFasta(filename, num); if (num < processors) { processors = num; } //figure out how many sequences you have to process int numSeqsPerProcessor = num / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = num - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } } #endif auto synchronizedFile = std::make_shared(outputFileName); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadWriter = new OutputWriter(synchronizedFile); degapData* dataBundle = new degapData(filename, lines[i+1].start, lines[i+1].end, threadWriter); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverDegap, dataBundle)); } OutputWriter* threadWriter = new OutputWriter(synchronizedFile); degapData* dataBundle = new degapData(filename, lines[0].start, lines[0].end, threadWriter); driverDegap(dataBundle); num = dataBundle->count; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->threadWriter; delete data[i]; delete workerThreads[i]; } synchronizedFile->close(); delete threadWriter; delete dataBundle; return num; } catch(exception& e) { m->errorOut(e, "DegapSeqsCommand", "createProcesses"); exit(1); } } 
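//**********************************************************************************************************************
// The degapData bundle and createProcesses above follow mothur's usual fan-out shape: the fasta file is split
// into byte ranges (util.divideFile on non-Windows builds, util.setFilePosFasta otherwise), processors-1 worker
// threads each run driverDegap over one range, the main thread handles the first range itself, and every worker
// writes through an OutputWriter sharing one synchronized output file so records never interleave. The
// stand-alone sketch below only illustrates that shape with plain standard C++; SyncWriter, degapRange and the
// toy sequences are hypothetical stand-ins, not mothur's API.
#if 0
#include <algorithm>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

struct SyncWriter {                                        // hypothetical stand-in for the shared OutputWriter
    std::mutex mtx;
    void write(const std::string& s) { std::lock_guard<std::mutex> lock(mtx); std::cout << s; }
};

// degap the sequences in [first, last) and write them through the shared writer
void degapRange(const std::vector<std::string>& seqs, size_t first, size_t last, SyncWriter& out, long& count) {
    for (size_t i = first; i < last; i++) {
        std::string unaligned = seqs[i];
        unaligned.erase(std::remove_if(unaligned.begin(), unaligned.end(),
                                       [](char c) { return (c == '-') || (c == '.'); }),
                        unaligned.end());
        out.write(">seq_" + std::to_string(i) + "\n" + unaligned + "\n");
        count++;
    }
}

int main() {
    std::vector<std::string> seqs = { "AT-CG..A", "--GGC-TA", "A..TT--C", "TTA..CG-" };   // toy aligned reads
    int processors = 2;
    size_t chunk = seqs.size() / processors;
    SyncWriter out;
    std::vector<long> counts(processors, 0);
    std::vector<std::thread> workers;
    for (int i = 1; i < processors; i++) {                 // workers take ranges 1..processors-1
        size_t first = i * chunk;
        size_t last = (i == processors - 1) ? seqs.size() : (i + 1) * chunk;
        workers.emplace_back(degapRange, std::cref(seqs), first, last, std::ref(out), std::ref(counts[i]));
    }
    degapRange(seqs, 0, chunk, out, counts[0]);            // main thread processes the first range
    long total = counts[0];
    for (int i = 1; i < processors; i++) { workers[i - 1].join(); total += counts[i]; }
    std::cout << "degapped " << total << " sequences\n";
    return 0;
}
#endif
//**********************************************************************************************************************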
//*************************************************************************************************************** mothur-1.48.0/source/commands/degapseqscommand.h000077500000000000000000000017001424121717000217070ustar00rootroot00000000000000#ifndef DEGAPSEQSCOMMAND_H #define DEGAPSEQSCOMMAND_H /* * degapseqscommand.h * Mothur * * Created by westcott on 6/21/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "sequence.hpp" #include "writer.h" class DegapSeqsCommand : public Command { public: DegapSeqsCommand(string); ~DegapSeqsCommand(){} vector setParameters(); string getCommandName() { return "degap.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Degap.seqs"; } string getDescription() { return "removes gap characters from sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: int processors; bool abort; string fastafile; vector outputNames; long long createProcesses(string, string); }; #endif mothur-1.48.0/source/commands/deuniqueseqscommand.cpp000077500000000000000000000254711424121717000230140ustar00rootroot00000000000000/* * deuniqueseqscommand.cpp * Mothur * * Created by westcott on 10/19/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "deuniqueseqscommand.h" #include "sequence.hpp" #include "counttable.h" //********************************************************************************************************************** vector DeUniqueSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "namecount", "namecount", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "namecount", "none","group",false,false,true); parameters.push_back(pcount); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["group"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "DeUniqueSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string DeUniqueSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The deunique.seqs command reads a fastafile and namefile or countfile, and creates a fastafile containing all the sequences. It you provide a count file with group information a group file is also created.\n"; helpString += "The deunique.seqs command parameters are fasta, name and count. 
Fasta is required and you must provide either a name or count file.\n"; helpString += "The deunique.seqs command should be in the following format: \n"; helpString += "deunique.seqs(fasta=yourFastaFile, name=yourNameFile) \n"; helpString += "Example deunique.seqs(fasta=abrecovery.unique.fasta, name=abrecovery.names).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "DeUniqueSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string DeUniqueSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],redundant.fasta"; } else if (type == "group") { pattern = "[filename],redundant.groups"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "DeUniqueSeqsCommand", "getOutputPattern"); exit(1); } } /**************************************************************************************/ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastaFile = validParameter.validFile(parameters, "fasta"); if (fastaFile == "not open") { abort = true; } else if (fastaFile == "not found") { fastaFile = current->getFastaFile(); if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else { current->setFastaFile(fastaFile); } nameFile = validParameter.validFile(parameters, "name"); if (nameFile == "not open") { abort = true; } else if (nameFile == "not found"){ nameFile = ""; } else { current->setNameFile(nameFile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((countfile != "") && (nameFile != "")) { m->mothurOut("When executing a deunique.seqs command you must enter ONLY ONE of the following: count or name.\n"); abort = true; } if ((countfile == "") && (nameFile == "")) { //look for currents nameFile = current->getNameFile(); if (nameFile != "") { m->mothurOut("Using " + nameFile + " as input file for the name parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current name or count files one is required.\n"); abort = true; } } } } } catch(exception& e) { m->errorOut(e, "DeUniqueSeqsCommand", "DeUniqueSeqsCommand"); exit(1); } } /**************************************************************************************/ int DeUniqueSeqsCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } //prepare filenames and open files ofstream out; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastaFile); } string outFastaFile = thisOutputDir + util.getRootName(util.getSimpleName(fastaFile)); map 
variables; variables["[filename]"] = outFastaFile; outFastaFile = getOutputFileName("fasta", variables); util.openOutputFile(outFastaFile, out); map nameMap; CountTable ct; ofstream outGroup; string outGroupFile; vector groups; if (nameFile != "") { util.readNames(nameFile, nameMap); } else { ct.readTable(countfile, true, false); if (ct.hasGroupInfo()) { thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } outGroupFile = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[filename]"] = outGroupFile; outGroupFile = getOutputFileName("group", variables); util.openOutputFile(outGroupFile, outGroup); groups = ct.getNamesOfGroups(); } } if (m->getControl_pressed()) { out.close(); outputTypes.clear(); util.mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); util.mothurRemove(outGroupFile); } } return 0; } ifstream in; util.openInputFile(fastaFile, in); while (!in.eof()) { if (m->getControl_pressed()) { in.close(); out.close(); outputTypes.clear(); util.mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); util.mothurRemove(outGroupFile); } } return 0; } Sequence seq(in); gobble(in); if (seq.getName() != "") { if (nameFile != "") { //look for sequence name in nameMap map::iterator it = nameMap.find(seq.getName()); if (it == nameMap.end()) { m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting.\n"); m->setControl_pressed(true); } else { vector names; util.splitAtComma(it->second, names); //output sequences for (int i = 0; i < names.size(); i++) { out << ">" << names[i] << endl; out << seq.getAligned() << endl; } //remove seq from name map so we can check for seqs in namefile not in fastafile later nameMap.erase(it); } }else { if (ct.hasGroupInfo()) { vector groupCounts = ct.getGroupCounts(seq.getName()); int count = 1; for (int i = 0; i < groups.size(); i++) { for (int j = 0; j < groupCounts[i]; j++) { outGroup << seq.getName()+"_"+toString(count) << '\t' << groups[i] << endl; count++; } } } int numReps = ct.getNumSeqs(seq.getName()); //will report error and set m->control_pressed if not found for (int i = 0; i < numReps; i++) { out << ">" << seq.getName()+"_"+toString(i+1) << endl; out << seq.getAligned() << endl; } } } } in.close(); out.close(); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); } } if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { util.mothurRemove(outGroupFile); } }return 0; } outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outputNames.push_back(outGroupFile); outputTypes["group"].push_back(outGroupFile); } } m->mothurOut("\nOutput File Names: \n"); for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "DeUniqueSeqsCommand", "execute"); exit(1); } } 
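//**********************************************************************************************************************
// A mothur name file pairs each unique (representative) sequence with a comma-separated list of the reads it
// stands for, one pair per line (e.g. "seqA<tab>seqA,seqB,seqC"), which is why execute() above splits the mapped
// value at commas and writes one fasta record per listed name; with a count file that carries group data the
// copies are instead renamed name_1, name_2, ... and paired with their group in the new group file. The snippet
// below is a minimal, self-contained sketch of the name-file expansion only; expandNames and the sample map are
// hypothetical, not mothur code.
#if 0
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

// turn one representative record into one record per redundant read name
std::vector<std::string> expandNames(const std::string& repName, const std::string& aligned,
                                     const std::map<std::string, std::string>& nameMap) {
    std::vector<std::string> records;
    std::map<std::string, std::string>::const_iterator it = nameMap.find(repName);
    if (it == nameMap.end()) { return records; }           // representative missing from the name file
    std::stringstream ss(it->second);
    std::string name;
    while (std::getline(ss, name, ',')) { records.push_back(">" + name + "\n" + aligned + "\n"); }
    return records;
}

int main() {
    std::map<std::string, std::string> nameMap = { {"seqA", "seqA,seqB,seqC"}, {"seqD", "seqD"} };
    for (const std::string& rec : expandNames("seqA", "AT-CG..A", nameMap)) { std::cout << rec; }
    return 0;
}
#endif
//**********************************************************************************************************************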
/**************************************************************************************/ mothur-1.48.0/source/commands/deuniqueseqscommand.h000077500000000000000000000017531424121717000224560ustar00rootroot00000000000000#ifndef DEUNIQUESEQSCOMMAND_H #define DEUNIQUESEQSCOMMAND_H /* * deuniqueseqscommand.h * Mothur * * Created by westcott on 10/19/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" /* This command is the reverse of unique.seqs */ class DeUniqueSeqsCommand : public Command { public: DeUniqueSeqsCommand(string); ~DeUniqueSeqsCommand() = default; vector setParameters(); string getCommandName() { return "deunique.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Deunique.seqs"; } string getDescription() { return "reverse of the unique.seqs command, and creates a fasta file from a fasta and name file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string fastaFile, nameFile, countfile; vector outputNames; bool abort; }; #endif mothur-1.48.0/source/commands/deuniquetreecommand.cpp000077500000000000000000000134371424121717000227770ustar00rootroot00000000000000/* * deuniquetreecommand.cpp * Mothur * * Created by westcott on 5/27/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "deuniquetreecommand.h" #include "treereader.h" //********************************************************************************************************************** vector DeuniqueTreeCommand::setParameters(){ try { CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","tree",false,true,true); parameters.push_back(ptree); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pname); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["tree"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "DeuniqueTreeCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string DeuniqueTreeCommand::getHelpString(){ try { string helpString = ""; helpString += "The deunique.tree command parameters are tree and name. 
Both parameters are required unless you have valid current files.\n"; helpString += "The deunique.tree command should be in the following format: deunique.tree(tree=yourTreeFile, name=yourNameFile).\n"; helpString += "Example deunique.tree(tree=abrecovery.tree, name=abrecovery.names).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "DeuniqueTreeCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string DeuniqueTreeCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "tree") { pattern = "[filename],deunique.tre"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "DeuniqueTreeCommand", "getOutputPattern"); exit(1); } } /***********************************************************/ DeuniqueTreeCommand::DeuniqueTreeCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { abort = true; } else if (treefile == "not found") { //if there is a current design file, use it treefile = current->getTreeFile(); if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter.\n"); } else { m->mothurOut("You have no current tree file and the tree parameter is required.\n"); abort = true; } }else { current->setTreeFile(treefile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { //if there is a current design file, use it namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { m->mothurOut("You have no current name file and the name parameter is required.\n"); abort = true; } }else { current->setNameFile(namefile); } if (outputdir == ""){ outputdir = util.hasPath(treefile); } } } catch(exception& e) { m->errorOut(e, "DeuniqueTreeCommand", "DeuniqueTreeCommand"); exit(1); } } /***********************************************************/ int DeuniqueTreeCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } current->setTreeFile(treefile); TreeReader* reader = new TreeReader(treefile, "", namefile); vector T = reader->getTrees(); map nameMap; util.readNames(namefile, nameMap); delete reader; //print new Tree map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(treefile)); string outputFile = getOutputFileName("tree", variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); ofstream out; util.openOutputFile(outputFile, out); T[0]->print(out, nameMap); out.close(); delete (T[0]->getCountTable()); for (int i = 0; i < T.size(); i++) { delete T[i]; } //set phylip file as new current phylipfile string currentName = ""; itTypes = outputTypes.find("tree"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); } } 
m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "DeuniqueTreeCommand", "execute"); exit(1); } } /***********************************************************/ mothur-1.48.0/source/commands/deuniquetreecommand.h000077500000000000000000000017701424121717000224410ustar00rootroot00000000000000#ifndef DEUNIQUETREECOMMAND_H #define DEUNIQUETREECOMMAND_H /* * deuniquetreecommand.h * Mothur * * Created by westcott on 5/27/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "readtree.h" class DeuniqueTreeCommand : public Command { public: DeuniqueTreeCommand(string); ~DeuniqueTreeCommand() = default; vector setParameters(); string getCommandName() { return "deunique.tree"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Deunique.tree"; } string getDescription() { return "add the redundant sequence names back into a tree of unique sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: int numUniquesInName; bool abort; string treefile, namefile; vector outputNames; map nameMap; int readNamesFile(); }; #endif mothur-1.48.0/source/commands/distancecommand.cpp000066400000000000000000001177621424121717000220750ustar00rootroot00000000000000/* * distancecommand.cpp * Mothur * * Created by Sarah Westcott on 5/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "distancecommand.h" //********************************************************************************************************************** vector DistanceCommand::setParameters(){ try { CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn","column",false,false); parameters.push_back(pcolumn); CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn","",false,false); parameters.push_back(poldfasta); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","phylip-column",false,true, true); parameters.push_back(pfasta); CommandParameter poutput("output", "Multiple", "column-lt-square", "column", "", "", "","phylip-column",false,false, true); parameters.push_back(poutput); CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap-jtt-pmb-pam-kimura", "onegap", "", "", "","",false,false); parameters.push_back(pcalc); CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcountends); CommandParameter pfitcalc("fitcalc", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfitcalc); CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false, true); parameters.push_back(pprocessors); CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "","",false,false, true); parameters.push_back(pcutoff); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; 
calledHelp = false; vector tempOutNames; outputTypes["phylip"] = tempOutNames; outputTypes["column"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "DistanceCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string DistanceCommand::getHelpString(){ try { string helpString = ""; helpString += "The dist.seqs command reads a file containing sequences and creates a distance file.\n"; helpString += "The dist.seqs command parameters are fasta, oldfasta, column, calc, countends, output, compress, cutoff and processors. \n"; helpString += "The fasta parameter is required, unless you have a valid current fasta file.\n"; helpString += "The oldfasta and column parameters allow you to append the distances calculated to the column file.\n"; helpString += "The calc parameter allows you to specify the method of calculating the distances. Your options are: nogaps, onegap or eachgap for dna/rna sequences. If using protein sequences, your calc options are jtt, pmb, pam and kimura. The default is onegap.\n"; helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance. Your options are: T or F. The default is T.\n"; helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n"; helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. The default is column.\n"; helpString += "The processors parameter allows you to specify number of processors to use. The default is 1.\n"; helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed. 
The default is false.\n"; helpString += "The dist.seqs command should be in the following format: \n"; helpString += "dist.seqs(fasta=yourFastaFile, calc=yourCalc, countends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) \n"; helpString += "Example dist.seqs(fasta=amazon.fasta, calc=eachgap, countends=F, cutoff= 2.0, processors=3).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "DistanceCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string DistanceCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "phylip") { pattern = "[filename],[outputtag],dist"; } else if (type == "column") { pattern = "[filename],dist-[filename],[outputtag],dist"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "DistanceCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** DistanceCommand::DistanceCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else if (fastafile == "not open") { abort = true; } else{ current->setFastaFile(fastafile); } oldfastafile = validParameter.validFile(parameters, "oldfasta"); if (oldfastafile == "not found") { oldfastafile = ""; } else if (oldfastafile == "not open") { abort = true; } column = validParameter.validFile(parameters, "column"); if (column == "not found") { column = ""; } else if (column == "not open") { abort = true; } else { current->setColumnFile(column); } if (outputdir == ""){ outputdir += util.hasPath(fastafile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
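//**********************************************************************************************************************
// The three output forms described in the help text above differ only in layout; for three sequences the driver
// functions further down produce files shaped like the following (distances here are made-up illustrative values,
// and in the phylip forms names are padded to at least ten characters):
//
//   column (one "nameA nameB distance" line per pair at or below the cutoff):
//       seqA seqB 0.0213
//       seqA seqC 0.0427
//       seqB seqC 0.0322
//
//   lt (phylip lower triangle: sequence count, then one row per sequence with distances to earlier rows):
//       3
//       seqA
//       seqB        0.0213
//       seqC        0.0427  0.0322
//
//   square (phylip square: sequence count, then the full symmetric matrix with a zero diagonal):
//       3
//       seqA        0.0000  0.0213  0.0427
//       seqB        0.0213  0.0000  0.0322
//       seqC        0.0427  0.0322  0.0000
//**********************************************************************************************************************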
calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "onegap"; } else { if (calc == "default") { calc = "onegap"; } } string temp; temp = validParameter.valid(parameters, "countends"); if(temp == "not found"){ temp = "T"; } countends = util.isTrue(temp); temp = validParameter.valid(parameters, "fitcalc"); if(temp == "not found"){ temp = "F"; } fitCalc = util.isTrue(temp); temp = validParameter.valid(parameters, "cutoff"); if(temp == "not found"){ temp = "1.0"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "compress"); if(temp == "not found"){ temp = "F"; } compress = util.isTrue(temp); output = validParameter.valid(parameters, "output"); if(output == "not found"){ output = "column"; } if (output == "phylip") { output = "lt"; } if (((column != "") && (oldfastafile == "")) || ((column == "") && (oldfastafile != ""))) { m->mothurOut("If you provide column or oldfasta, you must provide both.\n"); abort=true; } if ((column != "") && (oldfastafile != "") && (output != "column")) { m->mothurOut("You have provided column and oldfasta, indicating you want to append distances to your column file. Your output must be in column format to do so.\n"); abort=true; } if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column.\n"); output = "column"; } if ((calc != "onegap") && (calc != "eachgap") && (calc != "nogaps") && (calc != "jtt") && (calc != "pmb") && (calc != "pam") && (calc != "kimura")) { m->mothurOut(calc + " is not a valid calc. Options are eachgap, onegap, nogaps, jtt, pmb, pam and kimura. 
I'll use onegap.\n"); calc = "onegap"; } prot = false; //not using protein sequences if ((calc == "jtt") || (calc == "pmb") || (calc == "pam") || (calc == "kimura")) { prot = true; } } } catch(exception& e) { m->errorOut(e, "DistanceCommand", "DistanceCommand"); exit(1); } } //********************************************************************************************************************** DistanceCommand::DistanceCommand(StorageDatabase*& storageDB, string outputFileRoot, double cut, string outputformat, int proc) : Command() { try { abort = false; calledHelp = false; vector tempOutNames; outputTypes["phylip"] = tempOutNames; outputTypes["column"] = tempOutNames; calc = "onegap"; countends = true; fitCalc = false; cutoff = cut; processors = proc; compress = false; output = outputformat; prot = false; //not using protein sequences numDistsBelowCutoff = 0; db = storageDB; numNewFasta = db->getNumSeqs(); numSeqs = db->getNumSeqs(); if (!db->sameLength()) { m->mothurOut("[ERROR]: your sequences are not the same length, aborting.\n"); return; } if (numSeqs < 2) { m->mothurOut("[ERROR]: you must have at least 2 sequences to calculate the distances, aborting.\n"); return; } string outputFile; map variables; variables["[filename]"] = outputFileRoot; if (output == "lt") { //does the user want lower triangle phylip formatted file variables["[outputtag]"] = "phylip"; outputFile = getOutputFileName("phylip", variables); util.mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); //output numSeqs to phylip formatted dist file }else if (output == "column") { //user wants column format outputFile = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFile); util.mothurRemove(outputFile); } m->mothurOut("\nSequence\tTime\tNum_Dists_Below_Cutoff\n"); createProcesses(outputFile); m->mothurOut("\nOutput File Names:\n"); m->mothurOut(outputFile+"\n\n"); } catch(exception& e) { m->errorOut(e, "DistanceCommand", "DistanceCommand"); exit(1); } } //********************************************************************************************************************** int DistanceCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } numDistsBelowCutoff = 0; ifstream inFASTA; util.openInputFile(fastafile, inFASTA); if (prot) { db = new ProteinDB(inFASTA); } else { db = new SequenceDB(inFASTA); } inFASTA.close(); //save number of new sequence numNewFasta = db->getNumSeqs(); //sanity check the oldfasta and column file as well as add oldfasta sequences to db if ((oldfastafile != "") && (column != "")) { if (!(sanityCheck())) { return 0; } } if (m->getControl_pressed()) { delete db; return 0; } numSeqs = db->getNumSeqs(); if (!db->sameLength()) { m->mothurOut("[ERROR]: your sequences are not the same length, aborting.\n"); return 0; } if (numSeqs < 2) { m->mothurOut("[ERROR]: you must have at least 2 sequences to calculate the distances, aborting.\n"); return 0; } string outputFile; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); if ((oldfastafile != "") && (column != "")) { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(oldfastafile)); } if (output == "lt") { //does the user want lower triangle phylip formatted file variables["[outputtag]"] = "phylip"; outputFile = getOutputFileName("phylip", variables); util.mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); //output numSeqs to phylip formatted dist file }else if (output == "column") { //user wants 
column format if (fitCalc) { variables["[outputtag]"] = "fit"; } outputFile = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFile); //so we don't accidentally overwrite if (outputFile == column) { string tempcolumn = column + ".old"; rename(column.c_str(), tempcolumn.c_str()); } util.mothurRemove(outputFile); }else { //assume square variables["[outputtag]"] = "square"; outputFile = getOutputFileName("phylip", variables); util.mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); } m->mothurOut("\nSequence\tTime\tNum_Dists_Below_Cutoff\n"); createProcesses(outputFile); if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outputFile); return 0; } ifstream fileHandle; fileHandle.open(outputFile.c_str()); if(fileHandle) { gobble(fileHandle); if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.\n"); } } //append the old column file to the new one if ((oldfastafile != "") && (column != "")) { //we had to rename the column file so we didnt overwrite above, but we want to keep old name if (outputFile == column) { string tempcolumn = column + ".old"; util.appendFiles(tempcolumn, outputFile); util.mothurRemove(tempcolumn); }else{ if (!fitCalc) { util.appendFiles(outputFile, column); util.mothurRemove(outputFile); outputFile = column; } } outputTypes["column"].clear(); outputTypes["column"].push_back(outputFile); } if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outputFile); return 0; } //set phylip file as new current phylipfile string currentName = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setPhylipFile(currentName); } } //set column file as new current columnfile itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setColumnFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(outputFile+"\n\n"); if (compress) { m->mothurOut("Compressing...\n"); m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)\n"); system(("gzip -v " + outputFile).c_str()); outputNames.push_back(outputFile + ".gz"); }else { outputNames.push_back(outputFile); } return 0; } catch(exception& e) { m->errorOut(e, "DistanceCommand", "execute"); exit(1); } } /**************************************************************************************************/ void driverColumn(distanceData* params){ try { ValidCalculators validCalculator; DistCalc* distCalculator; if (!params->prot) { if (params->countends) { if (validCalculator.isValidCalculator("distance", params->calc) ) { if (params->calc == "nogaps") { distCalculator = new ignoreGaps(params->cutoff); } else if (params->calc == "eachgap") { distCalculator = new eachGapDist(params->cutoff); } else if (params->calc == "onegap") { distCalculator = new oneGapDist(params->cutoff); } } }else { if (validCalculator.isValidCalculator("distance", params->calc) ) { if (params->calc == "nogaps") { distCalculator = new ignoreGaps(params->cutoff); } else if (params->calc == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(params->cutoff); } else if (params->calc == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(params->cutoff); } } } }else { if (validCalculator.isValidCalculator("protdist", params->calc) ) { if (params->calc == "jtt") { distCalculator = new 
JTT(params->cutoff); } else if (params->calc == "pmb") { distCalculator = new PMB(params->cutoff); } else if (params->calc == "pam") { distCalculator = new PAM(params->cutoff); } else if (params->calc == "kimura") { distCalculator = new Kimura(params->cutoff); } } } int startTime = time(nullptr); params->count = 0; string buffer = ""; for(int i=params->startLine;iendLine;i++){ Sequence seqI; Protein seqIP; string nameI = ""; if (params->prot) { seqIP = params->db->getProt(i); nameI = seqIP.getName(); } else { seqI = params->db->getSeq(i); nameI = seqI.getName(); } for(int j=0;jm->getControl_pressed()) { break; } if ((i >= params->numNewFasta) && (j >= params->numNewFasta)) { break; } double dist = 1.0; string nameJ = ""; if (params->prot) { Protein seqJP = params->db->getProt(j); nameJ = seqJP.getName(); dist = distCalculator->calcDist(seqIP, seqJP); } else { Sequence seqJ = params->db->getSeq(j); nameJ = seqJ.getName(); dist = distCalculator->calcDist(seqI, seqJ); } if(dist <= params->cutoff){ buffer += (nameI + " " + nameJ + " " + toString(dist) + "\n"); params->count++; } } if(i % 100 == 0){ params->threadWriter->write(buffer); buffer = ""; params->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(nullptr) - startTime) + "\t" + toString(params->count) +"\n"); } } params->threadWriter->write(buffer); if((params->endLine-1) % 100 != 0){ params->m->mothurOutJustToScreen(toString(params->endLine-1) + "\t" + toString(time(nullptr) - startTime) + "\t" + toString(params->count) +"\n"); } delete distCalculator; } catch(exception& e) { params->m->errorOut(e, "DistanceCommand", "driverColumn"); exit(1); } } /**************************************************************************************************/ void driverLt(distanceData* params){ try { ValidCalculators validCalculator; DistCalc* distCalculator; double cutoff = 1.0; if (!params->prot) { if (params->countends) { if (validCalculator.isValidCalculator("distance", params->calc) ) { if (params->calc == "nogaps") { distCalculator = new ignoreGaps(cutoff); } else if (params->calc == "eachgap") { distCalculator = new eachGapDist(cutoff); } else if (params->calc == "onegap") { distCalculator = new oneGapDist(cutoff); } } }else { if (validCalculator.isValidCalculator("distance", params->calc) ) { if (params->calc == "nogaps") { distCalculator = new ignoreGaps(cutoff); } else if (params->calc == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(cutoff); } else if (params->calc == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(cutoff); } } } }else { if (validCalculator.isValidCalculator("protdist", params->calc) ) { if (params->calc == "jtt") { distCalculator = new JTT(params->cutoff); } else if (params->calc == "pmb") { distCalculator = new PMB(params->cutoff); } else if (params->calc == "pam") { distCalculator = new PAM(params->cutoff); } else if (params->calc == "kimura") { distCalculator = new Kimura(params->cutoff); } } } int startTime = time(nullptr); long long numSeqs = params->db->getNumSeqs(); //column file ofstream outFile; params->util.openOutputFile(params->outputFileName, outFile); outFile.setf(ios::fixed, ios::showpoint); outFile << setprecision(4); if(params->startLine == 0){ outFile << numSeqs << endl; } params->count = 0; for(int i=params->startLine;iendLine;i++){ Sequence seqI; Protein seqIP; string nameI = ""; if (params->prot) { seqIP = params->db->getProt(i); nameI = seqIP.getName(); } else { seqI = params->db->getSeq(i); nameI = seqI.getName(); } if (nameI.length() < 10) { while (nameI.length() < 
10) { nameI += " "; } } outFile << nameI; for(int j=0;jm->getControl_pressed()) { break; } if ((i >= params->numNewFasta) && (j >= params->numNewFasta)) { break; } double dist = 1.0; if (params->prot) { Protein seqJP = params->db->getProt(j); dist = distCalculator->calcDist(seqIP, seqJP); } else { Sequence seqJ = params->db->getSeq(j); dist = distCalculator->calcDist(seqI, seqJ); } if(dist <= params->cutoff){ params->count++; } outFile << '\t' << dist; } outFile << endl; if(i % 100 == 0){ params->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(nullptr) - startTime) + "\t" + toString(params->count) +"\n"); } } if((params->endLine-1) % 100 != 0){ params->m->mothurOutJustToScreen(toString(params->endLine-1) + "\t" + toString(time(nullptr) - startTime) + "\t" + toString(params->count) +"\n"); } outFile.close(); delete distCalculator; } catch(exception& e) { params->m->errorOut(e, "DistanceCommand", "driverLt"); exit(1); } } /**************************************************************************************************/ void driverSquare(distanceData* params){ try { ValidCalculators validCalculator; DistCalc* distCalculator; double cutoff = 1.0; if (!params->prot) { if (params->countends) { if (validCalculator.isValidCalculator("distance", params->calc) ) { if (params->calc == "nogaps") { distCalculator = new ignoreGaps(cutoff); } else if (params->calc == "eachgap") { distCalculator = new eachGapDist(cutoff); } else if (params->calc == "onegap") { distCalculator = new oneGapDist(cutoff); } } }else { if (validCalculator.isValidCalculator("distance", params->calc) ) { if (params->calc == "nogaps") { distCalculator = new ignoreGaps(cutoff); } else if (params->calc == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(cutoff); } else if (params->calc == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(cutoff); } } } }else { if (validCalculator.isValidCalculator("protdist", params->calc) ) { if (params->calc == "jtt") { distCalculator = new JTT(params->cutoff); } else if (params->calc == "pmb") { distCalculator = new PMB(params->cutoff); } else if (params->calc == "pam") { distCalculator = new PAM(params->cutoff); } else if (params->calc == "kimura") { distCalculator = new Kimura(params->cutoff); } } } int startTime = time(nullptr); //column file ofstream outFile; params->util.openOutputFile(params->outputFileName, outFile); outFile.setf(ios::fixed, ios::showpoint); outFile << setprecision(4); long long numSeqs = params->db->getNumSeqs(); if(params->startLine == 0){ outFile << numSeqs << endl; } params->count = 0; for(int i=params->startLine;iendLine;i++){ Sequence seqI; Protein seqIP; string nameI = ""; if (params->prot) { seqIP = params->db->getProt(i); nameI = seqIP.getName(); } else { seqI = params->db->getSeq(i); nameI = seqI.getName(); } if (nameI.length() < 10) { while (nameI.length() < 10) { nameI += " "; } } outFile << nameI << '\t'; for(int j=0;jm->getControl_pressed()) { break; } double dist = 1.0; if (i == j) { dist = 0.0000; } else { if (params->prot) { Protein seqJP = params->db->getProt(j); dist = distCalculator->calcDist(seqIP, seqJP); } else { Sequence seqJ = params->db->getSeq(j); dist = distCalculator->calcDist(seqI, seqJ); } } if(dist <= params->cutoff){ params->count++; } outFile << dist << '\t'; } outFile << endl; if(i % 100 == 0){ params->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(nullptr) - startTime) + "\t" + toString(params->count) +"\n"); } } if((params->endLine-1) % 100 != 0){ 
params->m->mothurOutJustToScreen(toString(params->endLine-1) + "\t" + toString(time(nullptr) - startTime) + "\t" + toString(params->count) +"\n"); } outFile.close(); delete distCalculator; } catch(exception& e) { params->m->errorOut(e, "DistanceCommand", "driverSquare"); exit(1); } } /**************************************************************************************************/ void driverFitCalc(distanceData* params){ try { ValidCalculators validCalculator; DistCalc* distCalculator; if (!params->prot) { if (params->countends) { if (validCalculator.isValidCalculator("distance", params->calc) ) { if (params->calc == "nogaps") { distCalculator = new ignoreGaps(params->cutoff); } else if (params->calc == "eachgap") { distCalculator = new eachGapDist(params->cutoff); } else if (params->calc == "onegap") { distCalculator = new oneGapDist(params->cutoff); } } }else { if (validCalculator.isValidCalculator("distance", params->calc) ) { if (params->calc == "nogaps") { distCalculator = new ignoreGaps(params->cutoff); } else if (params->calc == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(params->cutoff); } else if (params->calc == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(params->cutoff); } } } }else { if (validCalculator.isValidCalculator("protdist", params->calc) ) { if (params->calc == "jtt") { distCalculator = new JTT(params->cutoff); } else if (params->calc == "pmb") { distCalculator = new PMB(params->cutoff); } else if (params->calc == "pam") { distCalculator = new PAM(params->cutoff); } else if (params->calc == "kimura") { distCalculator = new Kimura(params->cutoff); } } } int startTime = time(nullptr); params->count = 0; string buffer = ""; for(int i=params->startLine;iendLine;i++){ Sequence seqI; Protein seqIP; string nameI = ""; if (params->prot) { seqIP = params->oldFastaDB->getProt(i); nameI = seqIP.getName(); } else { seqI = params->oldFastaDB->getSeq(i); nameI = seqI.getName(); } for(int j = 0; j < params->db->getNumSeqs(); j++){ if (params->m->getControl_pressed()) { break; } double dist = 1.0; string nameJ = ""; if (params->prot) { Protein seqJP = params->db->getProt(j); nameJ = seqJP.getName(); dist = distCalculator->calcDist(seqIP, seqJP); } else { Sequence seqJ = params->db->getSeq(j); nameJ = seqJ.getName(); dist = distCalculator->calcDist(seqI, seqJ); } if(dist <= params->cutoff){ buffer += nameI + " " + nameJ + " " + toString(dist) + "\n"; params->count++; } } if(i % 100 == 0){ params->threadWriter->write(buffer); buffer = ""; params->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(nullptr) - startTime) + "\t" + toString(params->count) +"\n"); } } params->threadWriter->write(buffer); if((params->endLine-1) % 100 != 0){ params->m->mothurOutJustToScreen(toString(params->endLine-1) + "\t" + toString(time(nullptr) - startTime) + "\t" + toString(params->count) +"\n"); } delete distCalculator; } catch(exception& e) { params->m->errorOut(e, "DistanceCommand", "driverFitCalc"); exit(1); } } /**************************************************************************************************/ void DistanceCommand::createProcesses(string filename) { try { long long num = db->getNumSeqs(); long long distsBelowCutoff = 0; time_t start, end; time(&start); //create array of worker threads vector workerThreads; vector data; double numDists = 0; if (output == "square") { numDists = numSeqs; } else { for(int i=0;i processors) { break; } } } } if (numDists < processors) { processors = numDists; } vector lines; for (int i = 0; i < processors; i++) { 
linePair tempLine; lines.push_back(tempLine); if (output != "square") { lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs); lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs); }else{ lines[i].start = int ((float(i)/float(processors)) * numSeqs); lines[i].end = int ((float(i+1)/float(processors)) * numSeqs); } } auto synchronizedOutputFile = std::make_shared(filename); synchronizedOutputFile->setFixedShowPoint(); synchronizedOutputFile->setPrecision(4); StorageDatabase* oldFastaDB; if (fitCalc) { ifstream inFASTA; util.openInputFile(oldfastafile, inFASTA); if (!prot) { oldFastaDB = new SequenceDB(inFASTA); } else { oldFastaDB = new ProteinDB(inFASTA); } inFASTA.close(); lines.clear(); if (processors > oldFastaDB->getNumSeqs()) { processors = oldFastaDB->getNumSeqs(); } int remainingSeqs = oldFastaDB->getNumSeqs(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numSeqsToFit = remainingSeqs; //case for last processor if (remainingProcessors != 1) { numSeqsToFit = ceil(remainingSeqs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numSeqsToFit))); //startIndex, endIndex startIndex = startIndex + numSeqsToFit; remainingSeqs -= numSeqsToFit; } } //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadWriter = nullptr; distanceData* dataBundle = nullptr; string extension = toString(i+1) + ".temp"; if (output == "column") { threadWriter = new OutputWriter(synchronizedOutputFile); dataBundle = new distanceData(threadWriter); }else { dataBundle = new distanceData(filename+extension); } dataBundle->setVariables(lines[i+1].start, lines[i+1].end, cutoff, db, oldFastaDB, calc, prot, numNewFasta, countends); data.push_back(dataBundle); std::thread* thisThread = nullptr; if (output == "column") { if (fitCalc) { thisThread = new std::thread(driverFitCalc, dataBundle); } else { thisThread = new std::thread(driverColumn, dataBundle); } } else if (output == "lt") { thisThread = new std::thread(driverLt, dataBundle); } else { thisThread = new std::thread(driverSquare, dataBundle); } workerThreads.push_back(thisThread); } OutputWriter* threadWriter = nullptr; distanceData* dataBundle = nullptr; if (output == "column") { threadWriter = new OutputWriter(synchronizedOutputFile); dataBundle = new distanceData(threadWriter); }else { dataBundle = new distanceData(filename); } dataBundle->setVariables(lines[0].start, lines[0].end, cutoff, db, oldFastaDB, calc, prot, numNewFasta, countends); if (output == "column") { if (fitCalc) { driverFitCalc(dataBundle); } else { driverColumn(dataBundle); } } else if (output == "lt") { driverLt(dataBundle); } else { driverSquare(dataBundle); } distsBelowCutoff = dataBundle->count; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); distsBelowCutoff += data[i]->count; if (output == "column") { delete data[i]->threadWriter; } else { string extension = toString(i+1) + ".temp"; util.appendFiles((filename+extension), filename); util.mothurRemove(filename+extension); } delete data[i]; delete workerThreads[i]; } if (output == "column") { synchronizedOutputFile->close(); delete threadWriter; } delete dataBundle; time(&end); m->mothurOut("\nIt took " + toString(difftime(end, start)) + " secs to find distances for " + toString(num) + " sequences. 
" + toString(distsBelowCutoff+numDistsBelowCutoff) + " distances below cutoff " + toString(cutoff) + ".\n\n"); } catch(exception& e) { m->errorOut(e, "DistanceCommand", "createProcesses"); exit(1); } } /**************************************************************************************************/ //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff, //but no sequences can be in the column file that are not in oldfasta. also, if a distance is above the cutoff given then remove it. //also check to make sure the 2 files have the same alignment length. bool DistanceCommand::sanityCheck() { try{ bool good = true; //make sure the 2 fasta files have the same alignment length ifstream in; util.openInputFile(fastafile, in); int fastaAlignLength = 0; if (in) { if (!prot) { Sequence tempIn(in); fastaAlignLength = tempIn.getAligned().length(); }else { Protein tempIn(in); fastaAlignLength = tempIn.getAligned().size(); } } in.close(); ifstream in2; util.openInputFile(oldfastafile, in2); int oldfastaAlignLength = 0; if (in2) { if (!prot) { Sequence tempIn(in2); oldfastaAlignLength = tempIn.getAligned().length(); }else { Protein tempIn(in2); oldfastaAlignLength = tempIn.getAligned().size(); } } in2.close(); if (fastaAlignLength != oldfastaAlignLength) { m->mothurOut("fasta files do not have the same alignment length.\n"); return false; } //read fasta file and save names as well as adding them to the alignDB set namesOldFasta; ifstream inFasta; util.openInputFile(oldfastafile, inFasta); while (!inFasta.eof()) { if (m->getControl_pressed()) { inFasta.close(); return good; } if (!prot) { Sequence temp(inFasta); gobble(inFasta); if (temp.getName() != "") { namesOldFasta.insert(temp.getName()); //save name if (!fitCalc) { db->push_back(temp); }//add to DB } }else { Protein temp(inFasta); gobble(inFasta); if (temp.getName() != "") { namesOldFasta.insert(temp.getName()); //save name if (!fitCalc) { db->push_back(temp); }//add to DB } } } inFasta.close(); //read through the column file checking names and removing distances above the cutoff ifstream inDist; util.openInputFile(column, inDist); ofstream outDist; string outputFile = column + ".temp"; util.openOutputFile(outputFile, outDist); string name1, name2; float dist; while (!inDist.eof()) { if (m->getControl_pressed()) { inDist.close(); outDist.close(); util.mothurRemove(outputFile); return good; } inDist >> name1; gobble(inDist); inDist >> name2; gobble(inDist); inDist >> dist; gobble(inDist); //both names are in fasta file and distance is below cutoff if ((namesOldFasta.count(name1) == 0) || (namesOldFasta.count(name2) == 0)) { good = false; break; } else{ if (dist <= cutoff) { numDistsBelowCutoff++; outDist << name1 << '\t' << name2 << '\t' << dist << endl; } } } inDist.close(); outDist.close(); if (good) { util.mothurRemove(column); rename(outputFile.c_str(), column.c_str()); }else{ util.mothurRemove(outputFile); //temp file is bad because file mismatch above } return good; } catch(exception& e) { m->errorOut(e, "DistanceCommand", "sanityCheck"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/distancecommand.h000077500000000000000000000056001424121717000215300ustar00rootroot00000000000000#ifndef DISTANCECOMMAND_H #define DISTANCECOMMAND_H /* * distancecommand.h * Mothur * * Created by Sarah Westcott on 5/7/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "mothur.h" #include "command.hpp" #include "validcalculator.h" #include "calculator.h" #include "sequencedb.h" #include "ignoregaps.h" #include "eachgapdist.h" #include "eachgapignore.h" #include "onegapdist.h" #include "onegapignore.h" #include "jtt.hpp" #include "writer.h" #include "proteindb.hpp" #include "pmb.hpp" #include "pam.hpp" #include "kimura.hpp" /**************************************************************************************************/ struct distanceData { long long startLine, endLine, numNewFasta, count; float cutoff; StorageDatabase* db; StorageDatabase* oldFastaDB; MothurOut* m; OutputWriter* threadWriter; string outputFileName, calc; bool countends, prot; Utils util; distanceData(){} distanceData(OutputWriter* ofn) { threadWriter = ofn; m = MothurOut::getInstance(); } distanceData(string ofn) { outputFileName = ofn; m = MothurOut::getInstance(); } void setVariables(int s, int e, float c, StorageDatabase*& dbsp, StorageDatabase*& oldfn, string Est, bool met, long long num, bool cnt) { startLine = s; endLine = e; cutoff = c; db = dbsp; oldFastaDB = oldfn; calc = Est; prot = met; numNewFasta = num; countends = cnt; count = 0; } }; /**************************************************************************************************/ class DistanceCommand : public Command { public: DistanceCommand(string); DistanceCommand(StorageDatabase*&, string, double, string, int); //used by mothur's splitMatrix class to avoid rereading files ~DistanceCommand() = default; vector setParameters(); string getCommandName() { return "dist.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD (2010). The effects of alignment quality, distance calculation method, sequence filtering, and region on the analysis of 16S rRNA gene-based studies. PLoS Comput Biol 6: e1000844. \nhttp://www.mothur.org/wiki/Dist.seqs"; } string getDescription() { return "calculate the pairwaise distances between aligned sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: StorageDatabase* db; string output, fastafile, calc, oldfastafile, column; int processors; long long numNewFasta, numSeqs, numDistsBelowCutoff; float cutoff; bool abort, countends, fitCalc, prot, compress; vector outputNames; void createProcesses(string); bool sanityCheck(); }; #endif /**************************************************************************************************/ mothur-1.48.0/source/commands/distsharedcommand.cpp000077500000000000000000000775621424121717000224430ustar00rootroot00000000000000/* * distsharedcommand.cpp * Mothur * * Created by Sarah Westcott on 5/20/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "distsharedcommand.h" #include "subsample.h" //********************************************************************************************************************** vector DistSharedCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","phylip",false,true,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter poutput("output", "Multiple", "lt-square-column", "lt", "", "", "","",false,false); parameters.push_back(poutput); CommandParameter pmode("mode", "Multiple", "average-median", "average", "", "", "","",false,false); parameters.push_back(pmode); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pwithreplacement("withreplacement", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pwithreplacement); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["phylip"] = tempOutNames; abort = false; calledHelp = false; allLines = true; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "DistSharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string DistSharedCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; helpString += "The dist.shared command parameters are shared, groups, calc, output, processors, subsample, iters, mode, and label. shared is a required, unless you have a valid current file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n"; helpString += "The group names are separated by dashes. 
The label parameter allows you to select what distance levels you would like distance matrices created for, and is also separated by dashes.\n"; helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n"; helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n"; helpString += "The withreplacement parameter allows you to indicate you want to subsample your data allowing for the same read to be included multiple times. Default=f. \n"; helpString += "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n"; helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, column and square. The default is lt.\n"; helpString += "The mode parameter allows you to specify if you want the average or the median values reported when subsampling. Options are average, and median. The default is average.\n"; helpString += "Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n"; helpString += "The default value for groups is all the groups in your groupfile.\n"; helpString += "The default value for calc is jclass and thetayc.\n"; helpString += validCalculator.printCalc("matrix"); helpString += "The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "DistSharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string DistSharedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "phylip") { pattern = "[filename],[calc],[distance],[outputtag],dist-[filename],[calc],[distance],[outputtag],[tag2],dist"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "DistSharedCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** DistSharedCommand::DistSharedCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else if (sharedfile == "not open") { sharedfile = ""; abort = true; } else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir += util.hasPath(sharedfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
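// label: dash-separated list of distance labels to process; label=all (the default) processes every label in the shared file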
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; }
else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } }
output = validParameter.valid(parameters, "output"); if(output == "not found"){ output = "lt"; }
if ((output != "lt") && (output != "square") && (output != "column")) { m->mothurOut(output + " is not a valid output form. Options are lt, column and square. I will use lt.\n"); output = "lt"; }
mode = validParameter.valid(parameters, "mode"); if(mode == "not found"){ mode = "average"; }
if ((mode != "average") && (mode != "median")) { m->mothurOut(mode + " is not a valid mode. Options are average and median. I will use average.\n"); mode = "average"; }
groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; }
else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } }
string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp);
calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "jclass-thetayc"; } else { if (calc == "default") { calc = "jclass-thetayc"; } }
util.splitAtDash(calc, Estimators);
if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs
for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } }
temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters);
temp = validParameter.valid(parameters, "subsample"); if (temp == "not found") { temp = "F"; }
if (util.isNumeric1(temp)) { util.mothurConvert(temp, subsampleSize); subsample = true; }
else { if (util.isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
else { subsample = false; } }
if (subsample == false) { iters = 1; }
temp = validParameter.valid(parameters, "withreplacement"); if (temp == "not found"){ temp = "f"; } withReplacement = util.isTrue(temp); } }
catch(exception& e) { m->errorOut(e, "DistSharedCommand", "DistSharedCommand"); exit(1); } }
//**********************************************************************************************************************
DistSharedCommand::~DistSharedCommand(){}
//**********************************************************************************************************************
int DistSharedCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; }
time_t start = time(nullptr);
InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = "";
SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups();
if (lookup->size() < 2) { m->mothurOut("[ERROR]: You have not provided enough valid groups. 
I cannot run the command.\n"); delete lookup; return 0;} if (subsample) { if (subsampleSize == -1) { //user has not set size, set size = smallest samples size subsampleSize = lookup->getNumSeqsSmallestGroup(); m->mothurOut("\nSetting sample size to " + toString(subsampleSize) + ".\n\n"); }else { lookup->removeGroups(subsampleSize); Groups = lookup->getNamesGroups(); } if (lookup->size() < 2) { m->mothurOut("[ERROR]: You have not provided enough valid groups. I cannot run the command.\n"); m->setControl_pressed(true); return 0; } } numGroups = lookup->size(); if (m->getControl_pressed()) { delete lookup; return 0; } while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } createProcesses(lookup); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } if (m->getControl_pressed()) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set phylip file as new current phylipfile string currentName = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setPhylipFile(currentName); } } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " seconds to run dist.shared.\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "DistSharedCommand", "execute"); exit(1); } } /***********************************************************/ void DistSharedCommand::printDists(ostream& out, vector< vector >& simMatrix, vector groupNames) { try { out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); if (output == "lt") { out << simMatrix.size() << endl; for (int b = 0; b < simMatrix.size(); b++) { out << groupNames[b]; for (int n = 0; n < b; n++) { out << '\t' << simMatrix[b][n]; } out << endl; } }else if (output == "column") { for (int b = 0; b < simMatrix.size(); b++) { for (int n = 0; n < b; n++) { out << groupNames[b] << '\t' << groupNames[n] << '\t' << simMatrix[b][n] << endl; } } }else{ out << simMatrix.size() << endl; for (int b = 0; b < simMatrix.size(); b++) { out << groupNames[b]; for (int n = 0; n < simMatrix[b].size(); n++) { out << '\t' << simMatrix[b][n]; } out << endl; } } } catch(exception& e) { m->errorOut(e, "DistSharedCommand", "printSims"); exit(1); } } /**************************************************************************************************/ int driver(vector& thisLookup, vector< vector >& calcDists, vector matrixCalculators, MothurOut* m) { try { vector subset; for (int k = 0; k < thisLookup.size(); k++) { // pass cdd each set of groups to compare for (int l = 0; l < k; l++) { if (k != l) { //we dont need to similarity of a groups to itself subset.clear(); //clear out old pair of sharedrabunds //add new pair of sharedrabunds subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); for(int i=0;igetNeedsAll()) { //load subset with rest of lookup for those calcs that need everyone to calc for a pair for (int w = 0; w < thisLookup.size(); w++) { if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); } } } vector tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs if (m->getControl_pressed()) { return 1; } seqDist temp(l, k, tempdata[0]); calcDists[i].push_back(temp); } } } } return 0; } catch(exception& e) { m->errorOut(e, 
"DistSharedCommand", "driver"); exit(1); } } /***********************************************************/ int process(distSharedData* params){ try { vector matrixCalculators; ValidCalculators validCalculator; for (int i=0; iEstimators.size(); i++) { if (validCalculator.isValidCalculator("matrix", params->Estimators[i]) ) { if (params->Estimators[i] == "sharedsobs") { matrixCalculators.push_back(new SharedSobsCS()); }else if (params->Estimators[i] == "sharedchao") { matrixCalculators.push_back(new SharedChao1()); }else if (params->Estimators[i] == "sharedace") { matrixCalculators.push_back(new SharedAce()); }else if (params->Estimators[i] == "jabund") { matrixCalculators.push_back(new JAbund()); }else if (params->Estimators[i] == "sorabund") { matrixCalculators.push_back(new SorAbund()); }else if (params->Estimators[i] == "jclass") { matrixCalculators.push_back(new Jclass()); }else if (params->Estimators[i] == "sorclass") { matrixCalculators.push_back(new SorClass()); }else if (params->Estimators[i] == "jest") { matrixCalculators.push_back(new Jest()); }else if (params->Estimators[i] == "sorest") { matrixCalculators.push_back(new SorEst()); }else if (params->Estimators[i] == "thetayc") { matrixCalculators.push_back(new ThetaYC()); }else if (params->Estimators[i] == "thetan") { matrixCalculators.push_back(new ThetaN()); }else if (params->Estimators[i] == "kstest") { matrixCalculators.push_back(new KSTest()); }else if (params->Estimators[i] == "sharednseqs") { matrixCalculators.push_back(new SharedNSeqs()); }else if (params->Estimators[i] == "ochiai") { matrixCalculators.push_back(new Ochiai()); }else if (params->Estimators[i] == "anderberg") { matrixCalculators.push_back(new Anderberg()); }else if (params->Estimators[i] == "kulczynski") { matrixCalculators.push_back(new Kulczynski()); }else if (params->Estimators[i] == "kulczynskicody") { matrixCalculators.push_back(new KulczynskiCody()); }else if (params->Estimators[i] == "lennon") { matrixCalculators.push_back(new Lennon()); }else if (params->Estimators[i] == "morisitahorn") { matrixCalculators.push_back(new MorHorn()); }else if (params->Estimators[i] == "braycurtis") { matrixCalculators.push_back(new BrayCurtis()); }else if (params->Estimators[i] == "whittaker") { matrixCalculators.push_back(new Whittaker()); }else if (params->Estimators[i] == "odum") { matrixCalculators.push_back(new Odum()); }else if (params->Estimators[i] == "canberra") { matrixCalculators.push_back(new Canberra()); }else if (params->Estimators[i] == "structeuclidean") { matrixCalculators.push_back(new StructEuclidean()); }else if (params->Estimators[i] == "structchord") { matrixCalculators.push_back(new StructChord()); }else if (params->Estimators[i] == "hellinger") { matrixCalculators.push_back(new Hellinger()); }else if (params->Estimators[i] == "manhattan") { matrixCalculators.push_back(new Manhattan()); }else if (params->Estimators[i] == "structpearson") { matrixCalculators.push_back(new StructPearson()); }else if (params->Estimators[i] == "soergel") { matrixCalculators.push_back(new Soergel()); }else if (params->Estimators[i] == "spearman") { matrixCalculators.push_back(new Spearman()); }else if (params->Estimators[i] == "structkulczynski") { matrixCalculators.push_back(new StructKulczynski()); }else if (params->Estimators[i] == "speciesprofile") { matrixCalculators.push_back(new SpeciesProfile()); }else if (params->Estimators[i] == "hamming") { matrixCalculators.push_back(new Hamming()); }else if (params->Estimators[i] == "structchi2") { 
matrixCalculators.push_back(new StructChi2()); }else if (params->Estimators[i] == "gower") { matrixCalculators.push_back(new Gower()); }else if (params->Estimators[i] == "memchi2") { matrixCalculators.push_back(new MemChi2()); }else if (params->Estimators[i] == "memchord") { matrixCalculators.push_back(new MemChord()); }else if (params->Estimators[i] == "memeuclidean") { matrixCalculators.push_back(new MemEuclidean()); }else if (params->Estimators[i] == "mempearson") { matrixCalculators.push_back(new MemPearson()); }else if (params->Estimators[i] == "jsd") { matrixCalculators.push_back(new JSD()); }else if (params->Estimators[i] == "rjsd") { matrixCalculators.push_back(new RJSD()); } } } //if the users entered no valid calculators don't execute command if (matrixCalculators.size() == 0) { params->m->mothurOut("No valid calculators.\n"); return 0; } params->Estimators.clear(); for (int i=0; iEstimators.push_back(matrixCalculators[i]->getName()); } vector< vector > calcDists; calcDists.resize(matrixCalculators.size()); SubSample sample; for (int thisIter = 0; thisIter < params->numIters; thisIter++) { SharedRAbundVectors* thisItersLookup = new SharedRAbundVectors(*params->thisLookup); vector namesOfGroups = thisItersLookup->getNamesGroups(); time_t start = time(nullptr); if (params->subsample) { if (params->withReplacement) { sample.getSampleWithReplacement(thisItersLookup, params->subsampleSize); } else { sample.getSample(thisItersLookup, params->subsampleSize); } } if (params->m->getDebug()) { params->m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " seconds to subsample the shared file.\n"); } //params->m->mothurOut(toString(thisIter) + " It took " + toString(time(nullptr) - start) + " seconds to subsample the shared file.\n"); vector thisItersRabunds = thisItersLookup->getSharedRAbundVectors(); vector thisItersGroupNames = params->thisLookup->getNamesGroups(); start = time(nullptr); driver(thisItersRabunds, calcDists, matrixCalculators, params->m); if (params->m->getDebug()) { params->m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " seconds to calc dist for shared file.\n"); } //params->m->mothurOut(toString(thisIter) + " It took " + toString(time(nullptr) - start) + " seconds to calc dist for shared file.\n"); for (int i = 0; i < thisItersRabunds.size(); i++) { delete thisItersRabunds[i]; } if (params->subsample){ if((thisIter+1) % 100 == 0){ params->m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); } params->calcDistsTotals.push_back(calcDists); for (int i = 0; i < calcDists.size(); i++) { for (int j = 0; j < calcDists[i].size(); j++) { if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisItersGroupNames[calcDists[i][j].seq1] + " - " + thisItersGroupNames[calcDists[i][j].seq2] + " distance = " + toString(calcDists[i][j].dist) + ".\n"); } } } }else { //print results for whole dataset for (int i = 0; i < calcDists.size(); i++) { if (params->m->getControl_pressed()) { break; } //initialize matrix vector< vector > matrix; //square matrix to represent the distance matrix.resize(thisItersLookup->size()); for (int k = 0; k < thisItersLookup->size(); k++) { matrix[k].resize(thisItersLookup->size(), 0.0); } for (int j = 0; j < calcDists[i].size(); j++) { int row = calcDists[i][j].seq1; int column = calcDists[i][j].seq2; double dist = calcDists[i][j].dist; matrix[row][column] = dist; matrix[column][row] = dist; } params->matrices.push_back(matrix); } } for (int i = 0; i < calcDists.size(); i++) { 
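// clear this iteration's distances; they have already been saved to calcDistsTotals (subsample) or converted to square matrices above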
calcDists[i].clear(); } delete thisItersLookup; } if((params->numIters) % 100 != 0){ params->m->mothurOutJustToScreen(toString(params->numIters)+"\n"); } return 0; } catch(exception& e) { params->m->errorOut(e, "DistSharedCommand", "process"); exit(1); } } /***********************************************************/ int DistSharedCommand::createProcesses(SharedRAbundVectors*& thisLookup){ try { vector groupNames = thisLookup->getNamesGroups(); vector lines; if (processors > (iters)) { processors = iters; } //figure out how many sequences you have to process int numItersPerProcessor = (iters) / processors; for (int i = 0; i < processors; i++) { if(i == (processors - 1)){ numItersPerProcessor = (iters) - i * numItersPerProcessor; } lines.push_back(numItersPerProcessor); } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { //make copy of lookup so we don't get access violations SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisLookup); distSharedData* dataBundle = new distSharedData(lines[i+1], false, subsample, subsampleSize, withReplacement, Estimators, newLookup); data.push_back(dataBundle); workerThreads.push_back(new std::thread(process, dataBundle)); } //make copy of lookup so we don't get access violations SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisLookup); distSharedData* dataBundle = new distSharedData(lines[0], true, subsample, subsampleSize, withReplacement, Estimators, newLookup); process(dataBundle); delete newLookup; Estimators.clear(); Estimators = dataBundle->Estimators; if (!subsample) { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[distance]"] = thisLookup->getLabel(); variables["[tag2]"] = ""; variables["[outputtag]"] = output; /// fix to print out matrices for each calc - only main does this for (int i = 0; i < Estimators.size(); i++) { variables["[calc]"] = Estimators[i]; string distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); ofstream outDist; util.openOutputFile(distFileName, outDist); outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); printDists(outDist, dataBundle->matrices[i], groupNames); outDist.close(); } } vector< vector< vector > > calcDistsTotals = dataBundle->calcDistsTotals; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); //get calcDistsTotal info - one entry per iter for (int j = 0; j < data[i]->calcDistsTotals.size(); j++) { calcDistsTotals.push_back(data[i]->calcDistsTotals[j]); } delete data[i]->thisLookup; delete data[i]; delete workerThreads[i]; } delete dataBundle; if (subsample) { //we need to find the average distance and standard deviation for each groups distance vector< vector > calcAverages = util.getAverages(calcDistsTotals, mode); //find standard deviation vector< vector > stdDev = util.getStandardDeviation(calcDistsTotals, calcAverages); //print results for (int i = 0; i < Estimators.size(); i++) { vector< vector > matrix; //square matrix to represent the distance matrix.resize(thisLookup->size()); for (int k = 0; k < thisLookup->size(); k++) { matrix[k].resize(thisLookup->size(), 0.0); } vector< vector > stdmatrix; //square matrix to represent the stdDev stdmatrix.resize(thisLookup->size()); for (int k = 0; k < thisLookup->size(); k++) { stdmatrix[k].resize(thisLookup->size(), 0.0); } for (int j = 0; j < calcAverages[i].size(); j++) { 
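// copy each averaged pairwise value and its standard deviation into symmetric square matrices for printing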
int row = calcAverages[i][j].seq1; int column = calcAverages[i][j].seq2;
float dist = calcAverages[i][j].dist; float stdDist = stdDev[i][j].dist;
matrix[row][column] = dist; matrix[column][row] = dist;
stdmatrix[row][column] = stdDist; stdmatrix[column][row] = stdDist; }
map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile));
variables["[distance]"] = thisLookup->getLabel(); variables["[outputtag]"] = output; variables["[tag2]"] = "ave"; variables["[calc]"] = Estimators[i];
string distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
//set current phylip file to average distance matrix
current->setPhylipFile(distFileName);
ofstream outAve; util.openOutputFile(distFileName, outAve); outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
printDists(outAve, matrix, groupNames); outAve.close();
variables["[tag2]"] = "std"; distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
ofstream outSTD; util.openOutputFile(distFileName, outSTD); outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
printDists(outSTD, stdmatrix, thisLookup->getNamesGroups()); outSTD.close(); } }
return 0; }
catch(exception& e) { m->errorOut(e, "DistSharedCommand", "createProcesses"); exit(1); } }
/***********************************************************/ mothur-1.48.0/source/commands/distsharedcommand.h000077500000000000000000000076411424121717000220750ustar00rootroot00000000000000#ifndef MATRIXOUTPUTCOMMAND_H
#define MATRIXOUTPUTCOMMAND_H
/* * distsharedcommand.h * Mothur * * Created by Sarah Westcott on 5/20/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */
#include "command.hpp"
#include "inputdata.h"
#include "groupmap.h"
#include "validcalculator.h"
#include "sharedsobscollectsummary.h"
#include "sharedchao1.h"
#include "sharedace.h"
#include "sharednseqs.h"
#include "sharedjabund.h"
#include "sharedsorabund.h"
#include "sharedjclass.h"
#include "sharedsorclass.h"
#include "sharedjest.h"
#include "sharedsorest.h"
#include "sharedthetayc.h"
#include "sharedthetan.h"
#include "sharedkstest.h"
#include "whittaker.h"
#include "sharedochiai.h"
#include "sharedanderbergs.h"
#include "sharedkulczynski.h"
#include "sharedkulczynskicody.h"
#include "sharedlennon.h"
#include "sharedmorisitahorn.h"
#include "sharedbraycurtis.h"
#include "whittaker.h"
#include "odum.h"
#include "canberra.h"
#include "structeuclidean.h"
#include "structchord.h"
#include "hellinger.h"
#include "manhattan.h"
#include "structpearson.h"
#include "soergel.h"
#include "spearman.h"
#include "structkulczynski.h"
#include "structchi2.h"
#include "speciesprofile.h"
#include "hamming.h"
#include "gower.h"
#include "memchi2.h"
#include "memchord.h"
#include "memeuclidean.h"
#include "mempearson.h"
#include "sharedjsd.h"
#include "sharedrjsd.h"
// aka. dist.shared()
/* This command creates a distance matrix file for each calculator at each distance level, using various calculators to measure the dissimilarity between groups. The user can select the labels they wish to use as well as the groups they would like included. They can also use as many or as few calculators as they wish. 
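   Example (from getHelpString): dist.shared(groups=A-B-C, calc=jabund-sorabund). A shared file must be supplied with the shared parameter or be the current shared file. 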
*/ class DistSharedCommand : public Command { public: DistSharedCommand(string); ~DistSharedCommand(); vector setParameters(); string getCommandName() { return "dist.shared"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Dist.shared"; } string getDescription() { return "generate a distance matrix that describes the dissimilarity among multiple groups"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string exportFileName, output, sharedfile; int numGroups, processors, iters, subsampleSize; ofstream out; bool abort, allLines, subsample, withReplacement; set labels; //holds labels to be used string outputFile, calc, groups, label, mode; vector Estimators, Groups, outputNames; //holds estimators to be used int createProcesses(SharedRAbundVectors*&); int driver(vector&, vector< vector >&, vector); void printDists(ostream&, vector< vector >&, vector); }; /**************************************************************************************************/ struct distSharedData { SharedRAbundVectors* thisLookup; vector< vector< vector > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files vector< vector< vector > > matrices; //for each calculator a square matrix to represent the distances, only filled by main thread vector Estimators; long long numIters; MothurOut* m; int count, subsampleSize; bool mainThread, subsample, withReplacement; distSharedData(){} distSharedData(long long st, bool mt, bool su, int subsize, bool wr, vector est, SharedRAbundVectors* lu) { m = MothurOut::getInstance(); numIters = st; Estimators = est; thisLookup = lu; count = 0; mainThread = mt; subsample = su; subsampleSize = subsize; withReplacement = wr; } }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/diversityestimatorcommand.cpp000066400000000000000000001274331424121717000242510ustar00rootroot00000000000000// // diversityestimatorcommand.cpp // Mothur // // Created by Sarah Westcott on 4/4/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
// #include "diversityestimatorcommand.hpp" #include "erarefaction.hpp" #include "metroig.hpp" #include "metrolognormal.hpp" #include "metrologstudent.hpp" #include "metrosichel.hpp" #include "igabundance.hpp" #include "igrarefaction.hpp" #include "lnabundance.hpp" #include "lnrarefaction.hpp" #include "lnshift.hpp" #include "lsabundance.hpp" #include "lsrarefaction.hpp" #include "siabundance.hpp" #include "sirarefaction.hpp" #include "sishift.hpp" //********************************************************************************************************************** vector EstimatorSingleCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(plist); CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(psabund); CommandParameter psample("sample", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(psample); CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); CommandParameter pcalc("calc", "Multiple", "erarefact-ig-ln-ls-si-igabund-igrarefact-lnrarefact-lnabund-lnshift-lsabund-lsrarefact-siabund-sirarefact-sishift", "ig", "", "", "","",false,false,true); parameters.push_back(pcalc); //lnabund CommandParameter palpha("sigmaa", "Number", "", "0.1", "", "", "","",false,false,true); parameters.push_back(palpha); CommandParameter pbeta("sigmab", "Number", "", "0.1", "", "", "","",false,false); parameters.push_back(pbeta); CommandParameter psigman("sigman", "Number", "", "0.1", "", "", "","",false,false); parameters.push_back(psigman); CommandParameter psigmas("sigmas", "Number", "", "100", "", "", "","",false,false); parameters.push_back(psigmas); CommandParameter pburn("burn", "Number", "", "2000000", "", "", "","",false,false); parameters.push_back(pburn); CommandParameter pcoverage("coverage", "Number", "", "0.8", "", "", "","",false,false); parameters.push_back(pcoverage); CommandParameter pfit("fit", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pfit); CommandParameter psamplenum("burnsample", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(psamplenum); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["erarefact"] = tempOutNames; outputTypes["igrarefact"] = tempOutNames; outputTypes["igabund"] = tempOutNames; outputTypes["lnabund"] = tempOutNames; outputTypes["lnrarefact"] = tempOutNames; outputTypes["lnshift"] = tempOutNames; outputTypes["lsabund"] = tempOutNames; outputTypes["lsrarefact"] = tempOutNames; outputTypes["siabund"] = tempOutNames; 
outputTypes["sirarefact"] = tempOutNames; outputTypes["sishift"] = tempOutNames; outputTypes["ig"] = tempOutNames; outputTypes["ln"] = tempOutNames; outputTypes["ls"] = tempOutNames; outputTypes["si"] = tempOutNames; outputTypes["sample"] = tempOutNames;
vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; }
catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "setParameters"); exit(1); } }
//**********************************************************************************************************************
string EstimatorSingleCommand::getHelpString(){ try { string helpString = "\n"; ValidCalculators validCalculator;
helpString += "The estimator.single command parameters are " + getCommandParameters() + ". You may only choose one calculator at a time.\n";
helpString += "The estimator.single command should be in the following format: \n";
helpString += "estimator.single(list=yourListFile, calc=yourEstimators).\n";
helpString += "Example estimator.single(list=final.opti_mcc.list, calc=erarefact).\n";
helpString += "The freq parameter is used to indicate when to output your data; by default it is set to 100. But you can set it to a percentage of the number of sequences. For example freq=0.10 means 10%. \n";
helpString += "The sample file is used to provide mcmc sampling to the calculators.\n";
helpString += "The default value for freq is 100, and the default for calc is ig.\n";
helpString += "The sigmaa parameter is used to set the std. dev. of alpha / X / mean prop. distn for MetroIG / MetroLogNormal / MetroLogStudent / MetroSichel, respectively. Default = 0.10.\n";
helpString += "The sigmab parameter is used to set the std. dev. of beta / Y / V prop. distn for MetroIG / MetroLogNormal / MetroLogStudent / MetroSichel, respectively. Default = 0.10.\n";
helpString += "The sigman parameter is used to set the std. dev. of N / Gamma prop. distn for MetroLogStudent / MetroSichel, respectively. Default = 0.10.\n";
helpString += "The sigmas parameter is used to set the std. dev. of S prop. distn for MetroIG / MetroLogNormal / MetroLogStudent / MetroSichel. Default = 100.\n";
helpString += "The coverage parameter allows you to set the desired coverage. Default=0.8.\n";
helpString += "The iters parameter allows you to set the number of mcmc samples to generate. The default is 250000.\n";
helpString += "The burn parameter allows you to ignore part of the sampling file. Default = 2000000 / 100000 for IGAbundance, LNShift, LSAbundance / IGRarefaction, LNRarefaction, LSRarefaction, SIAbundance, SIRarefaction, SIShift respectively.\n";
helpString += "The burnsample parameter allows you to set the sampling frequency. The default is 1000 / 100 for IGAbundance, LNShift, LSAbundance / IGRarefaction, LNRarefaction, LSRarefaction, SIAbundance, SIRarefaction, SIShift respectively.\n";
helpString += "The fit parameter is used to indicate that you want mothur to auto adjust the sampling data parameters. Default=10, meaning try fitting 10 times. \n";
helpString += validCalculator.printCalc("estimator");
helpString += "Be sure to use the correct sampling estimator with your calculator. IG is used for igabund and igrarefact. LN is used for lnabund, lnshift and lnrarefact. LS is used for lsabund and lsrarefaction. 
SI is used for siabund, sirarefact and sishift.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; getCommonQuestions(); return helpString; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string EstimatorSingleCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string howto = "How do you create the sampling files?"; howtos.push_back(howto); string hanswer = "\tRun a short trial MCMC run of 1000 iterations with guessed std. dev.s for the proposal distributions say about 10% of the parameter values. Adjust the std. dev.s until the acceptance ratios are about 0.5. Then perform a longer run of say 250,000 iterations (mothur's default). Three data files with posterior samples for three different sets of parameter values will be generated.\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string EstimatorSingleCommand::getOutputPattern(string type) { try { string pattern = "[filename],[distance]," + type; return pattern; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** EstimatorSingleCommand::EstimatorSingleCommand(string option) : Command() { try { allLines = true; //allow user to run help if(option == "help") { help(); calledHelp = true; abort = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; inputfile = listfile; current->setListFile(listfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { sabundfile = ""; abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } else { format = "sabund"; inputfile = sabundfile; current->setSabundFile(sabundfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { rabundfile = ""; abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } else { format = "rabund"; inputfile = rabundfile; current->setRabundFile(rabundfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { format = "sharedfile"; inputfile = sharedfile; current->setSharedFile(sharedfile); } bool hasSample = false; samplefile = validParameter.validFile(parameters, "sample"); if (samplefile == "not open") { samplefile = ""; abort = true; } else if (samplefile == "not found") { samplefile = ""; } else { hasSample = true; current->setSampleFile(samplefile); } if 
((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { //is there are current file available for any of these? //give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { rabundfile = current->getRabundFile(); if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter.\n"); } else { sabundfile = current->getSabundFile(); if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter.\n"); } else { m->mothurOut("[ERROR]: No valid current files. You must provide a list, sabund, rabund or shared file before you can use the estimator.single command.\n"); abort = true; } } } } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } //NOTE: if you add new calc options, don't forget to add them to the parameter initialize in setParameters or the gui won't be able to use them ValidCalculators validCalculator; calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "ig"; } samplingCalcs.insert("ig"); samplingCalcs.insert("ln"); samplingCalcs.insert("ls"); samplingCalcs.insert("si"); rarefactCalcs.push_back("igrarefact"); calcToSamplingCalc["igrarefact"] = "ig"; rarefactCalcs.push_back("lsrarefact"); calcToSamplingCalc["lsrarefact"] = "ls"; rarefactCalcs.push_back("lnrarefact"); calcToSamplingCalc["lnrarefact"] = "ln"; rarefactCalcs.push_back("sirarefact"); calcToSamplingCalc["sirarefact"] = "si"; abundCalcs.push_back("igabund"); calcToSamplingCalc["igabund"] = "ig"; abundCalcs.push_back("lnabund"); calcToSamplingCalc["lnabund"] = "ln"; abundCalcs.push_back("lsabund"); calcToSamplingCalc["lsabund"] = "ls"; abundCalcs.push_back("siabund"); calcToSamplingCalc["siabund"] = "si"; abundCalcs.push_back("sishift"); calcToSamplingCalc["sishift"] = "si"; abundCalcs.push_back("lnshift"); calcToSamplingCalc["lnshift"] = "ln"; abundCalcs.push_back("erarefact"); smallBurn.push_back("erarefact"); smallBurn.push_back("siabund"); smallBurn.push_back("sishift"); smallBurn.insert(smallBurn.end(), rarefactCalcs.begin(), rarefactCalcs.end()); //remove any typo calcs createSampling = false; if (validCalculator.isValidCalculator("estimator", calc) ) { bool ignore = false; if (!hasSample) { //if you didn't provide a mcmc sample file, but are trying to run a calc that needs it, then ignore if (samplingCalcs.count(calc) == 0) { ignore = true; } if (calc == "erarefact") { ignore = false; } } if (ignore) { m->mothurOut("\n[WARNING]: " + calc + " requires a mcmc sampling file and you have not provided one. You can produce a sampling file using the ig (metroig), ln (metroln), ls (metrols) or si (metrosichel) calculators. 
I will create the sampling file for you using the " + calcToSamplingCalc[calc] + " calculator.\n"); createSampling = true; } } if (calc == "") { abort = true; m->mothurOut("[ERROR]: no valid estimators, aborting.\n"); } string temp; temp = validParameter.valid(parameters, "freq"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, freq); temp = validParameter.valid(parameters, "sigmaa"); if (temp == "not found") { temp = "0.1"; } util.mothurConvert(temp, sigmaAlpha); temp = validParameter.valid(parameters, "sigmab"); if (temp == "not found") { temp = "0.1"; } util.mothurConvert(temp, sigmaBeta); temp = validParameter.valid(parameters, "sigman"); if (temp == "not found") { temp = "0.1"; } util.mothurConvert(temp, sigmaN); temp = validParameter.valid(parameters, "sigmas"); if (temp == "not found") { temp = "100.0"; } util.mothurConvert(temp, sigmaS); itersSet = true; temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "250000"; itersSet = false; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "fit"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, fitIters); temp = validParameter.valid(parameters, "burn"); if (temp == "not found") { if (util.inUsersGroups(calc, smallBurn)) { temp = "100000"; } else { temp = "2000000"; } } util.mothurConvert(temp, burn); temp = validParameter.valid(parameters, "burnsample"); if (temp == "not found") { if (util.inUsersGroups(calc, smallBurn)) { temp = "100"; } else { temp = "1000"; } } util.mothurConvert(temp, burnSample); if (burnSample <= 0) { m->mothurOut("[ERROR]: Burn sample must be greater than 0. Aborting.\n"); abort=true; } temp = validParameter.valid(parameters, "coverage"); if (temp == "not found") { temp = "0.8"; } util.mothurConvert(temp, coverage); if ((util.isEqual(coverage, -1)) && ((calc == "igrarefact") || (calc == "lnrarefact") || (calc == "lsrarefact") || (calc == "sirarefact"))) { m->mothurOut("[ERROR]: You must set the coverage parameter to run the igrarefact, lsrarefact, lnrarefact or sirarefact estimator. Aborting.\n"); abort=true; } #ifdef USE_GSL #else m->mothurOut("[ERROR]: You did not build mothur with the GNU Scientific Library which is required before you can use the estimator.single command. 
Aborting.\n"); abort = true; #endif } } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "EstimatorSingleCommand"); exit(1); } } //********************************************************************************************************************** int EstimatorSingleCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (createSampling) { string savedCalc = calc; int savedIters = iters; calc = calcToSamplingCalc[savedCalc]; if (!itersSet) { iters = 250000; } if (format != "sharedfile") { processSingleSample(); } //handles multiple label values else { processSharedFile(); } //handles multiple label values and multiple samples vector samplingFiles = outputTypes[calc]; if (samplingFiles.size() != 0) { samplefile = samplingFiles[0]; outputTypes["sample"].push_back(samplefile); calc = savedCalc; iters = savedIters; }else { return 0; } } if (format != "sharedfile") { processSingleSample(); } //handles multiple label values else { processSharedFile(); } //handles multiple label values and multiple samples if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set column file as new current columnfile itTypes = outputTypes.find("sample"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { string currentName = (itTypes->second)[0]; current->setSampleFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "execute"); exit(1); } } //********************************************************************************************************************** int EstimatorSingleCommand::processSharedFile() { try { vector Groups; InputData input(inputfile, format, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* shared = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = shared->getNamesGroups(); if (outputdir == "") { outputdir += util.hasPath(inputfile); } string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(inputfile)); map variables; variables["[filename]"] = fileNameRoot; variables["[distance]"] = shared->getLabel(); if (util.inUsersGroups(calc, samplingCalcs)) { variables["[distance]"] = shared->getLabel() + ".0"; } string outputFileName = getOutputFileName(calc, variables); vector out; outputNames.push_back(outputFileName); outputTypes[calc].push_back(outputFileName); if (util.inUsersGroups(calc, samplingCalcs)) { out.resize(3); out[0] = new ofstream(); util.openOutputFile(outputFileName, *out[0]); //format output out[0]->setf(ios::fixed, ios::floatfield); out[0]->setf(ios::showpoint); variables["[distance]"] = shared->getLabel() + ".1"; string outputFileName1 = getOutputFileName(calc, variables); outputNames.push_back(outputFileName1); outputTypes[calc].push_back(outputFileName1); out[1] = new ofstream(); util.openOutputFile(outputFileName1, *out[1]); //format output out[1]->setf(ios::fixed, ios::floatfield); out[1]->setf(ios::showpoint); variables["[distance]"] = shared->getLabel() + ".2"; string outputFileName2 = getOutputFileName(calc, variables); outputNames.push_back(outputFileName2); outputTypes[calc].push_back(outputFileName2); out[2] = new ofstream(); util.openOutputFile(outputFileName2, *out[2]); //format output out[2]->setf(ios::fixed, ios::floatfield); 
out[2]->setf(ios::showpoint); *out[0] << "#Be sure to use the correct sampling estimator with your calculator. IG is used for igabund and igrarefact. LN is used for lnabund, lnshift and lnrarefact. LS is used for lsabund and lsrarefaction. SI is used for siabund, sirarefact and sishift.\n"; *out[1] << "#Be sure to use the correct sampling estimator with your calculator. IG is used for igabund and igrarefact. LN is used for lnabund, lnshift and lnrarefact. LS is used for lsabund and lsrarefaction. SI is used for siabund, sirarefact and sishift.\n"; *out[2] << "#Be sure to use the correct sampling estimator with your calculator. IG is used for igabund and igrarefact. LN is used for lnabund, lnshift and lnrarefact. LS is used for lsabund and lsrarefaction. SI is used for siabund, sirarefact and sishift.\n"; }else if (util.inUsersGroups(calc, rarefactCalcs)) { out.resize(1); out[0] = new ofstream(); util.openOutputFile(outputFileName, *out[0]); //format output out[0]->setf(ios::fixed, ios::floatfield); out[0]->setf(ios::showpoint); *out[0] << "label\tgroup\t" << calc << "_Lower\t" << calc << "_Median\t" << calc << "_Upper\n"; }else if (util.inUsersGroups(calc, abundCalcs)) { out.resize(1); out[0] = new ofstream(); util.openOutputFile(outputFileName, *out[0]); //format output out[0]->setf(ios::fixed, ios::floatfield); out[0]->setf(ios::showpoint); *out[0] << "label\tgroup\tnum\t" << calc << "\n"; } while (shared != nullptr) { if (m->getControl_pressed()) { delete shared; break; } processShared(shared, out, fileNameRoot); delete shared; shared = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } out[0]->close(); delete out[0]; if (util.inUsersGroups(calc, samplingCalcs)) { out[1]->close(); out[2]->close(); delete out[1]; delete out[2]; } return 0; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "processSharedFile"); exit(1); } } //********************************************************************************************************************** int EstimatorSingleCommand::processSingleSample() { try { InputData input(inputfile, format, nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; SAbundVector* sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); if (outputdir == "") { outputdir += util.hasPath(inputfile); } string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(inputfile)); map variables; variables["[filename]"] = fileNameRoot; variables["[distance]"] = sabund->getLabel(); if (util.inUsersGroups(calc, samplingCalcs)) { variables["[distance]"] = sabund->getLabel() + ".0"; } string outputFileName = getOutputFileName(calc, variables); vector out; outputNames.push_back(outputFileName); outputTypes[calc].push_back(outputFileName); if (util.inUsersGroups(calc, samplingCalcs)) { out.resize(3); out[0] = new ofstream(); util.openOutputFile(outputFileName, *out[0]); //format output out[0]->setf(ios::fixed, ios::floatfield); out[0]->setf(ios::showpoint); variables["[distance]"] = sabund->getLabel() + ".1"; string outputFileName1 = getOutputFileName(calc, variables); outputNames.push_back(outputFileName1); outputTypes[calc].push_back(outputFileName1); out[1] = new ofstream(); util.openOutputFile(outputFileName1, *out[1]); //format output out[1]->setf(ios::fixed, ios::floatfield); out[1]->setf(ios::showpoint); variables["[distance]"] = sabund->getLabel() + ".2"; string outputFileName2 = getOutputFileName(calc, variables); outputNames.push_back(outputFileName2); 
outputTypes[calc].push_back(outputFileName2); out[2] = new ofstream(); util.openOutputFile(outputFileName2, *out[2]); //format output out[2]->setf(ios::fixed, ios::floatfield); out[2]->setf(ios::showpoint); *out[0] << "#Be sure to use the correct sampling estimator with your calculator. IG is used for igabund and igrarefact. LN is used for lnabund, lnshift and lnrarefact. LS is used for lsabund and lsrarefaction. SI is used for siabund, sirarefact and sishift.\n"; *out[1] << "#Be sure to use the correct sampling estimator with your calculator. IG is used for igabund and igrarefact. LN is used for lnabund, lnshift and lnrarefact. LS is used for lsabund and lsrarefaction. SI is used for siabund, sirarefact and sishift.\n"; *out[2] << "#Be sure to use the correct sampling estimator with your calculator. IG is used for igabund and igrarefact. LN is used for lnabund, lnshift and lnrarefact. LS is used for lsabund and lsrarefaction. SI is used for siabund, sirarefact and sishift.\n"; }else if (util.inUsersGroups(calc, rarefactCalcs)) { out.resize(1); out[0] = new ofstream(); util.openOutputFile(outputFileName, *out[0]); //format output out[0]->setf(ios::fixed, ios::floatfield); out[0]->setf(ios::showpoint); *out[0] << "label\t" << calc << "_Lower\t" << calc << "_Median\t" << calc << "_Upper\n"; }else if (util.inUsersGroups(calc, abundCalcs)) { out.resize(1); out[0] = new ofstream(); util.openOutputFile(outputFileName, *out[0]); //format output out[0]->setf(ios::fixed, ios::floatfield); out[0]->setf(ios::showpoint); *out[0] << "label\tnum\t" << calc << "\n"; } while (sabund != nullptr) { if (m->getControl_pressed()) { delete sabund; break; } processSingle(sabund, sabund->getLabel(), out, fileNameRoot); delete sabund; sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); } out[0]->close(); delete out[0]; if (util.inUsersGroups(calc, samplingCalcs)) { out[1]->close(); out[2]->close(); delete out[1]; delete out[2]; } return 0; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "processSingleSample"); exit(1); } } //********************************************************************************************************************** int EstimatorSingleCommand::processShared(SharedRAbundVectors*& shared, vector& out, string fileRoot) { try { string outputFileName = ""; string label = shared->getLabel(); vector groupNames = shared->getNamesGroups(); vector< vector > abundResults; int maxSize = 0; for (int i = 0; i < groupNames.size(); i++) { m->mothurOut("\nProcessing group " + groupNames[i] + "\n\n"); string groupName = groupNames[i] + " " + label; SAbundVector* sabund = new SAbundVector(shared->getSAbundVector(groupNames[i])); if (m->getControl_pressed()) { delete sabund; break; } if (util.inUsersGroups(calc, samplingCalcs)) { *out[0] << "#" << groupName << endl; *out[1] << "#" << groupName << endl; *out[2] << "#" << groupName << endl; vector outputFileNames = runSamplingCalcs(sabund, fileRoot); util.appendFiles(outputFileNames[0], *out[0]); util.mothurRemove(outputFileNames[0]); util.appendFiles(outputFileNames[1], *out[1]); util.mothurRemove(outputFileNames[1]); util.appendFiles(outputFileNames[2], *out[2]); util.mothurRemove(outputFileNames[2]); }else if (util.inUsersGroups(calc, rarefactCalcs)) { *out[0] << label << '\t'; if (groupNames[i] != "") { *out[0] << groupNames[i] << '\t'; } runRarefactCalcs(sabund->getNumSeqs(), groupName, *out[0]); }else if (util.inUsersGroups(calc, abundCalcs)) { vector results = runAbundCalcs(sabund, groupName); 
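// gather each group's abundance estimates; shorter result vectors are padded with -1 below and printed as NA in the combined table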
abundResults.push_back(results); if (results.size() > maxSize) { maxSize = results.size(); } } delete sabund; } if (abundResults.size() != 0) {//ran an abund calc on several samples, combine results into one file //find smallest largest size for (int i = 0; i < abundResults.size(); i++) { if (abundResults[i].size() < maxSize) { abundResults[i].resize(maxSize, -1); //fill blanks with NA } } for (int i = 0; i < maxSize; i++) { for (int j = 0; j < groupNames.size(); j++) { *out[0] << label << '\t' << groupNames[j] << '\t' << (i+1); if (abundResults[j][i] == -1) { *out[0] << "\tNA" << endl; } else { *out[0] << '\t' << abundResults[j][i] << endl; } } } } return 0; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "processShared"); exit(1); } } //********************************************************************************************************************** int EstimatorSingleCommand::processSingle(SAbundVector*& sabund, string groupName, vector& out, string fileRoot) { try { string label = sabund->getLabel(); vector outputFileNames; if (util.inUsersGroups(calc, rarefactCalcs)) { *out[0] << label << '\t'; runRarefactCalcs(sabund->getNumSeqs(), groupName, *out[0]); }else if (util.inUsersGroups(calc, samplingCalcs)) { *out[0] << "#" << groupName << endl; *out[1] << "#" << groupName << endl; *out[2] << "#" << groupName << endl; vector outputFileNames = runSamplingCalcs(sabund, fileRoot); util.appendFiles(outputFileNames[0], *out[0]); util.mothurRemove(outputFileNames[0]); util.appendFiles(outputFileNames[1], *out[1]); util.mothurRemove(outputFileNames[1]); util.appendFiles(outputFileNames[2], *out[2]); util.mothurRemove(outputFileNames[2]); }else if (util.inUsersGroups(calc, abundCalcs)) { vector results = runAbundCalcs(sabund, groupName); for (int i = 0; i < results.size(); i++) { if (m->getControl_pressed()) { break; } *out[0] << label << '\t' << (i+1) << '\t' << results[i] << endl; } } return 0; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "process"); exit(1); } } //********************************************************************************************************************** int EstimatorSingleCommand::runRarefactCalcs(int numSeqs, string groupName, ofstream& out) { try { vector results; vector thisGroupSample; if ((calc == "igrarefact") || (calc == "lnrarefact")) { if (samplefile != "") { fillSampling(burn, burnSample); } } else if ((calc == "lsrarefact") || (calc == "sirarefact")) { if (samplefile != "") { fillSampling(burn, burnSample, true); } } it = sampling.find(groupName); if (it != sampling.end()) { thisGroupSample = it->second; } else { m->mothurOut("[ERROR]: Unable to find sampling info for group " + groupName + ", quitting. 
Do you need to adjust the iters, burn or burnsample parameters?\n"); m->setControl_pressed(true); return 0; } DiversityCalculator* diversityCalc; if (calc == "igrarefact") { diversityCalc = new IGRarefaction(coverage); } else if (calc == "lnrarefact") { diversityCalc = new LNRarefaction(coverage); } else if (calc == "lsrarefact") { diversityCalc = new LSRarefaction(coverage); } else if (calc == "sirarefact") { diversityCalc = new SIRarefaction(coverage); } results = diversityCalc->getValues(numSeqs, sampling[groupName]); delete diversityCalc; for (int i = 0; i < results.size(); i++) { out << results[i] << '\t'; } out << endl; return 0; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "runRarefactCalcs"); exit(1); } } //********************************************************************************************************************** vector EstimatorSingleCommand::runSamplingCalcs(SAbundVector*& sabund, string fileRoot) { try { vector resultFiles; map variables; variables["[filename]"] = fileRoot; variables["[distance]"] = sabund->getLabel(); variables["[tag]"] = calc; string outputFileStub = variables["[filename]"] + variables["[distance]"] + variables["[tag]"]; DiversityCalculator* diversityCalc; if (calc == "ig") { diversityCalc = new MetroIG(fitIters, sigmaAlpha, sigmaBeta, sigmaS, iters, outputFileStub); } else if (calc == "ln") { diversityCalc = new MetroLogNormal(fitIters, sigmaAlpha, sigmaBeta, sigmaS, iters, outputFileStub); } else if (calc == "ls") { diversityCalc = new MetroLogStudent(fitIters, sigmaAlpha, sigmaBeta, sigmaN, sigmaS, iters, outputFileStub); } else if (calc == "si") { diversityCalc = new MetroSichel(fitIters, sigmaAlpha, sigmaBeta, sigmaN, sigmaS, iters, outputFileStub); } resultFiles = diversityCalc->getValues(sabund); if (m->getControl_pressed()) { m->mothurOut("\nHow do you create the sampling files?\n\nRun a short trial MCMC run of 1000 iterations with guessed std. dev.s for the proposal distributions say about 10% of the parameter values. Adjust the std. dev.s untill the acceptance ratios are about 0.5. Then perform a longer run of say 250,000 iterations (mothur's default). Three data files with posterior samples for three different sets of parameter values will be generated.\n\n"); } delete diversityCalc; return resultFiles; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "runSamplingCalcs"); exit(1); } } //********************************************************************************************************************** vector EstimatorSingleCommand::runAbundCalcs(SAbundVector*& sabund, string groupName) { try { int maxRank = sabund->getMaxRank(); int numSeqs = sabund->getNumSeqs(); vector results; vector thisGroupSample; if ((calc == "igabund") || (calc == "lnshift") || (calc == "lnabund")) { if (samplefile != "") { fillSampling(burn, burnSample); } } else if ((calc == "siabund") || (calc == "sishift") || (calc == "lsabund")) { if (samplefile != "") { fillSampling(burn, burnSample, true); } } if (calc != "erarefact") { it = sampling.find(groupName); if (it != sampling.end()) { thisGroupSample = it->second; } else { m->mothurOut("[ERROR]: Unable to find sampling info for group " + groupName + ", quitting. 
Do you need to adjust the iters, burn or burnsample parameters?\n"); m->setControl_pressed(true); return results; } } //convert freq percentage to number int increment = 1; if (freq < 1.0) { increment = numSeqs * freq; } else { increment = freq; } DiversityCalculator* diversityCalc; if (calc == "igabund") { diversityCalc = new IGAbundance(); } else if (calc == "lnshift") { diversityCalc = new LNShift(); } else if (calc == "lnabund") { diversityCalc = new LNAbundance(); } else if (calc == "siabund") { diversityCalc = new SIAbundance(); } else if (calc == "sishift") { diversityCalc = new SIShift(); } else if (calc == "lsabund") { diversityCalc = new LSAbundance(); } else if (calc == "erarefact"){ diversityCalc = new ERarefaction(increment); } if (calc == "erarefact") { diversityCalc->getValues(sabund, results); } else if ((calc == "igabund") || (calc == "siabund") || (calc == "lnabund") || (calc == "lsabund")) { results = diversityCalc->getValues(maxRank, sampling[groupName]); } else { //sishift, lnshift results = diversityCalc->getValues(numSeqs, sampling[groupName]); } delete diversityCalc; return results; } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "runAbundCalcs"); exit(1); } } //********************************************************************************************************************** int EstimatorSingleCommand::fillSampling(int burnValue, int burnSampleValue, bool filldNu) { try { sampling.clear(); int numPiecesExpected = 5; if (filldNu) { numPiecesExpected = 6; } ifstream in; util.openInputFile(samplefile, in); util.getline(in); gobble(in); //grab header string groupName = ""; while (!in.eof()) { if (m->getControl_pressed()) { break; } string line = util.getline(in); gobble(in); if (line != "") { if (line[0] == '#') { groupName = line.substr(1); //looks like #groupName label ie. #F000D000 0.03 }else { vector pieces; util.splitAtComma(line, pieces); if (pieces.size() == numPiecesExpected) { int sampleSize, ns; util.mothurConvert(pieces[0], sampleSize); if ((sampleSize > burnValue) && (sampleSize % burnSampleValue == 0)) { double alpha = 0, beta = 0, dNu = 0; if (!filldNu) { util.mothurConvert(pieces[3], ns); } else { util.mothurConvert(pieces[3], dNu); util.mothurConvert(pieces[4], ns); } util.mothurConvert(pieces[1], alpha); util.mothurConvert(pieces[2], beta); mcmcSample entry(alpha, beta, dNu, ns); sampling[groupName].push_back(entry); } }else { m->mothurOut("\n[WARNING]: Unexpected format in sampling file, ignoring. Expected " + toString(numPiecesExpected) + " values separated by commas, found " + toString(pieces.size()) + ". Expecting something like: '0,7.419861e-01,4.695223e+00,5773,337.552846' or 0,-1.787922,6.348652,4784302.925302,8806,331.214377 for each line.\n\n"); sampling.clear(); break; } } } } in.close(); return ((int)sampling.size()); } catch(exception& e) { m->errorOut(e, "EstimatorSingleCommand", "fillSampling"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/diversityestimatorcommand.hpp000066400000000000000000000045421424121717000242510ustar00rootroot00000000000000// // diversityestimatorcommand.hpp // Mothur // // Created by Sarah Westcott on 4/4/19. // Copyright © 2019 Schloss Lab. All rights reserved. 
// #ifndef diversityestimatorcommand_hpp #define diversityestimatorcommand_hpp #include "command.hpp" #include "inputdata.h" #include "validcalculator.h" //****************************************************** class EstimatorSingleCommand : public Command { public: EstimatorSingleCommand(string); ~EstimatorSingleCommand(){} vector setParameters(); string getCommandName() { return "estimator.single"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getCommonQuestions(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Estimator.single"; } string getDescription() { return "This command implements the diversity estimators from https://github.com/chrisquince/DiversityEstimates. https://www.ncbi.nlm.nih.gov/pubmed/18650928"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines, burnSampleSet, burnSet, createSampling, itersSet; string label, calc, sharedfile, listfile, rabundfile, sabundfile, format, inputfile, samplefile; double freq, sigmaAlpha, sigmaBeta, sigmaS, sigmaN, coverage; int iters, burn, burnSample, fitIters; vector outputNames; set labels; //holds labels to be used vector groups, rarefactCalcs, abundCalcs, smallBurn; map > sampling; map > ::iterator it; set samplingCalcs; map calcToSamplingCalc; map ::iterator itCalcSample; int fillSampling(int, int, bool filldNu=false); int processSingleSample(); int processSharedFile(); int processShared(SharedRAbundVectors*& shared, vector& out, string fileRoot); int processSingle(SAbundVector*&, string, vector&, string); int runRarefactCalcs(int numSeqs, string groupName, ofstream& out); vector runSamplingCalcs(SAbundVector*&, string); vector runAbundCalcs(SAbundVector*&, string groupName); }; //******************************************************* #endif /* diversityestimatorcommand_hpp */ mothur-1.48.0/source/commands/fastaqinfocommand.cpp000077500000000000000000001605351424121717000224350ustar00rootroot00000000000000/* * parsefastaqcommand.cpp * Mothur * * Created by westcott on 9/30/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "fastaqinfocommand.h" #include "sequence.hpp" #include "counttable.h" //********************************************************************************************************************** vector ParseFastaQCommand::setParameters(){ try { CommandParameter pfile("file", "InputTypes", "", "", "fastqFile", "fastqFile", "none","",false,false,true); parameters.push_back(pfile); CommandParameter pfastq("fastq", "InputTypes", "", "", "fastqFile", "fastqFile", "none","",false,false,true); parameters.push_back(pfastq); CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos); CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup); CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs); CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta); CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual); CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; fileOption = 0; createFileGroup = false; hasIndex = false; split = 1; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["fastq"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ParseFastaQCommand::getHelpString(){ try { string helpString = ""; helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file or can be used to parse fastq files by sample.\n"; helpString += "The fastq.info command parameters are file, fastq, fasta, qfile, oligos, group and format; file or fastq is required.\n"; helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n"; helpString += "The oligos parameter allows you to provide an oligos file to split your fastq file into separate fastq files by 
barcode and primers. \n"; helpString += "The group parameter allows you to provide a group file to split your fastq file into separate fastq files by group. \n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the reads. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n"; helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n"; helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n"; helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n"; helpString += "The pacbio parameter allows you to indicate .... When set to true, quality scores of 0 will results in a corresponding base of N. Default=F.\n"; helpString += "Example fastq.info(fastaq=test.fastaq).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ParseFastaQCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],fasta-[filename],[sample],[tag],fasta-[filename],[sample],fasta"; } else if (type == "count") { pattern = "[filename],count_table"; } else if (type == "qfile") { pattern = "[filename],qual-[filename],[sample],[tag],qual-[filename],[sample],qual"; } else if (type == "fastq") { pattern = "[filename],[sample],fastq-[filename],[sample],[tag],fastq"; } //make.sra assumes the [filename],[sample],[tag],fastq format for the 4 column file option. If this changes, may have to modify fixMap function. 
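// any type without a defined pattern is an error: report it and set control_pressed so the command aborts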
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ParseFastaQCommand::ParseFastaQCommand(string option) : Command(){ try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastaQFile = validParameter.validFile(parameters, "fastq"); if (fastaQFile == "not found") { fastaQFile= ""; } else if (fastaQFile == "not open") { fastaQFile = ""; abort = true; } else { inputfile = fastaQFile; } file = validParameter.validFile(parameters, "file"); if (file == "not found") { file = ""; } else if (file == "not open") { file = ""; abort = true; } else { inputfile = file; fileOption = true; } if ((file == "") && (fastaQFile == "")) { m->mothurOut("You must provide a file or fastq option.\n"); abort = true; } oligosfile = validParameter.validFile(parameters, "oligos"); if (oligosfile == "not found") { oligosfile = ""; } else if (oligosfile == "not open") { oligosfile = ""; abort = true; } else { current->setOligosFile(oligosfile); split = 2; } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not found") { groupfile = ""; } else if (groupfile == "not open") { groupfile = ""; abort = true; } else { current->setGroupFile(groupfile); split = 2; } if ((groupfile != "") && (oligosfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group.\n"); abort = true; } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } string temp; temp = validParameter.valid(parameters, "fasta"); if(temp == "not found"){ temp = "T"; } fasta = util.isTrue(temp); temp = validParameter.valid(parameters, "qfile"); if(temp == "not found"){ temp = "T"; } qual = util.isTrue(temp); temp = validParameter.valid(parameters, "pacbio"); if(temp == "not found"){ temp = "F"; } pacbio = util.isTrue(temp); temp = validParameter.valid(parameters, "bdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, bdiffs); temp = validParameter.valid(parameters, "pdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, pdiffs); temp = validParameter.valid(parameters, "ldiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, ldiffs); temp = validParameter.valid(parameters, "sdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, sdiffs); temp = validParameter.valid(parameters, "tdiffs"); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); } util.mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } format = validParameter.valid(parameters, "format"); if (format == "not found"){ format = "illumina1.8+"; } if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) { m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." 
); m->mothurOutEndLine(); abort=true; } if ((!fasta) && (!qual) && (file == "") && (fastaQFile == "") && (oligosfile == "")) { m->mothurOut("[ERROR]: no outputs selected. Aborting.\n"); abort=true; } temp = validParameter.valid(parameters, "checkorient"); if (temp == "not found") { temp = "F"; } reorient = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand"); exit(1); } } //********************************************************************************************************************** int ParseFastaQCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } vector< vector > files; if (file != "") { files = readFile(); } if (m->getControl_pressed()) { return 0; } TrimOligos* trimOligos = nullptr; TrimOligos* rtrimOligos = nullptr; pairedOligos = false; numBarcodes = 0; numPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0; if (oligosfile != "") { readOligos(oligosfile); //find group read belongs to if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes(), hasIndex); numBarcodes = oligos.getPairedBarcodes().size(); numPrimers = oligos.getPairedPrimers().size(); } else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); numPrimers = oligos.getPrimers().size(); numBarcodes = oligos.getBarcodes().size(); } if (reorient) { rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes(), hasIndex); numBarcodes = oligos.getReorientedPairedBarcodes().size(); } }else if (groupfile != "") { readGroup(groupfile); } string inputFile = ""; if (file != "") { inputFile = file; if (m->getControl_pressed()) { if (groupfile != "") { delete groupMap; } if (oligosfile != "") { delete trimOligos; if (reorient) { delete rtrimOligos; } } return 0; } //groupfile name for pacbio with option 2 map variables; variables["[filename]"] = util.getRootName(file); string pacbioFastaFileName = getOutputFileName("fasta", variables); string pacbioQualFileName = getOutputFileName("qfile", variables); if ((fileOption == 2) && pacbio) { seqGroups.clear(); if (fasta) { ofstream temppbf; util.openOutputFile(pacbioFastaFileName, temppbf); temppbf.close(); outputNames.push_back(pacbioFastaFileName); outputTypes["fasta"].push_back(pacbioFastaFileName); } if (qual) { ofstream temppbq; util.openOutputFile(pacbioQualFileName, temppbq); temppbq.close(); outputNames.push_back(pacbioQualFileName); outputTypes["qfile"].push_back(pacbioQualFileName); } } //clear old file for append for (int i = 0; i < files.size(); i++) { //process each pair if (m->getControl_pressed()) { break; } if (((fileOption == 2) || (fileOption == 4)) && !pacbio) { //2 column and 4 column format file file processFile(files[i], trimOligos, rtrimOligos); }else if ((fileOption == 2) && pacbio) { //pacbio with group filename option split = 1; if (current->getMothurCalling()) { //add group names to fastq files and make copies - for sra command parse ofstream temp; map variables; variables["[filename]"] = util.getRootName(files[i][0]); variables["[sample]"] = file2Group[i]; variables["[tag]"] = ""; string newfqFile = getOutputFileName("fastq", variables); util.openOutputFile(newfqFile, temp); temp.close(); util.appendFiles(files[i][0], newfqFile); outputNames.push_back(newfqFile); outputTypes["fastq"].push_back(newfqFile); } inputFile = 
files[i][0]; //process each file to create fasta and qual files set seqNames; if (fasta || qual) { seqNames = processFile(inputFile, trimOligos, rtrimOligos); } //split = 1, so no parsing by group will be done. if (seqNames.size() != 0) { string pacbioGroup = file2Group[i]; for (set::iterator it = seqNames.begin(); it != seqNames.end(); it++) { seqGroups[*it] = pacbioGroup; } groupCounts[pacbioGroup] = seqNames.size(); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFile)); string fastaFile = getOutputFileName("fasta",variables); string qualFile = getOutputFileName("qfile",variables); if (fasta) { util.appendFiles(fastaFile, pacbioFastaFileName); } if (qual) { util.appendFiles(qualFile, pacbioQualFileName); } } }else if (fileOption == 3) { //3 column file option with sample names if (current->getMothurCalling()) { //add group names to fastq files and make copies - for sra command parse ofstream temp, temp2; map variables; variables["[filename]"] = util.getRootName(files[i][0]); variables["[sample]"] = file2Group[i]; variables["[tag]"] = "forward"; string newffqFile = getOutputFileName("fastq", variables); util.openOutputFile(newffqFile, temp); temp.close(); util.appendFiles(files[i][0], newffqFile); outputNames.push_back(newffqFile); outputTypes["fastq"].push_back(newffqFile); variables["[filename]"] = util.getRootName(files[i][1]); variables["[sample]"] = file2Group[i]; variables["[tag]"] = "reverse"; string newfrqFile = getOutputFileName("fastq", variables); util.openOutputFile(newfrqFile, temp2); temp2.close(); util.appendFiles(files[i][1], newfrqFile); outputNames.push_back(newfrqFile); outputTypes["fastq"].push_back(newfrqFile); } //if requested, make fasta and qual if (fasta || qual) { processFile(files[i], trimOligos, rtrimOligos); } //split = 1, so no parsing by group will be done. 
} } }else { inputFile = fastaQFile; processFile(fastaQFile, trimOligos, rtrimOligos); vector filesFakeOut; filesFakeOut.push_back(fastaQFile); files.push_back(filesFakeOut); } if ((fileOption == 2) && pacbio) { map variables; variables["[filename]"] = util.getRootName(file); string pacbioGroupFileName = getOutputFileName("count", variables); outputNames.push_back(pacbioGroupFileName); outputTypes["count"].push_back(pacbioGroupFileName); CountTable ct; ct.createTable(seqGroups); ct.printCompressedTable(pacbioGroupFileName); } if (split > 1) { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = util.hasPath(inputFile); } map vars; vars["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(inputFile)); string outputGroupFileName = getOutputFileName("count",vars); if (seqGroups.size() != 0) { //Create count file outputNames.push_back(outputGroupFileName); outputTypes["count"].push_back(outputGroupFileName); CountTable ct; ct.createTable(seqGroups); ct.printCompressedTable(outputGroupFileName); } for (int i = 0; i < files.size(); i++) { //process each pair for (int j = 0; j < files[i].size(); j++) { if (files[i][j] != "") { map > filenames = splitFastqFile(outputGroupFileName, files[i][j]); map >::iterator it = filenames.find("fastq"); if (it != filenames.end()) { //fastq files were produced for (int k = 0; k < it->second.size(); k++) { outputNames.push_back(it->second[k]); outputTypes["fastq"].push_back(it->second[k]); } } } } } //ffqnoMatchFile, rfqnoMatchFile, ffnoMatchFile, rfnoMatchFile, fqnoMatchFile, rqnoMatchFile if(util.isBlank(ffqnoMatchFile)){ util.mothurRemove(ffqnoMatchFile); } else { outputNames.push_back(ffqnoMatchFile); outputTypes["fastq"].push_back(ffqnoMatchFile); } if(fasta){ if(util.isBlank(ffnoMatchFile)){ util.mothurRemove(ffnoMatchFile); } else { outputNames.push_back(ffnoMatchFile); outputTypes["fasta"].push_back(ffnoMatchFile); } } if(qual){ if(util.isBlank(fqnoMatchFile)){ util.mothurRemove(fqnoMatchFile); } else { outputNames.push_back(fqnoMatchFile); outputTypes["qfile"].push_back(fqnoMatchFile); } } if (pairedOligos) { if (fileOption > 0) { if(util.isBlank(rfqnoMatchFile)){ util.mothurRemove(rfqnoMatchFile); } else { outputNames.push_back(rfqnoMatchFile); outputTypes["fastq"].push_back(rfqnoMatchFile); } if(fasta){ if(util.isBlank(rfnoMatchFile)){ util.mothurRemove(rfnoMatchFile); } else { outputNames.push_back(rfnoMatchFile); outputTypes["fasta"].push_back(rfnoMatchFile); } } if(qual){ if(util.isBlank(rqnoMatchFile)){ util.mothurRemove(rqnoMatchFile); } else { outputNames.push_back(rqnoMatchFile); outputTypes["qfile"].push_back(rqnoMatchFile); } } } } } //output group counts int total = 0; if (groupCounts.size() != 0) { m->mothurOut("\nGroup count: \n"); } for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { total += it->second; m->mothurOut(it->first + "\t" + toString(it->second) + "\n"); } if (total != 0) { m->mothurOut("\nTotal of all groups is " + toString(total) + "\n"); } if (groupfile != "") { delete groupMap; } if (oligosfile != "") { delete trimOligos; if (reorient) { delete rtrimOligos; } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); outputNames.clear(); return 0; } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; 
current->setFastaFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "execute"); exit(1); } } /**************************************************************************************************/ map > ParseFastaQCommand::splitFastqFile(string outputGroupFile, string resultFastqfile) { try { //run split.groups command //use unique.seqs to create new name and fastafile string inputString = "fastq=" + resultFastqfile + ", count=" + outputGroupFile; m->mothurOut("/******************************************/\n"); m->mothurOut("Generating parsed files... Running command: split.groups(" + inputString + ")\n"); current->setMothurCalling(true); Command* splitCommand = new SplitGroupCommand(inputString); splitCommand->execute(); map > filenames = splitCommand->getOutputFiles(); delete splitCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); if (fasta || qual) { //do we need to create a fasta and qual file for these split fastq files string fastaBool = "false"; string qualBool = "false"; if (fasta) { fastaBool = "true"; } if (qual) { qualBool = "true"; } map >::iterator it = filenames.find("fastq"); if (it != filenames.end()) { //fastq files were produced for (int k = 0; k < it->second.size(); k++) { string inputString = "fastq=" + it->second[k] + ", fasta=" + fastaBool + ", qfile=" + qualBool; m->mothurOut("/******************************************/\n"); m->mothurOut("Generating parsed fasta and qual files... Running command: fastq.info(" + inputString + ")\n"); current->setMothurCalling(true); Command* fastqCommand = new ParseFastaQCommand(inputString); fastqCommand->execute(); map > fnames = fastqCommand->getOutputFiles(); delete fastqCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); if (fasta) { map >::iterator itFastaName = fnames.find("fasta"); if (itFastaName != fnames.end()) { string fName = itFastaName->second[0]; outputNames.push_back(fName); outputTypes["fasta"].push_back(fName); } } if (qual) { map >::iterator itQualName = fnames.find("qfile"); if (itQualName != fnames.end()) { string qName = itQualName->second[0]; outputNames.push_back(qName); outputTypes["qfile"].push_back(qName); } } } } } return filenames; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "splitFastqFile"); exit(1); } } //********************************************************************************************************************** //assumes file option was used. //Adds reads to seqGroup and groupCounts for use with split.groups command later. 
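//Reads the forward and reverse fastq files (and index files, when present) in lockstep, one FastqRead from each per iteration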
//Outputs fasta and qual files for file pair if desired //Appends scrap files if needed int ParseFastaQCommand::processFile(vector files, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){ try { string inputfile = files[0]; string inputReverse = files[1]; //open Output Files map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); string ffastaFile = getOutputFileName("fasta",variables); string fqualFile = getOutputFileName("qfile",variables); variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputReverse)); string rfastaFile = getOutputFileName("fasta",variables); string rqualFile = getOutputFileName("qfile",variables); ofstream outfFasta, outfQual, outrFasta, outrQual; if (fasta) { util.openOutputFile(ffastaFile, outfFasta); outputNames.push_back(ffastaFile); outputTypes["fasta"].push_back(ffastaFile); util.openOutputFile(rfastaFile, outrFasta); outputNames.push_back(rfastaFile); outputTypes["fasta"].push_back(rfastaFile);} if (qual) { util.openOutputFile(fqualFile, outfQual); outputNames.push_back(fqualFile); outputTypes["qfile"].push_back(fqualFile); util.openOutputFile(rqualFile, outrQual); outputNames.push_back(rqualFile); outputTypes["qfile"].push_back(rqualFile); } ifstream inf; util.openInputFile(inputfile, inf); ifstream inr; util.openInputFile(inputReverse, inr); ifstream inFIndex, inRIndex; if (files[2] != "") { util.openInputFile(files[2], inFIndex); } if (files[3] != "") { util.openInputFile(files[3], inRIndex); } int count = 0; while (!inf.eof() && !inr.eof()) { if (m->getControl_pressed()) { break; } bool ignoref, ignorer; FastqRead thisfRead(inf, ignoref, format); FastqRead thisrRead(inr, ignorer, format); if (!ignoref && ! ignorer) { if (qual) { thisfRead.getQuality().printQScores(outfQual); thisrRead.getQuality().printQScores(outrQual); } if (pacbio) { //change sequence bases with 0 quality scores to N vector fqual = thisfRead.getScores(); vector rqual = thisrRead.getScores(); string fseq = thisfRead.getSeq(); string rseq = thisrRead.getSeq(); for (int i = 0; i < fqual.size(); i++) { if (fqual[i] == 0){ fseq[i] = 'N'; } } thisfRead.setSeq(fseq); for (int i = 0; i < rqual.size(); i++) { if (rqual[i] == 0){ rseq[i] = 'N'; } } thisrRead.setSeq(rseq); } FastqRead copyForward = thisfRead; FastqRead copyReverse = thisrRead; //print sequence info to files if (fasta) { thisfRead.getSequence().printSequence(outfFasta); thisrRead.getSequence().printSequence(outrFasta); } if (m->getControl_pressed()) { break; } if (split > 1) { Sequence findexBarcode("findex", "NONE"); Sequence rindexBarcode("rindex", "NONE"); if (fileOption == 4) { bool ignorefi, ignoreri; if (files[2] != "") { FastqRead thisfiRead(inFIndex, ignorefi, format); if (!ignorefi) { findexBarcode.setAligned(thisfiRead.getSequence().getAligned()); } } if (files[3] != "") { FastqRead thisriRead(inRIndex, ignoreri, format); if (!ignoreri) { rindexBarcode.setAligned(thisriRead.getSequence().getAligned()); } } } int trashCodeLength; string thisGroup = "ignore"; if (oligosfile != "") { QualityScores tempF = thisfRead.getQuality(); QualityScores tempR = thisrRead.getQuality(); if ((files[2] != "") || (files[3] != "")) { //has index files //barcode already removed so no need to reset sequence to trimmed version trashCodeLength = findGroup(findexBarcode, tempF, rindexBarcode, tempR, thisGroup, trimOligos, rtrimOligos, numBarcodes, numPrimers); }else { Sequence tempSeqF = thisfRead.getSequence(); Sequence tempSeqR = thisrRead.getSequence(); trashCodeLength = 
findGroup(tempSeqF, tempF, tempSeqR, tempR, thisGroup, trimOligos, rtrimOligos, numBarcodes, numPrimers); thisfRead.setSeq(tempSeqF.getUnaligned()); thisrRead.setSeq(tempSeqR.getUnaligned()); } thisfRead.setScores(tempF.getScores()); //set to trimmed scores thisrRead.setScores(tempR.getScores()); }else if (groupfile != "") { trashCodeLength = findGroup(thisfRead.getSequence(), thisGroup, "groupMode"); } else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); } bool addToScrap = false; if(trashCodeLength == 0){ int pos = thisGroup.find("ignore"); if (pos == string::npos) { if (thisGroup != "") { seqGroups[copyForward.getName()] = thisGroup; map::iterator it = groupCounts.find(thisGroup); if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } else { groupCounts[it->first]++; } } }else { addToScrap = true; } }else{ addToScrap = true; } if (addToScrap) { //print no match fastq ofstream out, out2; util.openOutputFileAppend(ffqnoMatchFile, out); copyForward.printFastq(out); out.close(); util.openOutputFileAppend(rfqnoMatchFile, out2); copyReverse.printFastq(out2); out2.close(); //print no match fasta, if wanted if (fasta) { ofstream outf, outr; util.openOutputFileAppend(ffnoMatchFile, outf); thisfRead.getSequence().printSequence(outf); outf.close(); util.openOutputFileAppend(rfnoMatchFile, outr); thisrRead.getSequence().printSequence(outr); outr.close(); } //print no match quality parse, if wanted if (qual) { ofstream outq, outq2; util.openOutputFileAppend(fqnoMatchFile, outq); thisfRead.getQuality().printQScores(outq); outq.close(); util.openOutputFileAppend(rqnoMatchFile, outq2); thisrRead.getQuality().printQScores(outq2); outq2.close(); } } } //report progress if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } count++; } } inf.close(); inr.close(); if (files[2] != "") { inFIndex.close(); } if (files[3] != "") { inRIndex.close(); } if (fasta) { outfFasta.close(); outrFasta.close(); } if (qual) { outfQual.close(); outrQual.close(); } //report progress if (!m->getControl_pressed()) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } return 0; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "processFile"); exit(1); } } //********************************************************************************************************************** set ParseFastaQCommand::processFile(string inputfile, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){ try { //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference. 
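// maps a Solexa score (-64..64) to a Sanger/Phred ASCII character: phred = 10*log10(1 + 10^(solexa/10)), then +33 offset and +0.499 to round to the nearest integer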
for (int i = -64; i < 65; i++) { char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499)); convertTable.push_back(temp); } //open Output Files map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); string fastaFile = getOutputFileName("fasta",variables); string qualFile = getOutputFileName("qfile",variables); ofstream outFasta, outQual; //fasta and quality files for whole input file if (fasta) { util.openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile); } if (qual) { util.openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); outputTypes["qfile"].push_back(qualFile); } ifstream in; util.openInputFile(inputfile, in); int count = 0; set names; while (!in.eof()) { if (m->getControl_pressed()) { break; } bool ignore; FastqRead thisRead(in, ignore, format); if (!ignore) { if (qual) { thisRead.getQuality().printQScores(outQual); } if (pacbio) { vector qual = thisRead.getScores(); string seq = thisRead.getSeq(); for (int i = 0; i < qual.size(); i++) { if (qual[i] == 0){ seq[i] = 'N'; } } thisRead.setSeq(seq); names.insert(thisRead.getName()); } FastqRead copy = thisRead; //print sequence info to files if (fasta) { thisRead.getSequence().printSequence(outFasta); } if (m->getControl_pressed()) { break; } if (split > 1) { int trashCodeLength = 0; string thisGroup = "ignore"; if (oligosfile != "") { Sequence tempSeq = thisRead.getSequence(); QualityScores tempQual = thisRead.getQuality(); trashCodeLength = findGroup(tempSeq, tempQual, thisGroup, trimOligos, rtrimOligos, numBarcodes, numPrimers); thisRead.setSeq(tempSeq.getUnaligned()); thisRead.setScores(tempQual.getScores()); } else if (groupfile != "") { trashCodeLength = findGroup(thisRead.getSequence(), thisGroup, "groupMode"); } else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); } bool addToScrap = false; if(trashCodeLength == 0){ int pos = thisGroup.find("ignore"); if (pos == string::npos) { if (thisGroup != "") { seqGroups[copy.getName()] = thisGroup; map::iterator it = groupCounts.find(thisGroup); if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } else { groupCounts[it->first]++; } } }else { addToScrap = true; } }else{ addToScrap = true; } if (addToScrap) { //print no match fastq ofstream out; util.openOutputFileAppend(ffqnoMatchFile, out); copy.printFastq(out); out.close(); //print no match fasta, if wanted if (fasta) { ofstream outf; util.openOutputFileAppend(ffnoMatchFile, outf); thisRead.getSequence().printSequence(outf); outf.close(); } //print no match quality parse, if wanted if (qual) { ofstream outq; util.openOutputFileAppend(fqnoMatchFile, outq); thisRead.getQuality().printQScores(outq); outq.close(); } } } //report progress if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } count++; } } in.close(); if (fasta) { outFasta.close(); } if (qual) { outQual.close(); } //report progress if (!m->getControl_pressed()){ if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } return names; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "processFile"); exit(1); } } //********************************************************************************************************************** int ParseFastaQCommand::findGroup(Sequence& currSeq, QualityScores& currQual, string& thisGroup, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos, int numBarcodes, int numPrimers) { try { int success = 1; int barcode, primer; 
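// trashCode collects one letter per failed check (k=linker, b=barcode, s=spacer, f=forward primer, r=reverse primer, t=total diffs exceeded, i=ignore group); an empty trashCode (length 0) means the read was assigned to a sample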
string trashCode = ""; int currentSeqsDiffs = 0; //for reorient Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); QualityScores savedQual(currQual.getName(), currQual.getScores()); if(numLinkers != 0){ success = trimOligos->stripLinker(currSeq, currQual); if(success > ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } } if(numBarcodes != 0){ vector results = trimOligos->stripBarcode(currSeq, currQual, barcode); if (pairedOligos) { success = results[0] + results[2]; } else { success = results[0]; } if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } if(numSpacers != 0){ success = trimOligos->stripSpacer(currSeq, currQual); if(success > sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } } if(numPrimers != 0){ vector results = trimOligos->stripForward(currSeq, currQual, primer, true); if (pairedOligos) { success = results[0] + results[2]; } else { success = results[0]; } if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } if(numRPrimers != 0){ vector results = trimOligos->stripReverse(currSeq, currQual); success = results[0]; if(success > pdiffs) { trashCode += 'r'; } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } if (reorient && (trashCode != "")) { //if you failed and want to check the reverse int thisSuccess = 0; string thisTrashCode = ""; int thisCurrentSeqsDiffs = 0; int thisBarcodeIndex = 0; int thisPrimerIndex = 0; if(numBarcodes != 0){ vector results = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); if (pairedOligos) { thisSuccess = results[0] + results[2]; } else { thisSuccess = results[0]; } if(thisSuccess > bdiffs) { thisTrashCode += "b"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if(numPrimers != 0){ vector results = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true); if (pairedOligos) { thisSuccess = results[0] + results[2]; } else { thisSuccess = results[0]; } if(thisSuccess > pdiffs) { thisTrashCode += "f"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } if (thisTrashCode == "") { trashCode = thisTrashCode; success = thisSuccess; currentSeqsDiffs = thisCurrentSeqsDiffs; barcode = thisBarcodeIndex; primer = thisPrimerIndex; }else { trashCode += "(" + thisTrashCode + ")"; } } if (trashCode.length() == 0) { //is this sequence in the ignore group thisGroup = oligos.getGroupName(barcode, primer); int pos = thisGroup.find("ignore"); if (pos != string::npos) { trashCode += "i"; } } return trashCode.length(); } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "findGroup"); exit(1); } } //********************************************************************************************************************** int ParseFastaQCommand::findGroup(Sequence seq, string& group, string groupMode) { try { string trashCode = ""; group = groupMap->getGroup(seq.getName()); if (group == "not found") { trashCode += "g"; } //scrap for group return trashCode.length(); } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "findGroup"); exit(1); } } //********************************************************************************************************************** int ParseFastaQCommand::findGroup(Sequence& fcurrSeq, QualityScores& fcurrQual, Sequence& rcurrSeq, QualityScores& rcurrQual, string& thisGroup, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos, int numBarcodes, int numPrimers) { try { int success = 1; int barcode, primer; string 
trashCode = ""; int currentSeqsDiffs = 0; //for reorient Sequence fsavedSeq(fcurrSeq.getName(), fcurrSeq.getAligned()); QualityScores fsavedQual(fcurrQual.getName(), fcurrQual.getScores()); Sequence rsavedSeq(rcurrSeq.getName(), rcurrSeq.getAligned()); QualityScores rsavedQual(rcurrQual.getName(), rcurrQual.getScores()); if(numBarcodes != 0){ vector results = trimOligos->stripBarcode(fcurrSeq, rcurrSeq, fcurrQual, rcurrQual, barcode); if (pairedOligos) { success = results[0] + results[2]; } else { success = results[0]; } if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } if(numPrimers != 0){ vector results = trimOligos->stripForward(fcurrSeq, rcurrSeq, fcurrQual, rcurrQual, primer); if (pairedOligos) { success = results[0] + results[2]; } else { success = results[0]; } if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } if (reorient && (trashCode != "")) { //if you failed and want to check the reverse int thisSuccess = 0; string thisTrashCode = ""; int thisCurrentSeqsDiffs = 0; int thisBarcodeIndex = 0; int thisPrimerIndex = 0; if(numBarcodes != 0){ vector results = rtrimOligos->stripBarcode(fsavedSeq, rsavedSeq, fsavedQual, rsavedQual, thisBarcodeIndex); if (pairedOligos) { thisSuccess = results[0] + results[2]; } else { thisSuccess = results[0]; } if(thisSuccess > bdiffs) { thisTrashCode += 'b'; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if(numPrimers != 0){ vector results = rtrimOligos->stripForward(fsavedSeq, rsavedSeq, fsavedQual, rsavedQual, thisPrimerIndex); if (pairedOligos) { thisSuccess = results[0] + results[2]; } else { thisSuccess = results[0]; } if(thisSuccess > pdiffs) { thisTrashCode += 'f'; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } if (thisTrashCode == "") { trashCode = thisTrashCode; success = thisSuccess; currentSeqsDiffs = thisCurrentSeqsDiffs; barcode = thisBarcodeIndex; primer = thisPrimerIndex; }else { trashCode += "(" + thisTrashCode + ")"; } } if (trashCode.length() == 0) { //is this sequence in the ignore group thisGroup = oligos.getGroupName(barcode, primer); int pos = thisGroup.find("ignore"); if (pos != string::npos) { trashCode += "i"; } } return trashCode.length(); } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "findGroup"); exit(1); } } //********************************************************************************************************************** /* file option 1 ffastqfile1 rfastqfile1 ffastqfile2 rfastqfile2 ... file option 2 group ffastqfile rfastqfile group ffastqfile rfastqfile group ffastqfile rfastqfile ... 
file option 3 My.forward.fastq My.reverse.fastq none My.rindex.fastq //none is an option is no forward or reverse index file */ //lines can be 2, 3, or 4 columns // forward.fastq reverse.fastq -> 2 column // groupName forward.fastq reverse.fastq -> 3 column // forward.fastq reverse.fastq forward.index.fastq reverse.index.fastq -> 4 column // forward.fastq reverse.fastq none reverse.index.fastq -> 4 column // forward.fastq reverse.fastq forward.index.fastq none -> 4 column vector< vector > ParseFastaQCommand::readFile(){ try { string mode = "parseFastq"; if (pacbio) { mode = "parsefastqpacbio"; } //reads 2 column option as group filename FileFile dataFile(inputfile, mode); vector< vector > files = dataFile.getFiles(); //if pacbio 2 columns, files[x][0] = filename, files[x][1] = "", files[x][2] = "", files[x][3] = "", file2Group = dataFile.getFile2Group(); createFileGroup = dataFile.isColumnWithGroupNames(); hasIndex = dataFile.containsIndexFiles(); int dataFileFormat = dataFile.getFileFormat(); if (hasIndex && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->setControl_pressed(true); } if ((oligosfile != "") && (dataFileFormat == 2)) { m->mothurOut("[ERROR]: You cannot have an oligosfile and 3 column file option at the same time. Aborting. \n"); m->setControl_pressed(true); } if ((oligosfile != "") && (dataFileFormat == 1) && pacbio) { m->mothurOut("[ERROR]: You cannot have an oligosfile and 2 column pacbio file option at the same time. Aborting. \n"); m->setControl_pressed(true); } if ((groupfile != "") && (dataFileFormat == 2)){ m->mothurOut("[ERROR]: You cannot have an groupfile and 3 column file option at the same time. Aborting. \n"); m->setControl_pressed(true); } for (int i = 0; i < files.size(); i++) { string group = ""; string forward, reverse, findex, rindex; forward = files[i][0]; reverse = files[i][1]; findex = files[i][2]; rindex = files[i][3]; if (dataFileFormat == 1) { //2 column fileOption = 2; }else if (dataFileFormat == 2) { //3 column fileOption = 3; }else if (dataFileFormat == 3) { //4 column fileOption = 4; if ((findex == "none") || (findex == "NONE")){ files[i][2] = ""; } if ((rindex == "none") || (rindex == "NONE")){ files[i][3] = ""; } } } if (files.size() == 0) { m->setControl_pressed(true); } return files; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "readFileNames"); exit(1); } } //*************************************************************************************************************** bool ParseFastaQCommand::readOligos(string oligoFile){ try { bool allBlank = false; if (fileOption > 0) { oligos.read(oligosfile, false); } // like make.contigs else { oligos.read(oligosfile); } if (m->getControl_pressed()) { return false; } //error in reading oligos if (oligos.hasPairedPrimers() || oligos.hasPairedBarcodes()) { pairedOligos = true; numPrimers = oligos.getPairedPrimers().size(); numBarcodes = oligos.getPairedBarcodes().size(); }else { pairedOligos = false; numPrimers = oligos.getPrimers().size(); numBarcodes = oligos.getBarcodes().size(); } numLinkers = oligos.getLinkers().size(); numSpacers = oligos.getSpacers().size(); numRPrimers = oligos.getReversePrimers().size(); vector groupNames = oligos.getSRAGroupNames(); if (groupNames.size() == 0) { allBlank = true; } if (allBlank) { m->mothurOut("[WARNING]: your oligos file does not contain any group names. 
mothur will not create a groupfile.\n"); return false; } //make blank files for scrap matches ofstream temp, tempff, tempfq, rtemp, temprf, temprq; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[sample]"] = "scrap"; if (fileOption > 0) { variables["[tag]"] = "forward"; } ffqnoMatchFile = getOutputFileName("fastq", variables); util.openOutputFile(ffqnoMatchFile, temp); temp.close(); if (fileOption > 0) { variables["[tag]"] = "reverse"; rfqnoMatchFile = getOutputFileName("fastq", variables); util.openOutputFile(rfqnoMatchFile, rtemp); rtemp.close(); } if (fasta) { if (fileOption > 0) { variables["[tag]"] = "forward"; } ffnoMatchFile = getOutputFileName("fasta", variables); util.openOutputFile(ffnoMatchFile, tempff); tempff.close(); if (fileOption > 0) { variables["[tag]"] = "reverse"; rfnoMatchFile = getOutputFileName("fasta", variables); util.openOutputFile(rfnoMatchFile, temprf); temprf.close(); } } if (qual) { if (fileOption > 0) { variables["[tag]"] = "forward"; } fqnoMatchFile = getOutputFileName("qfile", variables); util.openOutputFile(fqnoMatchFile, tempfq); tempfq.close(); if (fileOption > 0) { variables["[tag]"] = "reverse"; rqnoMatchFile = getOutputFileName("qfile", variables); util.openOutputFile(rqnoMatchFile, temprq); temprq.close(); } } return true; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "getOligos"); exit(1); } } //*************************************************************************************************************** bool ParseFastaQCommand::readGroup(string groupfile){ try { groupMap = new GroupMap(); groupMap->readMap(groupfile); vector groups = groupMap->getNamesOfGroups(); return true; } catch(exception& e) { m->errorOut(e, "ParseFastaQCommand", "readGroup"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/fastaqinfocommand.h000066400000000000000000000045101424121717000220650ustar00rootroot00000000000000#ifndef PARSEFASTAQCOMMAND_H #define PARSEFASTAQCOMMAND_H /* * parsefastaqcommand.h * Mothur * * Created by westcott on 9/30/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" #include "trimoligos.h" #include "sequence.hpp" #include "fastqread.h" #include "groupmap.h" #include "oligos.h" #include "filefile.hpp" #include "splitgroupscommand.h" class ParseFastaQCommand : public Command { public: ParseFastaQCommand(string); ~ParseFastaQCommand() = default; vector setParameters(); string getCommandName() { return "fastq.info"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Parse.fastq"; } string getDescription() { return "reads a fastq file and creates a fasta and quality file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames; string inputDir, fastaQFile, format, oligosfile, groupfile, file, inputfile, ffastq, rfastq; bool abort, fasta, qual, pacbio, pairedOligos, reorient, createFileGroup, hasIndex; int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split, numBarcodes, numPrimers, numLinkers, numSpacers, numRPrimers, fileOption; GroupMap* groupMap; Oligos oligos; map file2Group; vector< vector > readFile(); string ffqnoMatchFile, rfqnoMatchFile, ffnoMatchFile, rfnoMatchFile, fqnoMatchFile, rqnoMatchFile; vector Groups; map seqGroups; map groupCounts; set processFile(string inputfile, TrimOligos*&, TrimOligos*&); int processFile(vector inputfiles, TrimOligos*&, TrimOligos*&); vector convertQual(string); vector convertTable; bool readOligos(string oligosFile); bool readGroup(string oligosFile); int findGroup(Sequence&, QualityScores&, string&, TrimOligos*&, TrimOligos*&, int, int); int findGroup(Sequence, string&, string); int findGroup(Sequence&, QualityScores&, Sequence&, QualityScores&, string&, TrimOligos*&, TrimOligos*&, int, int); map > splitFastqFile(string outputGroupFile, string resultFastqfile); //uses split.groups command to parse the reads by sample }; #endif mothur-1.48.0/source/commands/filterseqscommand.cpp000066400000000000000000000571441424121717000224610ustar00rootroot00000000000000/* * filterseqscommand.cpp * Mothur * * Created by Thomas Ryabin on 5/4/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "filterseqscommand.h" #include "sequence.hpp" //********************************************************************************************************************** vector FilterSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-filter",false,true, true); parameters.push_back(pfasta); CommandParameter phard("hard", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(phard); CommandParameter ptrump("trump", "String", "", "*", "", "", "","",false,false, true); parameters.push_back(ptrump); CommandParameter psoft("soft", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psoft); CommandParameter pvertical("vertical", "Boolean", "", "T", "", "", "","",false,false, true); parameters.push_back(pvertical); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false, true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["filter"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string FilterSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n"; helpString += "The filter.seqs command parameters are fasta, trump, soft, hard, processors and vertical. \n"; helpString += "The fasta parameter is required, unless you have a valid current fasta file. You may enter several fasta files to build the filter from and filter, by separating their names with |'s.\n"; helpString += "For example: fasta=abrecovery.fasta|amazon.fasta \n"; helpString += "The trump option will remove a column if the trump character is found at that position in any sequence of the alignment. Default=*, meaning no trump. \n"; helpString += "A soft mask removes any column where the dominant base (i.e. A, T, G, C, or U) does not occur in at least a designated percentage of sequences. Default=0.\n"; helpString += "The hard parameter allows you to enter a file containing the filter you want to use.\n"; helpString += "The vertical parameter removes columns where all sequences contain a gap character. The default is T.\n"; helpString += "The processors parameter allows you to specify the number of processors to use. 
The default is 1.\n"; helpString += "The filter.seqs command should be in the following format: \n"; helpString += "filter.seqs(fasta=yourFastaFile, trump=yourTrump) \n"; helpString += "Example filter.seqs(fasta=abrecovery.fasta, trump=.).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string FilterSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],filter.fasta"; } else if (type == "filter") { pattern = "[filename],filter"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "getOutputPattern"); exit(1); } } /**************************************************************************************/ FilterSeqsCommand::FilterSeqsCommand(string option) : Command() { try { recalced = false; filterFileName = ""; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fasta = validParameter.validPath(parameters, "fasta"); if (fasta == "not found") { fasta = current->getFastaFile(); if (fasta != "") { fastafileNames.push_back(fasta); m->mothurOut("Using " + fasta + " as input file for the fasta parameter.\n"); string simpleName = util.getSimpleName(fasta); filterFileName += simpleName.substr(0, simpleName.find_first_of('.')); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } } else { util.splitAtChar(fasta, fastafileNames, '|'); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < fastafileNames.size(); i++) { bool ignore = false; if (fastafileNames[i] == "current") { fastafileNames[i] = current->getFastaFile(); if (fastafileNames[i] != "") { m->mothurOut("Using " + fastafileNames[i] + " as input file for the fasta parameter where you had given current.\n"); } else { m->mothurOut("You have no current fastafile, ignoring current.\n"); ignore=true; //erase from file list fastafileNames.erase(fastafileNames.begin()+i); i--; } } if (!ignore) { if (util.checkLocations(fastafileNames[i], current->getLocations())) { string simpleName = util.getSimpleName(fastafileNames[i]); filterFileName += simpleName.substr(0, simpleName.find_first_of('.')); current->setFastaFile(fastafileNames[i]); } else { fastafileNames.erase(fastafileNames.begin()+i); i--; } //erase from file list } } //make sure there is at least one valid file left if (fastafileNames.size() == 0) { m->mothurOut("no valid files.\n"); abort = true; } } if (!abort) { if (outputdir == ""){ outputdir += util.hasPath(fastafileNames[0]); } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
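//A hedged usage sketch of the options parsed below (file name hypothetical):
//   mothur > filter.seqs(fasta=my.align.fasta, vertical=T, trump=., processors=8)
//vertical=T drops columns that are a gap in every sequence; trump=. drops any
//column containing '.' in any sequence, as described in getHelpString above.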
string temp; hard = validParameter.validFile(parameters, "hard"); if (hard == "not found") { hard = ""; } else if (hard == "not open") { hard = ""; abort = true; } temp = validParameter.valid(parameters, "trump"); if (temp == "not found") { temp = "*"; } trump = temp[0]; temp = validParameter.valid(parameters, "soft"); if (temp == "not found") { soft = 0; } else { soft = (float)atoi(temp.c_str()) / 100.0; } temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); vertical = validParameter.valid(parameters, "vertical"); if (vertical == "not found") { if ((hard == "") && (trump == '*') && (soft == 0)) { vertical = "T"; } //you have not given a hard file or set the trump char. else { vertical = "F"; } } numSeqs = 0; } } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "FilterSeqsCommand"); exit(1); } } /**************************************************************************************/ int FilterSeqsCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } ifstream inFASTA; util.openInputFile(fastafileNames[0], inFASTA); Sequence testSeq(inFASTA); alignmentLength = testSeq.getAlignLength(); inFASTA.close(); ////////////create filter///////////////// m->mothurOut("Creating Filter...\n"); filter = createFilter(); m->mothurOutEndLine(); m->mothurOutEndLine(); if (m->getControl_pressed()) { outputTypes.clear(); return 0; } ofstream outFilter; //prevent giantic file name map variables; variables["[filename]"] = outputdir + filterFileName + "."; if (fastafileNames.size() > 3) { variables["[filename]"] = outputdir + "merge."; } string filterFile = getOutputFileName("filter", variables); util.openOutputFile(filterFile, outFilter); outFilter << filter << endl; outFilter.close(); outputNames.push_back(filterFile); outputTypes["filter"].push_back(filterFile); ////////////run filter///////////////// m->mothurOut("Running Filter...\n"); filterSequences(); m->mothurOutEndLine(); m->mothurOutEndLine(); int filteredLength = 0; for(int i=0;igetControl_pressed()) { outputTypes.clear(); for(int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOutEndLine(); m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine(); m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine(); m->mothurOut("Length of the original alignment: " + toString(alignmentLength)); m->mothurOutEndLine(); m->mothurOut("Number of sequences used to construct filter: " + toString(numSeqs)); m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "execute"); exit(1); } } /**************************************************************************************/ int FilterSeqsCommand::filterSequences() { try { numSeqs = 0; for (int s = 0; s < fastafileNames.size(); s++) { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafileNames[s])); string filteredFasta = getOutputFileName("fasta", 
variables); vector positions; if (savedPositions.size() != 0) { positions = savedPositions[s]; } else { #if defined NON_WINDOWS positions = util.divideFile(fastafileNames[s], processors); #else positions = util.setFilePosFasta(fastafileNames[s], numSeqs); if (numSeqs < processors) { processors = numSeqs; } #endif } vector lines; #if defined NON_WINDOWS for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else long long numFSeqs = positions.size()-1; if (numFSeqs < processors) { processors = numFSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numFSeqs / processors; for (int i = 0; i < processors; i++) { long long startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif long long numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta, lines); numSeqs += numFastaSeqs; outputNames.push_back(filteredFasta); outputTypes["fasta"].push_back(filteredFasta); } return 0; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "filterSequences"); exit(1); } } /**************************************************************************************/ void driverRunFilter(filterRunData* params) { try { ifstream in; params->util.openInputFile(params->filename, in); in.seekg(params->start); //adjust start if null strings if (params->start == 0) { params->util.zapGremlins(in); gobble(in); } bool done = false; params->count = 0; string outBuffer = ""; while (!done) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { string align = seq.getAligned(); string filterSeq = ""; for(int j=0;jalignmentLength;j++){ if(params->filter[j] == '1'){ filterSeq += align[j]; } } outBuffer += '>' + seq.getName() + '\n' + filterSeq + '\n'; } params->count++; //report progress if((params->count) % 1000 == 0){ params->outputWriter->write(outBuffer); outBuffer = ""; params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } #if defined NON_WINDOWS unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->count == params->end) { break; } #endif } //report progress if((params->count) % 1000 != 0){ params->outputWriter->write(outBuffer); params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } in.close(); } catch(exception& e) { params->m->errorOut(e, "FilterSeqsCommand", "driverRunFilter"); exit(1); } } /**************************************************************************************************/ long long FilterSeqsCommand::createProcessesRunFilter(string F, string filename, string filteredFastaName, vector lines) { try { util.mothurRemove(filteredFastaName); long long num = 0; //create array of worker threads vector workerThreads; vector data; time_t start, end; time(&start); auto synchronizedOutputFile = std::make_shared(filteredFastaName); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadWriter = new OutputWriter(synchronizedOutputFile); filterRunData* dataBundle = new filterRunData(filter, filename, threadWriter, lines[i+1].start, lines[i+1].end, alignmentLength); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverRunFilter, dataBundle)); } OutputWriter* threadWriter = new OutputWriter(synchronizedOutputFile); filterRunData* dataBundle = new filterRunData(filter, filename, 
threadWriter, lines[0].start, lines[0].end, alignmentLength); data.push_back(dataBundle); driverRunFilter(dataBundle); num = dataBundle->count; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->outputWriter; delete data[i]; delete workerThreads[i]; } synchronizedOutputFile->close(); delete threadWriter; delete dataBundle; time(&end); m->mothurOut("It took " + toString(difftime(end, start)) + " secs to filter " + toString(num) + " sequences.\n"); return num; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "createProcessesRunFilter"); exit(1); } } /**************************************************************************************/ string FilterSeqsCommand::createFilter() { try { string filterString = ""; Filters F; if (!util.isEqual(soft, 0)) { F.setSoft(soft); } if (trump != '*') { F.setTrump(trump); } F.setLength(alignmentLength); if(trump != '*' || util.isTrue(vertical) || !util.isEqual(soft, 0)){ F.initialize(); } if(hard.compare("") != 0) { F.doHard(hard); } else { F.setFilter(string(alignmentLength, '1')); } numSeqs = 0; if(trump != '*' || util.isTrue(vertical) || !util.isEqual(soft, 0)){ for (int s = 0; s < fastafileNames.size(); s++) { numSeqs += createProcessesCreateFilter(F, fastafileNames[s]); if (m->getControl_pressed()) { return filterString; } } } F.setNumSeqs(numSeqs); if(util.isTrue(vertical) == 1) { F.doVertical(); } if(!util.isEqual(soft, 0)) { F.doSoft(); } filterString = F.getFilter(); return filterString; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "createFilter"); exit(1); } } /**************************************************************************************/ void driverCreateFilter(filterData* params) { try { if (!params->util.isEqual(params->soft, 0)) { params->F.setSoft(params->soft); } if (params->trump != '*') { params->F.setTrump(params->trump); } params->F.setLength(params->alignmentLength); if(params->trump != '*' || params->vertical || !params->util.isEqual(params->soft, 0)){ params->F.initialize(); } if(params->hard.compare("") != 0) { params->F.doHard(params->hard); } else { params->F.setFilter(string(params->alignmentLength, '1')); } ifstream in; params->util.openInputFile(params->filename, in); in.seekg(params->start); //adjust start if null strings if (params->start == 0) { params->util.zapGremlins(in); gobble(in); } bool done = false; params->count = 0; bool error = false; while (!done) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { if (params->m->getDebug()) { params->m->mothurOutJustToScreen("[DEBUG]: " + seq.getName() + " length = " + toString(seq.getAligned().length()) + '\n'); } if (seq.getAligned().length() != params->alignmentLength) { params->m->mothurOut("[ERROR]: Sequences are not all the same length, please correct.\n"); error = true; if (!params->m->getDebug()) { params->m->setControl_pressed(true); }else{ params->m->mothurOutJustToLog("[DEBUG]: " + seq.getName() + " length = " + toString(seq.getAligned().length()) + '\n'); } } if(params->trump != '*') { params->F.doTrump(seq); } if(params->vertical || !params->util.isEqual(params->soft, 0)) { params->F.getFreqs(seq); } cout.flush(); params->count++; } #if defined NON_WINDOWS unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->count == params->end) { break; } #endif //report progress if((params->count) % 1000 == 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } } 
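//Worked sketch of the filter semantics (hypothetical 3-sequence alignment): for
//reads "A-GT", "A-G-" and "T-GC", column 2 is a gap in every sequence, so a
//vertical filter is "1011"; driverRunFilter later keeps only columns whose
//filter character is '1', turning "A-GT" into "AGT".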
//report progress if((params->count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } in.close(); if (error) { params->m->setControl_pressed(true); } } catch(exception& e) { params->m->errorOut(e, "FilterSeqsCommand", "driverCreateFilter"); exit(1); } } /**************************************************************************************************/ long long FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) { try { vector lines; vector positions; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else long long numFastaSeqs = 0; positions = util.setFilePosFasta(filename, numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { long long startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //save the file positions so we can reuse them in the runFilter function if (!recalced) { savedPositions.push_back(positions); } long long num = 0; bool doVertical = util.isTrue(vertical); //create array of worker threads vector workerThreads; vector data; time_t start, end; time(&start); //Lauch worker threads for (int i = 0; i < processors-1; i++) { filterData* dataBundle = new filterData(filename, lines[i+1].start, lines[i+1].end, alignmentLength, trump, doVertical, soft, hard, i+1); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverCreateFilter, dataBundle)); } filterData* dataBundle = new filterData(filename, lines[0].start, lines[0].end, alignmentLength, trump, doVertical, soft, hard, 0); driverCreateFilter(dataBundle); num = dataBundle->count; F.mergeFilter(dataBundle->F.getFilter()); for (int k = 0; k < alignmentLength; k++) { F.a[k] += dataBundle->F.a[k]; } for (int k = 0; k < alignmentLength; k++) { F.t[k] += dataBundle->F.t[k]; } for (int k = 0; k < alignmentLength; k++) { F.g[k] += dataBundle->F.g[k]; } for (int k = 0; k < alignmentLength; k++) { F.c[k] += dataBundle->F.c[k]; } for (int k = 0; k < alignmentLength; k++) { F.gap[k] += dataBundle->F.gap[k]; } delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; F.mergeFilter(data[i]->F.getFilter()); for (int k = 0; k < alignmentLength; k++) { F.a[k] += data[i]->F.a[k]; } for (int k = 0; k < alignmentLength; k++) { F.t[k] += data[i]->F.t[k]; } for (int k = 0; k < alignmentLength; k++) { F.g[k] += data[i]->F.g[k]; } for (int k = 0; k < alignmentLength; k++) { F.c[k] += data[i]->F.c[k]; } for (int k = 0; k < alignmentLength; k++) { F.gap[k] += data[i]->F.gap[k]; } delete data[i]; delete workerThreads[i]; } time(&end); m->mothurOut("It took " + toString(difftime(end, start)) + " secs to create filter for " + toString(num) + " sequences.\n"); if (m->getDebug()) { m->mothurOut("[DEBUG]: filter = " + F.getFilter() + "\n\n"); } return num; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "createProcessesCreateFilter"); exit(1); } } /**************************************************************************************/ mothur-1.48.0/source/commands/filterseqscommand.h000077500000000000000000000063251424121717000221240ustar00rootroot00000000000000#ifndef 
FILTERSEQSCOMMAND_H #define FILTERSEQSCOMMAND_H /* * filterseqscommand.h * Mothur * * Created by Thomas Ryabin on 5/4/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "filters.h" class Sequence; class FilterSeqsCommand : public Command { public: FilterSeqsCommand(string); ~FilterSeqsCommand() = default;; vector setParameters(); string getCommandName() { return "filter.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Filter.seqs"; } string getDescription() { return "removes columns from alignments based on a criteria defined by the user"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector< vector > savedPositions; string vertical, filter, fasta, hard, filterFileName; vector fastafileNames; int alignmentLength, processors; vector bufferSizes; vector outputNames; char trump; bool abort, recalced; float soft; long long numSeqs; string createFilter(); int filterSequences(); long long createProcessesCreateFilter(Filters&, string); long long createProcessesRunFilter(string, string, string, vector); }; /**************************************************************************************************/ //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct filterData { Filters F; int alignmentLength, threadid; unsigned long long start, end; long long count; MothurOut* m; string filename, hard; char trump; float soft; bool vertical; Utils util; filterData(){} filterData(string fn, unsigned long long st, unsigned long long en, int aLength, char tr, bool vert, float so, string ha, int tid) { filename = fn; m = MothurOut::getInstance(); start = st; end = en; trump = tr; alignmentLength = aLength; vertical = vert; soft = so; hard = ha; count = 0; threadid = tid; } }; /**************************************************************************************************/ //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct filterRunData { int alignmentLength; unsigned long long start, end; long long count; MothurOut* m; string filename; string filter; OutputWriter* outputWriter; Utils util; filterRunData(){} filterRunData(string f, string fn, OutputWriter* ofn, unsigned long long st, unsigned long long en, int aLength) { filter = f; outputWriter = ofn; filename = fn; m = MothurOut::getInstance(); start = st; end = en; alignmentLength = aLength; count = 0; } }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/filtersharedcommand.cpp000077500000000000000000000471031424121717000227510ustar00rootroot00000000000000// // filtersharedcommand.cpp // Mothur // // Created by Sarah Westcott on 1/4/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
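//
//  filter.shared removes OTUs that fail user-defined abundance/prevalence
//  criteria. A hedged usage sketch, reusing the example from getHelpString below:
//     mothur > filter.shared(shared=final.an.shared, minabund=10)
//  If no filtering parameter is set, the constructor defaults to minabund=1.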
// #include "filtersharedcommand.h" //********************************************************************************************************************** vector FilterSharedCommand::setParameters(){ try { CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","shared",false,true,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pminpercent("minpercent", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercent); CommandParameter prarepercent("rarepercent", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(prarepercent); CommandParameter pminabund("minabund", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminabund); CommandParameter pmintotal("mintotal", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pmintotal); CommandParameter pminnumsamples("minnumsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminnumsamples); CommandParameter pminpercentsamples("minpercentsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercentsamples); CommandParameter pkeepties("keepties", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pkeepties); CommandParameter pmakerare("makerare", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pmakerare); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["shared"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "FilterSharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string FilterSharedCommand::getHelpString(){ try { string helpString = ""; helpString += "The filter.shared command is used to remove OTUs based on various critieria.\n"; helpString += "The filter.shared command parameters are shared, minpercent, minabund, mintotal, minnumsamples, minpercentsamples, rarepercent, makerare, keepties, groups and label. You must provide a shared file.\n"; helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n"; helpString += "You may provide an accnos containing the list of groups to get instead of setting the groups parameter to the groups you wish to select.\n"; helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; helpString += "The minabund parameter allows you indicate the minimum abundance required for each sample in a given OTU. If any samples abundance falls below the minimum, the OTU is removed. 
Default=0\n"; helpString += "The minpercent parameter allows you indicate the minimum relative abundance of an OTU. For example, if the OTUs total abundance across all samples is 8, and the total abundance across all OTUs is 1000, and minpercent=1. The OTU's relative abundance is 0.008, the minimum is 0.01, so the OTU will be removed. Default=0.\n"; helpString += "The rarepercent parameter allows you indicate the percentage of otus to remove. The OTUs chosen to be removed are the rarest. For example if you have 1000 OTUs, rarepercent=20 would remove the 200 OTUs with the lowest abundance. Default=0.\n"; helpString += "The keepties parameter is used with the rarepercent parameter. It allows you indicate you want to keep the OTUs with the same abundance as the first 'not rare' OTU. For example if you have 10 OTUs, rarepercent=20 abundances of 20, 18, 15, 15, 10, 5, 3, 3, 3, 1. keepties=t, would remove the 10th OTU, but keep the 9th because its abundance ties the 8th OTU. keepties=f would remove OTUs 9 and 10. Default=T\n"; helpString += "The minnumsamples parameter allows you indicate the minimum number of samples present in an OTU. If the number of samples present falls below the minimum, the OTU is removed. Default=0.\n"; helpString += "The minpercentsamples parameter allows you indicate the minimum percent of sample present in an OTU. For example, if the total number of samples is 10, the number present is 3, and the minpercentsamples=50. The OTU's precent of samples is 0.333, the minimum is 0.50, so the OTU will be removed. Default=0.\n"; helpString += "The mintotal parameter allows you indicate the minimum abundance required for a given OTU. If abundance across all samples falls below the minimum, the OTU is removed. Default=0.\n"; helpString += "The makerare parameter allows you indicate you want the abundances of any removed OTUs to be saved and a new \"rare\" OTU created with its abundances equal to the sum of the OTUs removed. This will preserve the number of reads in your dataset. 
Default=T\n"; helpString += "The filter.shared command should be in the following format: filter.shared(shared=yourSharedFile, minabund=yourMinAbund, groups=yourGroups, label=yourLabels).\n"; helpString += "Example filter.shared(shared=final.an.shared, minabund=10).\n"; helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n"; helpString += "The filter.shared command outputs a .filter.shared file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "FilterSharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string FilterSharedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "shared") { pattern = "[filename],[distance],filter,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "FilterSharedCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** FilterSharedCommand::FilterSharedCommand(string option) : Command() { try { allLines = true; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; pickedGroups = false; } else { pickedGroups = true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = ""; } else { current->setAccnosFile(accnosfile); util.readAccnos(accnosfile, Groups); } //load groups in accnos file into Groups parameter bool setSomething = false; string temp = validParameter.valid(parameters, "minabund"); if (temp == "not found"){ temp = "-1"; } else { setSomething = true; } util.mothurConvert(temp, minAbund); temp = validParameter.valid(parameters, "mintotal"); if (temp == "not found"){ temp = "-1"; } else { setSomething = true; } util.mothurConvert(temp, minTotal); temp = validParameter.valid(parameters, "minnumsamples"); if (temp == "not found"){ temp = "-1"; } else { setSomething = true; } util.mothurConvert(temp, minSamples); temp = validParameter.valid(parameters, "minpercent"); if (temp == "not found"){ temp = "-1"; } else { setSomething = true; } util.mothurConvert(temp, minPercent); if (minPercent == -1) { minPercent = -0.01; } else if (minPercent < 1) {} //already in percent form else { minPercent = minPercent / 100.0; } //user gave us a whole number version so convert to % temp = validParameter.valid(parameters, "rarepercent"); if (temp == "not found"){ temp = "-1"; } else { setSomething = true; } util.mothurConvert(temp, rarePercent); if (rarePercent == -1) { rarePercent = -0.01; } else if (rarePercent < 1) {} //already in percent form else { rarePercent = rarePercent / 100.0; } //user gave us a whole number version so convert to % temp = validParameter.valid(parameters, "minpercentsamples"); if (temp == "not found"){ temp = "-1"; } else { setSomething = true; } util.mothurConvert(temp, minPercentSamples); if (minPercentSamples == -1) { minPercentSamples = -0.01; } else if (minPercentSamples < 1) {} //already in percent form else { minPercentSamples = minPercentSamples / 100.0; } //user gave us a whole number version so convert to % temp = validParameter.valid(parameters, "makerare"); if (temp == "not found"){ temp = "T"; } makeRare = util.isTrue(temp); temp = validParameter.valid(parameters, "keepties"); if (temp == "not found"){ temp = "T"; } keepties = util.isTrue(temp); if (!setSomething) { m->mothurOut("\nYou did not set any parameters. 
I will filter using minabund=1.\n\n"); minAbund = 1; } } } catch(exception& e) { m->errorOut(e, "FilterSharedCommand", "FilterSharedCommand"); exit(1); } } //********************************************************************************************************************** int FilterSharedCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } processShared(lookup); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } if (m->getControl_pressed()) { return 0; } //set shared file as new current sharedfile string currentName = ""; itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "FilterSharedCommand", "execute"); exit(1); } } //********************************************************************************************************************** int FilterSharedCommand::processShared(SharedRAbundVectors*& sharedLookup) { try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[extension]"] = util.getExtension(sharedfile); variables["[distance]"] = sharedLookup->getLabel(); string outputFileName = getOutputFileName("shared", variables); if (m->getControl_pressed()) { return 0; } vector rareCounts; rareCounts.resize(Groups.size(), 0); int numGroups = Groups.size(); float defaultSetting = -0.01; //you want to remove a percentage of OTUs set removeLabels; if (!util.isEqual(rarePercent, defaultSetting)) { vector otus; //rank otus by abundance for (int i = 0; i < sharedLookup->getNumBins(); i++) { float otuTotal = 0.0; for (int j = 0; j < numGroups; j++) { otuTotal += sharedLookup->get(i, Groups[j]); } spearmanRank temp(sharedLookup->getOTUName(i), otuTotal); otus.push_back(temp); } //sort by abundance sort(otus.begin(), otus.end(), compareSpearman); //find index of cutoff int indexFirstNotRare = ceil(rarePercent * (float)sharedLookup->getNumBins()); //handle ties if (keepties) { //adjust indexFirstNotRare if needed if (indexFirstNotRare != 0) { //not out of bounds if (util.isEqual(otus[indexFirstNotRare].score, otus[indexFirstNotRare-1].score)) { //you have a tie bool tie = true; for (int i = indexFirstNotRare-1; i >=0; i--) { if (!util.isEqual(otus[indexFirstNotRare].score, otus[i].score)) { //found value below tie indexFirstNotRare = i+1; tie = false; break; } } if (tie) { if (m->getDebug()) { m->mothurOut("For distance " + sharedLookup->getLabel() + " all rare OTUs abundance tie with first 'non rare' OTU, not removing any for rarepercent parameter.\n"); }indexFirstNotRare = 0; } } } } //saved labels for OTUs above rarepercent for (int i = 0; i < indexFirstNotRare; i++) { removeLabels.insert(otus[i].name); } } bool filteredSomething = false; int numRemoved = 0; vector binsToRemove; for (int i = 0; i < sharedLookup->getNumBins(); i++) { if (m->getControl_pressed()) { return 0; } vector 
abunds = sharedLookup->getOTU(i); bool okay = true; //innocent until proven guilty if (minAbund != -1) { for (int j = 0; j < numGroups; j++) { if (abunds[j] < minAbund) { okay = false; break; } } } if (okay && (minTotal != -1)) { int otuTotal = 0; for (int j = 0; j < numGroups; j++) { otuTotal += abunds[j]; } if (otuTotal < minTotal) { okay = false; } } if (okay && (!util.isEqual(minPercent, defaultSetting))) { double otuTotal = 0; double total = 0; for (int j = 0; j < numGroups; j++) { otuTotal += abunds[j]; total += sharedLookup->getNumSeqs(Groups[j]); } double percent = otuTotal / total; if (percent < minPercent) { okay = false; } } if (okay && (minSamples != -1)) { int samples = 0; for (int j = 0; j < numGroups; j++) { if (abunds[j] != 0) { samples++; } } if (samples < minSamples) { okay = false; } } if (okay && (!util.isEqual(minPercentSamples, defaultSetting))) { double samples = 0; double total = numGroups; for (int j = 0; j < numGroups; j++) { if (abunds[j] != 0) { samples++; } } double percent = samples / total; if (percent < minPercentSamples) { okay = false; } } if (okay && (!util.isEqual(rarePercent, defaultSetting))) { if (removeLabels.count(sharedLookup->getOTUName(i)) != 0) { //are we on the 'bad' list okay = false; } } //did this OTU pass the filter criteria if (!okay) { filteredSomething = true; if (makeRare) { for (int j = 0; j < numGroups; j++) { rareCounts[j] += abunds[j]; } } if (m->getDebug()) { m->mothurOut("[DEBUG]: removing OTU " + sharedLookup->getOTUName(i) + "\n"); } binsToRemove.push_back(i); numRemoved++; } } sharedLookup->removeOTUs(binsToRemove, true); //if we are saving the counts add a "rare" OTU if anything was filtered if (makeRare) { if (filteredSomething) { sharedLookup->push_back(rareCounts, "OTURare1"); } } ofstream out; util.openOutputFile(outputFileName, out); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); bool printHeaders = true; sharedLookup->print(out, printHeaders); out.close(); m->mothurOut("\nRemoved " + toString(numRemoved) + " OTUs.\n"); return 0; } catch(exception& e) { m->errorOut(e, "FilterSharedCommand", "processShared"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/filtersharedcommand.h000077500000000000000000000022561424121717000224160ustar00rootroot00000000000000// // filtersharedcommand.h // Mothur // // Created by Sarah Westcott on 1/4/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
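//
//  Worked sketch of the filtering done in processShared() (hypothetical counts):
//  with minpercent=0.01, an OTU totalling 8 reads out of 1000 across all samples
//  has a relative abundance of 0.008 < 0.01 and is removed; with makerare=T its
//  counts are added to a new "OTURare1" bin so per-sample read totals are preserved.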
// #ifndef Mothur_filtersharedcommand_h #define Mothur_filtersharedcommand_h #include "command.hpp" #include "inputdata.h" class FilterSharedCommand : public Command { public: FilterSharedCommand(string); ~FilterSharedCommand() = default; vector setParameters(); string getCommandName() { return "filter.shared"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Filter.shared"; } string getDescription() { return "remove OTUs based on various criteria"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, pickedGroups, allLines, makeRare, keepties; set labels; //holds labels to be used string groups, label, sharedfile, accnosfile; vector Groups, outputNames; int minAbund, minTotal, minSamples; float minPercent, minPercentSamples, rarePercent; int processShared(SharedRAbundVectors*&); }; #endif mothur-1.48.0/source/commands/getcoremicrobiomecommand.cpp000077500000000000000000000425761424121717000240040ustar00rootroot00000000000000// // GetCoreMicroBiomeCommand.cpp // Mothur // // Created by Sarah Westcott on 5/8/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "getcoremicrobiomecommand.h" #include "getrelabundcommand.h" //********************************************************************************************************************** vector GetCoreMicroBiomeCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","coremicrobiom",false,false, true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","coremicrobiom",false,false, true); parameters.push_back(prelabund); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter poutput("output", "Multiple", "fraction-count", "fraction", "", "", "","",false,false); parameters.push_back(poutput); CommandParameter pabund("abundance", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pabund); CommandParameter psamples("samples", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(psamples); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["coremicrobiome"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetCoreMicroBiomeCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetCoreMicroBiomeCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.coremicrobiome determines the fraction of OTUs that are found in varying numbers of samples for different minimum relative abundances.\n"; helpString += "The get.coremicrobiome parameters are: shared, relabund, groups, label, output, abundance and samples. 
Shared or relabund is required.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n"; helpString += "The output parameter is used to specify whether you would like the fraction of OTU's or OTU count outputted. Options are fraction or count. Default=fraction.\n"; helpString += "The abundance parameter allows you to specify an abundance you would like the OTU names outputted for. Values 1 to 100, will be treated as the percentage. For example relabund=0.01 can be set with abundance=1 or abundance=0.01. For abundance values < 1 percent, abundance=0.001 will specify OTUs with relative abundance of 0.001.\n"; helpString += "The samples parameter allows you to specify the minimum number of samples you would like the OTU names outputted for. Must be an interger between 1 and number of samples in your file.\n"; helpString += "The new command should be in the following format: get.coremicrobiome(shared=yourSharedFile)\n"; helpString += "get.coremicrobiom(shared=final.an.shared, abund=30)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetCoreMicroBiomeCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetCoreMicroBiomeCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "coremicrobiome") { pattern = "[filename],[tag],core.microbiome"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetCoreMicroBiomeCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetCoreMicroBiomeCommand::GetCoreMicroBiomeCommand(string option) : Command() { try { allLines = true; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { inputFileName = sharedfile; format = "sharedfile"; current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { inputFileName = relabundfile; format = "relabund"; current->setRelAbundFile(relabundfile); } if ((relabundfile == "") && (sharedfile == "")) { //is there are current file available for either of these? //give priority to shared, then relabund sharedfile = current->getSharedFile(); if (sharedfile != "") { inputFileName = sharedfile; format="sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { inputFileName = relabundfile; format="relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a shared or relabund.\n"); abort = true; } } } if (outputdir == ""){ outputdir = util.hasPath(inputFileName); } string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } output = validParameter.valid(parameters, "output"); if(output == "not found"){ output = "fraction"; } if ((output != "fraction") && (output != "count")) { m->mothurOut(output + " is not a valid output form. Options are fraction and count. I will use fraction.\n"); output = "fraction"; } string temp = validParameter.valid(parameters, "abundance"); if (temp == "not found"){ temp = "-1"; } util.mothurConvert(temp, abund); if (!util.isEqual(abund, -1)) { if ((abund < 0) || (abund > 100)) { m->mothurOut(toString(abund) + " is not a valid number for abund. Must be between 0 and 100.\n"); } if (abund < 1) { //convert string temp = toString(abund); string factorString = "1"; bool found = false; for (int i = 0; i < temp.length(); i++) { if (temp[i] == '.') { found = true; } else { if (found) { factorString += "0"; } } } util.mothurConvert(factorString, factor); }else { factor = 100; abund /= 100; } }else { factor = 100; } temp = validParameter.valid(parameters, "samples"); if (temp == "not found"){ temp = "-1"; } util.mothurConvert(temp, samples); } } catch(exception& e) { m->errorOut(e, "GetCoreMicroBiomeCommand", "GetCoreMicroBiomeCommand"); exit(1); } } //********************************************************************************************************************** int GetCoreMicroBiomeCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (format == "sharedfile") { //convert to relabund string options = "shared=" + sharedfile; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.relabund(" + options + ")\n"); Command* relabundCommand = new GetRelAbundCommand(options); relabundCommand->execute(); map > filenames = relabundCommand->getOutputFiles(); relabundfile = filenames["relabund"][0]; inputFileName = relabundfile; format="relabund"; delete relabundCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); } InputData input(inputFileName, format, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundFloatVectors* lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); if (samples != -1) { if ((samples < 1) || (samples > lookup->size())) { m->mothurOut(toString(samples) + " is not a valid number for samples. Must be an integer between 1 and the number of samples in your file. 
Your file contains " + toString(lookup->size()) + " samples, so I will use that.\n"); samples = lookup->size(); } } while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } createTable(lookup); delete lookup; lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetCoreMicroBiomeCommand", "execute"); exit(1); } } //********************************************************************************************************************** int GetCoreMicroBiomeCommand::createTable(SharedRAbundFloatVectors*& lookup){ try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileName)); variables["[tag]"] = lookup->getLabel(); string outputFileName = getOutputFileName("coremicrobiome", variables); outputNames.push_back(outputFileName); outputTypes["coremicrobiome"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); int numSamples = lookup->size(); int numOtus = lookup->getNumBins(); //table is 100 by numsamples //question we are answering is: what fraction of OTUs in a study have a relative abundance at or above %X //in at least %Y samples. x goes from 0 to 100, y from 1 to numSamples vector< vector > table; table.resize(factor+1); for (int i = 0; i < table.size(); i++) { table[i].resize(numSamples, 0.0); } map > otuNames; if (!(util.isEqual(abund, -1)) && (samples == -1)) { //fill with all samples for (int i = 0; i < numSamples; i++) { vector temp; otuNames[i+1] = temp; } }else if ((util.isEqual(abund, -1)) && (samples != -1)) { //fill with all relabund for (int i = 0; i < factor+1; i++) { vector temp; otuNames[i] = temp; } }else if (!(util.isEqual(abund, -1)) && (samples != -1)) { //only one line is wanted vector temp; int thisAbund = abund*factor; otuNames[thisAbund] = temp; } vector sampleNames = lookup->getNamesGroups(); vector currentLabels = lookup->getOTUNames(); for (int i = 0; i < numOtus; i++) { if (m->getControl_pressed()) { break; } //count number of samples in this otu with a relabund >= spot in count vector counts; counts.resize(factor+1, 0); for (int j = 0; j < sampleNames.size(); j++) { double relabund = lookup->get(i, sampleNames[j]); int wholeRelabund = (int) (floor(relabund*factor)); for (int k = 0; k < wholeRelabund+1; k++) { counts[k]++; } } //add this otus info to table for (int j = 0; j < table.size(); j++) { for (int k = 0; k < counts[j]; k++) { table[j][k]++; } if ((util.isEqual(abund, -1)) && (samples != -1)) { //we want all OTUs with this number of samples if (counts[j] >= samples) { otuNames[j].push_back(currentLabels[i]); } }else if (!(util.isEqual(abund, -1)) && (samples == -1)) { //we want all OTUs with this relabund if (j == (int)(abund*factor)) { for (int k = 0; k < counts[j]; k++) { otuNames[k+1].push_back(currentLabels[i]); } } }else if (!(util.isEqual(abund, -1)) && (samples != -1)) { //we want only OTUs with this relabund for this number of samples if ((j == (int)(abund*factor)) && (counts[j] >= samples)) { otuNames[j].push_back(currentLabels[i]); } } } } //format output if (output == "fraction") { out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); } out << 
"NumSamples\t"; //convert table counts to percents int precisionLength = (toString(factor)).length(); for (int i = 0; i < table.size(); i++) { out << "Relabund-" << setprecision(precisionLength-1)<< (float)(i/(float)factor) << "\t"; if (m->getControl_pressed()) { break; } for (int j = 0; j < table[i].size(); j++) { if (output == "fraction") { table[i][j] /= (double) numOtus; } } } out << endl; for (int i = 0; i < numSamples; i++) { if (m->getControl_pressed()) { break; } out << i+1; for (int j = 0; j < table.size(); j++) { out << setprecision(6) << '\t' << table[j][i]; } out << endl; } out.close(); if (m->getControl_pressed()) { return 0; } if ((samples != -1) || (!util.isEqual(abund, -1))) { string outputFileName2 = outputdir + util.getRootName(util.getSimpleName(inputFileName)) + lookup->getLabel() + ".core.microbiomelist"; outputNames.push_back(outputFileName2); outputTypes["coremicrobiome"].push_back(outputFileName2); ofstream out2; util.openOutputFile(outputFileName2, out2); if ((util.isEqual(abund, -1)) && (samples != -1)) { //we want all OTUs with this number of samples out2 << "Relabund\tOTUList_for_samples=" << samples << "\n"; }else if (!(util.isEqual(abund, -1)) && (samples == -1)) { //we want all OTUs with this relabund out2 << "Samples\tOTUList_for_abund=" << abund*factor << "\n"; }else if (!(util.isEqual(abund, -1)) && (samples != -1)) { //we want only OTUs with this relabund for this number of samples out2 << "Relabund\tOTUList_for_samples=" << samples << "\n"; } for (map >::iterator it = otuNames.begin(); it != otuNames.end(); it++) { if (m->getControl_pressed()) { break; } vector temp = it->second; string list = util.makeList(temp); if (!(util.isEqual(abund, -1)) && (samples == -1)) { //fill with all samples out2 << it->first << '\t' << list << endl; }else { //fill with relabund out2 << fixed << showpoint << setprecision(precisionLength-1) << (it->first/(float)(factor)) << '\t' << list << endl; } } out2.close(); } return 0; } catch(exception& e) { m->errorOut(e, "GetCoreMicroBiomeCommand", "createTable"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getcoremicrobiomecommand.h000077500000000000000000000031531424121717000234350ustar00rootroot00000000000000#ifndef Mothur_getcoremicrobiomcommand_h #define Mothur_getcoremicrobiomcommand_h // // GetCoreMicroBiomeCommand.h // Mothur // // Created by Sarah Westcott on 5/8/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
// #include "command.hpp" #include "inputdata.h" /**************************************************************************************************/ class GetCoreMicroBiomeCommand : public Command { public: GetCoreMicroBiomeCommand(string); ~GetCoreMicroBiomeCommand(){} vector setParameters(); string getCommandName() { return "get.coremicrobiome"; } string getCommandCategory() { return "OTU-Based Approaches"; } //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.coremicrobiome"; } string getDescription() { return "determines the fraction of OTUs that are found in varying numbers of samples for different minimum relative abundances"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string relabundfile, sharedfile, inputFileName, format, output; bool allLines; vector Groups; set labels; bool abort; vector outputNames; float abund; int samples, factor; int createTable(SharedRAbundFloatVectors*&); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/getcurrentcommand.cpp000077500000000000000000000211521424121717000224530ustar00rootroot00000000000000/* * getcurrentcommand.cpp * Mothur * * Created by westcott on 3/16/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "getcurrentcommand.h" //********************************************************************************************************************** vector GetCurrentCommand::setParameters(){ try { CommandParameter pclear("clear", "String", "", "", "", "", "","",false,false); parameters.push_back(pclear); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetCurrentCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetCurrentCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.current command outputs the current files saved by mothur.\n"; helpString += "The get.current command has one parameter: clear.\n"; helpString += "The clear parameter is used to indicate which file types you would like to clear values for, multiple types can be separated by dashes.\n"; helpString += "The get.current command should be in the following format: \n"; helpString += "get.current() or get.current(clear=fasta-name-accnos)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetCurrentCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetCurrentCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],current_files.summary"; } else { m->mothurOut("[ERROR]: No 
definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetCurrentCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetCurrentCommand::GetCurrentCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; clearTypes = validParameter.valid(parameters, "clear"); if (clearTypes == "not found") { clearTypes = ""; } else { util.splitAtDash(clearTypes, types); } } } catch(exception& e) { m->errorOut(e, "GetCurrentCommand", "GetCurrentCommand"); exit(1); } } //********************************************************************************************************************** int GetCurrentCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } CommandFactory* cFactory; cFactory = CommandFactory::getInstance(); //user wants to clear a type if (types.size() != 0) { for (int i = 0; i < types.size(); i++) { if (m->getControl_pressed()) { break; } //look for file types if (types[i] == "fasta") { current->setFastaFile(""); }else if (types[i] == "qfile") { current->setQualFile(""); }else if (types[i] == "phylip") { current->setPhylipFile(""); }else if (types[i] == "column") { current->setColumnFile(""); }else if (types[i] == "list") { current->setListFile(""); }else if (types[i] == "rabund") { current->setRabundFile(""); }else if (types[i] == "sabund") { current->setSabundFile(""); }else if (types[i] == "name") { current->setNameFile(""); }else if (types[i] == "group") { current->setGroupFile(""); }else if (types[i] == "order") { current->setOrderFile(""); }else if (types[i] == "ordergroup") { current->setOrderGroupFile(""); }else if (types[i] == "tree") { current->setTreeFile(""); }else if (types[i] == "shared") { current->setSharedFile(""); }else if (types[i] == "relabund") { current->setRelAbundFile(""); }else if (types[i] == "clr") { current->setCLRFile(""); }else if (types[i] == "design") { current->setDesignFile(""); }else if (types[i] == "sff") { current->setSFFFile(""); }else if (types[i] == "oligos") { current->setOligosFile(""); }else if (types[i] == "accnos") { current->setAccnosFile(""); }else if (types[i] == "taxonomy") { current->setTaxonomyFile(""); }else if (types[i] == "constaxonomy") { current->setConsTaxonomyFile(""); }else if (types[i] == "contigsreport") { current->setContigsReportFile(""); }else if (types[i] == "flow") { current->setFlowFile(""); }else if (types[i] == "biom") { current->setBiomFile(""); }else if (types[i] == "count") { current->setCountFile(""); }else if (types[i] == "summary") { current->setSummaryFile(""); }else if (types[i] == "file") { current->setFileFile(""); }else if (types[i] == "file") { current->setSampleFile(""); }else if (types[i] == "processors") { current->setProcessors("1"); }else if (types[i] == "all") { current->clearCurrentFiles(); }else { m->mothurOut("[ERROR]: mothur does not save a current file for " + types[i]); m->mothurOutEndLine(); } } } unsigned long long ramUsed, total; ramUsed = util.getRAMUsed(); total = util.getTotalRAM(); m->mothurOut("\nCurrent RAM usage: " + 
toString(ramUsed/(double)GIG) + " Gigabytes. Total Ram: " + toString(total/(double)GIG) + " Gigabytes.\n"); if (current->hasCurrentFiles()) { map variables; variables["[filename]"] = util.getFullPathName(outputdir); string filename = getOutputFileName("summary", variables); m->mothurOut("\nCurrent files saved by mothur:\n"); current->printCurrentFiles(filename); outputNames.push_back(filename); outputTypes["summary"].push_back(filename); } vector inputDirs = current->getInputDir(); if (inputDirs.size() != 0) { m->mothurOut("\nCurrent input directories saved by mothur:\n"); for (int i = 0; i < inputDirs.size(); i++) { m->mothurOut("\t" + inputDirs[i] + "\n"); } m->mothurOutEndLine(); } string outputdir = current->getOutputDir(); if (outputdir != "") { m->mothurOut("\nCurrent output directory saved by mothur: " + outputdir); m->mothurOutEndLine(); } vector defaultPath = current->getDefaultPath(); if (defaultPath.size() != 0) { m->mothurOut("\nCurrent default directories saved by mothur:\n"); for (int i = 0; i < defaultPath.size(); i++) { m->mothurOut("\t" + defaultPath[i] + "\n"); } m->mothurOutEndLine(); } string temp = "."; temp += PATH_SEPARATOR; temp = util.getFullPathName(temp); m->mothurOutEndLine(); m->mothurOut("Current working directory: " + temp); m->mothurOutEndLine(); if (current->hasCurrentFiles()) { m->mothurOutEndLine(); m->mothurOut("Output File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); } return 0; } catch(exception& e) { m->errorOut(e, "GetCurrentCommand", "execute"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getcurrentcommand.h000077500000000000000000000016401424121717000221200ustar00rootroot00000000000000#ifndef GETCURRENTCOMMAND_H #define GETCURRENTCOMMAND_H /* * getcurrentcommand.h * Mothur * * Created by westcott on 3/16/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "commandfactory.hpp" class GetCurrentCommand : public Command { public: GetCurrentCommand(string); ~GetCurrentCommand() = default; vector setParameters(); string getCommandName() { return "get.current"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.current"; } string getDescription() { return "get current files saved by mothur"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames; bool abort; string clearTypes; vector types; }; #endif mothur-1.48.0/source/commands/getdistscommand.cpp000077500000000000000000000347031424121717000221250ustar00rootroot00000000000000// // getdistscommand.cpp // Mothur // // Created by Sarah Westcott on 1/28/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
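//
// Typical invocations from the mothur prompt (file names here are hypothetical):
//
//     mothur > get.dists(column=final.dist, accnos=final.accnos)
//     mothur > get.dists(phylip=final.phylip.dist, accnos=final.accnos)
//
// Both forms keep only the distances whose sequence or group names appear in the
// accnos file: the column form writes the matching rows, while the phylip form
// rewrites a smaller square or lower-triangle matrix, as implemented in readColumn()
// and readPhylip() below.
//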
// #include "getdistscommand.h" //********************************************************************************************************************** vector GetDistsCommand::setParameters(){ try { CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip); CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["phylip"] = tempOutNames; outputTypes["column"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetDistsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetDistsCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.dists command selects distances from a phylip or column file related to groups or sequences listed in an accnos file.\n"; helpString += "The get.dists command parameters are accnos, phylip and column.\n"; helpString += "The get.dists command should be in the following format: get.dists(accnos=yourAccnos, phylip=yourPhylip).\n"; helpString += "Example get.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetDistsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetDistsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "phylip") { pattern = "[filename],pick,[extension]"; } else if (type == "column") { pattern = "[filename],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetDistsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetDistsCommand::GetDistsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = current->getAccnosFile(); if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter.\n"); } else { m->mothurOut("You have no valid accnos file and accnos is 
required.\n"); abort = true; } }else { current->setAccnosFile(accnosfile); } phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { phylipfile = ""; } else { current->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { columnfile = ""; abort = true; } else if (columnfile == "not found") { columnfile = ""; } else { current->setColumnFile(columnfile); } if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these? //give priority to column, then phylip columnfile = current->getColumnFile(); if (columnfile != "") { m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { phylipfile = current->getPhylipFile(); if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a phylip or column file.\n"); abort = true; } } } } } catch(exception& e) { m->errorOut(e, "GetDistsCommand", "GetDistsCommand"); exit(1); } } //********************************************************************************************************************** int GetDistsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //get names you want to keep names = util.readAccnos(accnosfile); if (m->getControl_pressed()) { return 0; } //read through the correct file and output lines you want to keep if (phylipfile != "") { readPhylip(); } if (columnfile != "") { readColumn(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setPhylipFile(currentName); } } itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setColumnFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "GetDistsCommand", "execute"); exit(1); } } //********************************************************************************************************************** int GetDistsCommand::readPhylip(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(phylipfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(phylipfile)); variables["[extension]"] = util.getExtension(phylipfile); string outputFileName = getOutputFileName("phylip", variables); ifstream in; util.openInputFile(phylipfile, in); float distance; int square, nseqs; square = 0; string name; unsigned int row; set rows; //converts names in names to a index row = 0; string numTest; in >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } if (names.count(name) != 0) { rows.insert(row); } row++; //is the matrix square? 
char d; while((d=in.get()) != EOF){ if(isalnum(d)){ square = 1; in.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = 0; break; } } //map name to row/column if(square == 0){ for(int i=1;i> name; if (names.count(name) != 0) { rows.insert(row); } row++; for(int j=0;jgetControl_pressed()) { in.close(); return 0; } in >> distance; } } } else{ for(int i=1;i> name; if (names.count(name) != 0) { rows.insert(row); } row++; for(int j=0;jgetControl_pressed()) { in.close(); return 0; } in >> distance; } } } in.close(); if (m->getControl_pressed()) { return 0; } //read through file only printing rows and columns of seqs in names ifstream inPhylip; util.openInputFile(phylipfile, inPhylip); inPhylip >> numTest; ofstream out; util.openOutputFile(outputFileName, out); outputTypes["phylip"].push_back(outputFileName); outputNames.push_back(outputFileName); out << names.size() << endl; unsigned int count = 0; if(square == 0){ for(int i=0;i> name; bool ignoreRow = false; if (names.count(name) == 0) { ignoreRow = true; } else{ out << name << '\t'; count++; } for(int j=0;jgetControl_pressed()) { inPhylip.close(); out.close(); return 0; } inPhylip >> distance; if (!ignoreRow) { //is this a column we want if(rows.count(j) != 0) { out << distance << '\t'; } } } if (!ignoreRow) { out << endl; } } } else{ for(int i=0;i> name; bool ignoreRow = false; if (names.count(name) == 0) { ignoreRow = true; } else{ out << name << '\t'; count++; } for(int j=0;jgetControl_pressed()) { inPhylip.close(); out.close(); return 0; } inPhylip >> distance; if (!ignoreRow) { //is this a column we want if(rows.count(j) != 0) { out << distance << '\t'; } } } if (!ignoreRow) { out << endl; } } } inPhylip.close(); out.close(); if (count == 0) { m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file.\n"); } else if (count != names.size()) { m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file.\n"); //rewrite with new number util.renameFile(outputFileName, outputFileName+".temp"); ofstream out2; util.openOutputFile(outputFileName, out2); out2 << count << endl; ifstream in3; util.openInputFile(outputFileName+".temp", in3); in3 >> nseqs; gobble(in3); char buffer[4096]; while (!in3.eof()) { in3.read(buffer, 4096); out2.write(buffer, in3.gcount()); } in3.close(); out2.close(); util.mothurRemove(outputFileName+".temp"); } m->mothurOut("Selected " + toString(count) + " groups or sequences from your phylip file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "GetDistsCommand", "readPhylip"); exit(1); } } //********************************************************************************************************************** int GetDistsCommand::readColumn(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(columnfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(columnfile)); variables["[extension]"] = util.getExtension(columnfile); string outputFileName = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(columnfile, in); set foundNames; string firstName, secondName; float distance; while (!in.eof()) { if (m->getControl_pressed()) { out.close(); in.close(); return 0; } in >> firstName >> secondName 
>> distance; gobble(in); //are both names in the accnos file if ((names.count(firstName) != 0) && (names.count(secondName) != 0)) { out << firstName << '\t' << secondName << '\t' << distance << endl; foundNames.insert(firstName); foundNames.insert(secondName); } } in.close(); out.close(); if (foundNames.size() == 0) { m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file.\n"); } else if (foundNames.size() != names.size()) { m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(foundNames.size()) + " of them in the column file.\n"); } m->mothurOut("Selected " + toString(foundNames.size()) + " groups or sequences from your column file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "GetDistsCommand", "readColumn"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getdistscommand.h000077500000000000000000000020061424121717000215610ustar00rootroot00000000000000// // getdistscommand.h // Mothur // // Created by Sarah Westcott on 1/28/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_getdistscommand_h #define Mothur_getdistscommand_h #include "command.hpp" class GetDistsCommand : public Command { public: GetDistsCommand(string); ~GetDistsCommand(){} vector setParameters(); string getCommandName() { return "get.dists"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.dists"; } string getDescription() { return "gets distances from a phylip or column file related to groups or sequences listed in an accnos file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: unordered_set names; string accnosfile, phylipfile, columnfile; bool abort; vector outputNames; int readPhylip(); int readColumn(); }; #endif mothur-1.48.0/source/commands/getgroupcommand.cpp000077500000000000000000000077451424121717000221410ustar00rootroot00000000000000/* * getgroupcommand.cpp * Mothur * * Created by Thomas Ryabin on 2/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "getgroupcommand.h" #include "inputdata.h" //********************************************************************************************************************** vector GetgroupCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "current", "none", "none", "none","",false,true, true); parameters.push_back(pshared); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetgroupCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetgroupCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.group command parameter is shared and it's required if you have no valid current file.\n"; helpString += "You may not use any parameters with the get.group command.\n"; helpString += "The get.group command should be in the following format: \n"; helpString += "get.group()\n"; helpString += "Example get.group().\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetgroupCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** GetgroupCommand::GetgroupCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); map::iterator it; ValidParameters validParameter; //get shared file sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } } } catch(exception& e) { m->errorOut(e, "GetgroupCommand", "GetgroupCommand"); exit(1); } } //********************************************************************************************************************** int GetgroupCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(sharedfile, "sharedfile", nullVector); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); vector namesOfGroups = lookup->getNamesGroups(); delete lookup; for (int i = 0; i < namesOfGroups.size(); i++) { m->mothurOut(namesOfGroups[i]); m->mothurOutEndLine(); } m->mothurOut("\nOutput File Names: \n\n"); return 0; } catch(exception& e) { m->errorOut(e, "GetgroupCommand", "execute"); exit(1); } } 
//********************************************************************************************************************** mothur-1.48.0/source/commands/getgroupcommand.h000077500000000000000000000016051424121717000215730ustar00rootroot00000000000000#ifndef GETGROUPCOMMAND_H #define GETGROUPCOMMAND_H /* * getgroupcommand.h * Mothur * * Created by Thomas Ryabin on 2/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" class GetgroupCommand : public Command { public: GetgroupCommand(string); ~GetgroupCommand() = default; vector setParameters(); string getCommandName() { return "get.group"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string) { return ""; } string getCitation() { return "http://www.mothur.org/wiki/Get.group"; } string getDescription() { return "outputs group names"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string outputFile, sharedfile; vector outputNames; ofstream out; ifstream in; bool abort; }; #endif mothur-1.48.0/source/commands/getgroupscommand.cpp000077500000000000000000001322161424121717000223140ustar00rootroot00000000000000/* * getgroupscommand.cpp * Mothur * * Created by westcott on 11/10/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "getgroupscommand.h" #include "sequence.hpp" #include "listvector.hpp" #include "inputdata.h" #include "designmap.h" //********************************************************************************************************************** vector GetGroupsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT","fasta",false,false, true); parameters.push_back(pfasta); CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none","shared",false,false, true); parameters.push_back(pshared); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false, true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false, true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT","group",false,false, true); parameters.push_back(pgroup); CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT","design",false,false, true); parameters.push_back(pdesign); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT","list",false,false, true); parameters.push_back(plist); CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "FNGLT","flow",false,false, true); parameters.push_back(pflow); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT","taxonomy",false,false, true); parameters.push_back(ptaxonomy); CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip); CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); 
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["design"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["phylip"] = tempOutNames; outputTypes["column"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetGroupsCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.groups command selects sequences from a specfic group or set of groups from the following file types: fasta, name, group, count, list, taxonomy, design, phylip, column or shared file.\n"; helpString += "It outputs a file containing the sequences in the those specified groups, or a sharedfile containing only those groups.\n"; helpString += "The get.groups command parameters are accnos, fasta, name, group, count, list, taxonomy, shared, design, phylip, column, sets and groups. The group or count parameter is required, unless you have a current group or count file, or are using a shared file.\n"; helpString += "You must also provide an accnos containing the list of groups to get or set the groups or sets parameter to the groups you wish to select.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like. You can separate group names with dashes.\n"; helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like. 
You can separate set names with dashes.\n"; helpString += "The get.groups command should be in the following format: get.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n"; helpString += "Example get.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n"; helpString += "or get.groups(groups=pasture, fasta=amazon.fasta, group=amazon.groups).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetGroupsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],pick,[extension]"; } else if (type == "flow") { pattern = "[filename],pick,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } else if (type == "phylip") { pattern = "[filename],pick,[extension]"; } else if (type == "column") { pattern = "[filename],pick,[extension]"; } else if (type == "list") { pattern = "[filename],[tag],pick,[extension]"; } else if (type == "shared") { pattern = "[filename],[tag],pick,[extension]"; } else if (type == "design") { pattern = "[filename],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetGroupsCommand::GetGroupsCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = ""; } else { current->setAccnosFile(accnosfile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } flowfile = validParameter.validFile(parameters, "flow"); if (flowfile == "not open") { flowfile = ""; abort = true; } else if (flowfile == "not found") { flowfile = ""; } else { current->setFlowFile(flowfile); } phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { phylipfile = ""; } else { current->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { columnfile = ""; abort = true; } else if (columnfile == "not found") { columnfile = ""; } else { current->setColumnFile(columnfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") 
{ namefile = ""; } else { current->setNameFile(namefile); } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not open") { taxfile = ""; abort = true; } else if (taxfile == "not found") { taxfile = ""; } else { current->setTaxonomyFile(taxfile); } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); } sets = validParameter.valid(parameters, "sets"); if (sets == "not found") { sets = ""; } else { util.splitAtDash(sets, Sets); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { designfile = ""; abort = true; } else if (designfile == "not found") { designfile = ""; } else { current->setDesignFile(designfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) { //is there are current file available for any of these? 
if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) { //give priority to group, then shared groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current groupfile, countfile or sharedfile and one is required.\n"); abort = true; } } } }else { //give priority to shared, then group sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { designfile = current->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current groupfile, designfile, countfile or sharedfile and one is required.\n"); abort = true; } } } } } } if ((accnosfile == "") && (Groups.size() == 0) && (Sets.size() == 0)) { m->mothurOut("[ERROR]: You must provide an accnos file or specify groups using the groups or sets parameters.\n"); abort = true; } if ((Groups.size() != 0) && (Sets.size() != 0)) { m->mothurOut("[ERROR]: You cannot use the groups and sets parameters at the same time, quitting.\n"); abort = true; } if ((Sets.size() != 0) && (designfile == "")) { m->mothurOut("[ERROR]: You must provide a design file when using the sets parameter.\n"); abort = true; } if ((flowfile == "") && (phylipfile == "") && (columnfile == "") && (fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "") && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("[ERROR]: You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count, phylip, column or list.\n"); abort = true; } if (((groupfile == "") && (countfile == "")) && ((flowfile != "") || (namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("[ERROR]: If using a fasta, flow, name, taxonomy, group or list, then you must provide a group or count file.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand"); exit(1); } } //********************************************************************************************************************** int GetGroupsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //get groups you want to remove if (accnosfile != "") { util.readAccnos(accnosfile, Groups); } else if (Sets.size() != 0) { fillGroupsFromDesign(); } if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); //fill names with names of sequences that are from the groups we want to remove fillNames(); delete groupMap; }else if (countfile != ""){ if ((fastafile != "") || (listfile != "") || (taxfile != "")) { //m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur 
assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); } CountTable ct; ct.readTable(countfile, true, false, Groups); if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string outputFileName = getOutputFileName("count", variables); int selectedCount = ct.getNumSeqs(); if (selectedCount == 0) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get.\n"); } else { ct.printTable(outputFileName); outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); } m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file.\n"); vector thisGroupsSeqs = ct.getNamesOfSeqs(); for (int j = 0; j < thisGroupsSeqs.size(); j++) { names.insert(thisGroupsSeqs[j]); } } if (m->getControl_pressed()) { return 0; } //read through the correct file and output lines you want to keep if (namefile != "") { readName(); } if (fastafile != "") { readFasta(); } if (flowfile != "") { readFlow(); } if (groupfile != "") { readGroup(); } if (listfile != "") { readList(); } if (taxfile != "") { readTax(); } if (sharedfile != "") { readShared(); } if (designfile != "") { readDesign(); } if (phylipfile != "") { readPhylip(); } if (columnfile != "") { readColumn(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOut("\nOutput File names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } itTypes = outputTypes.find("design"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setDesignFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 
0) { currentName = (itTypes->second)[0]; current->setPhylipFile(currentName); } } itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setColumnFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "execute"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readFasta(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[extension]"] = util.getExtension(fastafile); string outputFileName = getOutputFileName("fasta", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(fastafile, in); string name; bool wroteSomething = false; int selectedCount = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } Sequence currSeq(in); name = currSeq.getName(); if (name != "") { //if this name is in the accnos file if (names.count(name) != 0) { wroteSomething = true; currSeq.printSequence(out); selectedCount++; }else{ //if you are not in the accnos file check if you are a name that needs to be changed map::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { wroteSomething = true; currSeq.setName(it->second); currSeq.printSequence(out); selectedCount++; } } } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get.\n"); } outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readFlow(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(flowfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(flowfile)); variables["[extension]"] = util.getExtension(flowfile); string outputFileName = getOutputFileName("flow", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(flowfile, in); string name, flows; bool wroteSomething = false; int selectedCount = 0; in >> flows; gobble(in); //read numflows out << flows << endl; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> name; gobble(in); flows = util.getline(in); gobble(in); if (name != "") { //if this name is in the accnos file if (names.count(name) != 0) { wroteSomething = true; out << name << '\t' << flows << endl; selectedCount++; }else{ //if you are not in the accnos file check if you are a name that needs to be changed map::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { wroteSomething = true; name = it->second; out << name << '\t' << flows << endl; selectedCount++; } } } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to 
get.\n"); } outputTypes["flow"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your flow file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readShared(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); } InputData input(sharedfile, "sharedfile", Groups); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile)); variables["[extension]"] = util.getExtension(sharedfile); bool wroteSomething = false; bool printHeaders = true; while(lookup != nullptr) { variables["[tag]"] = lookup->getLabel(); string outputFileName = getOutputFileName("shared", variables); ofstream out; util.openOutputFile(outputFileName, out); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); if (m->getControl_pressed()) { out.close(); util.mothurRemove(outputFileName); delete lookup; return; } lookup->print(out, printHeaders); wroteSomething = true; //get next line to process //prevent memory leak delete lookup; lookup = input.getSharedRAbundVectors(); out.close(); } if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get.\n"); } string groupsString = ""; for (int i = 0; i < Groups.size()-1; i++) { groupsString += Groups[i] + ", "; } groupsString += Groups[Groups.size()-1]; m->mothurOut("Selected groups: " + groupsString + " from your shared file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readShared"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readList(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); InputData input(listfile, "list", nullVector); ListVector* list = input.getListVector(); bool wroteSomething = false; int selectedCount = 0; while(list != nullptr) { selectedCount = 0; variables["[tag]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; util.openOutputFile(outputFileName, out); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); vector binLabels = list->getLabels(); vector newBinLabels; //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); //for each bin for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { out.close(); util.mothurRemove(outputFileName); return; } //parse out names that are in accnos file string binnames = list->get(i); vector thisBinNames; util.splitAtComma(binnames, thisBinNames); string newNames = ""; for (int j = 0; j < thisBinNames.size(); j++) { string name = thisBinNames[j]; //if that name is in the .accnos file, add it if (names.count(name) != 0) { newNames += name + ","; selectedCount++; } else{ //if you are not in the accnos file check if you are a name that needs to be changed map::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { newNames 
+= it->second + ","; selectedCount++; } } } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.print(out, false); } out.close(); delete list; list = input.getListVector(); } if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get.\n"); } m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readList"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readName(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(namefile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[extension]"] = util.getExtension(namefile); string outputFileName = getOutputFileName("name", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(namefile, in); string name, firstCol, secondCol; bool wroteSomething = false; int selectedCount = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> firstCol; gobble(in); in >> secondCol; vector parsedNames; util.splitAtComma(secondCol, parsedNames); vector validSecond; validSecond.clear(); for (int i = 0; i < parsedNames.size(); i++) { if (names.count(parsedNames[i]) != 0) { validSecond.push_back(parsedNames[i]); } } selectedCount += validSecond.size(); //if the name in the first column is in the set then print it and any other names in second column also in set if (names.count(firstCol) != 0) { wroteSomething = true; out << firstCol << '\t'; //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; //make first name in set you come to first column and then add the remaining names to second column }else { //you want part of this row if (validSecond.size() != 0) { wroteSomething = true; out << validSecond[0] << '\t'; //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; uniqueToRedundant[firstCol] = validSecond[0]; } } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get.\n"); } outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readName"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readGroup(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); 
variables["[extension]"] = util.getExtension(groupfile); string outputFileName = getOutputFileName("group", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(groupfile, in); string name, group; bool wroteSomething = false; int selectedCount = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> name; gobble(in); //read from first column in >> group; //read from second column //if this name is in the accnos file if (names.count(name) != 0) { wroteSomething = true; out << name << '\t' << group << endl; selectedCount++; } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get.\n"); } outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readGroup"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readDesign(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(designfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(designfile)); variables["[extension]"] = util.getExtension(designfile); string outputFileName = getOutputFileName("design", variables); DesignMap designMap(designfile); if (m->getControl_pressed()) { return ; } bool wroteSomething = false; ofstream out; util.openOutputFile(outputFileName, out); int numGroupsFound = designMap.printGroups(out, Groups); if (numGroupsFound > 0) { wroteSomething = true; } out.close(); names.clear(); names = util.mothurConvert(Groups); if (wroteSomething == false) { m->mothurOut("Your file does NOT contain groups from the groups you wish to get.\n"); } outputTypes["design"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(numGroupsFound) + " groups from your design file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readDesign"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readTax(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile)); variables["[extension]"] = util.getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(taxfile, in); string name, tax; bool wroteSomething = false; int selectedCount = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> name; gobble(in); tax = util.getline(in); gobble(in); //if this name is in the accnos file if (names.count(name) != 0) { wroteSomething = true; out << name << '\t' << tax << endl; selectedCount++; }else{ //if you are not in the accnos file check if you are a name that needs to be changed map::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { wroteSomething = true; out << it->second << '\t' << tax << endl; selectedCount++; } } } in.close(); 
out.close(); if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get.\n"); } outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readTax"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readPhylip(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(phylipfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(phylipfile)); variables["[extension]"] = util.getExtension(phylipfile); string outputFileName = getOutputFileName("phylip", variables); ifstream in; util.openInputFile(phylipfile, in); float distance; int square, nseqs; square = 0; string name; unsigned int row; set<unsigned int> rows; //converts names in names to an index row = 0; string numTest; in >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } if (names.count(name) != 0) { rows.insert(row); } row++; //is the matrix square? char d; while((d=in.get()) != EOF){ if(isalnum(d)){ square = 1; in.putback(d); for(int i=0;i<nseqs;i++){ in >> distance; } break; } if(d == '\n'){ square = 0; break; } } //map name to row/column if(square == 0){ for(int i=1;i<nseqs;i++){ in >> name; if (names.count(name) != 0) { rows.insert(row); } row++; for(int j=0;j<i;j++){ if (m->getControl_pressed()) { in.close(); return; } in >> distance; } } } else{ for(int i=1;i<nseqs;i++){ in >> name; if (names.count(name) != 0) { rows.insert(row); } row++; for(int j=0;j<nseqs;j++){ if (m->getControl_pressed()) { in.close(); return; } in >> distance; } } } in.close(); if (m->getControl_pressed()) { return; } //read through file only printing rows and columns of seqs in names ifstream inPhylip; util.openInputFile(phylipfile, inPhylip); inPhylip >> numTest; ofstream out; util.openOutputFile(outputFileName, out); outputTypes["phylip"].push_back(outputFileName); outputNames.push_back(outputFileName); out << names.size() << endl; unsigned int count = 0; if(square == 0){ for(int i=0;i<nseqs;i++){ inPhylip >> name; bool ignoreRow = false; if (names.count(name) == 0) { ignoreRow = true; } else{ out << name << '\t'; count++; } for(int j=0;j<i;j++){ if (m->getControl_pressed()) { inPhylip.close(); out.close(); return; } inPhylip >> distance; if (!ignoreRow) { //is this a column we want if(rows.count(j) != 0) { out << distance << '\t'; } } } if (!ignoreRow) { out << endl; } } } else{ for(int i=0;i<nseqs;i++){ inPhylip >> name; bool ignoreRow = false; if (names.count(name) == 0) { ignoreRow = true; } else{ out << name << '\t'; count++; } for(int j=0;j<nseqs;j++){ if (m->getControl_pressed()) { inPhylip.close(); out.close(); return; } inPhylip >> distance; if (!ignoreRow) { //is this a column we want if(rows.count(j) != 0) { out << distance << '\t'; } } } if (!ignoreRow) { out << endl; } } } inPhylip.close(); out.close(); if (count == 0) { m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file.\n"); } else if (count != names.size()) { m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file.\n"); //rewrite with new number util.renameFile(outputFileName, outputFileName+".temp"); ofstream out2; 
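/* rewrite the pruned phylip file so its first line holds the corrected sequence count, then stream the remaining rows back from the .temp copy */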
util.openOutputFile(outputFileName, out2); out2 << count << endl; ifstream in3; util.openInputFile(outputFileName+".temp", in3); in3 >> nseqs; gobble(in3); char buffer[4096]; while (!in3.eof()) { in3.read(buffer, 4096); out2.write(buffer, in3.gcount()); } in3.close(); out2.close(); util.mothurRemove(outputFileName+".temp"); } m->mothurOut("Selected " + toString(count) + " groups or sequences from your phylip file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readPhylip"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::readColumn(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(columnfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(columnfile)); variables["[extension]"] = util.getExtension(columnfile); string outputFileName = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(columnfile, in); set foundNames; string firstName, secondName; float distance; while (!in.eof()) { if (m->getControl_pressed()) { out.close(); in.close(); return; } in >> firstName >> secondName >> distance; gobble(in); //are both names in the accnos file if ((names.count(firstName) != 0) && (names.count(secondName) != 0)) { out << firstName << '\t' << secondName << '\t' << distance << endl; foundNames.insert(firstName); foundNames.insert(secondName); } } in.close(); out.close(); if (foundNames.size() == 0) { m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file.\n"); } else if (foundNames.size() != names.size()) { m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(foundNames.size()) + " of them in the column file.\n"); } m->mothurOut("Selected " + toString(foundNames.size()) + " groups or sequences from your column file.\n"); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "readColumn"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::fillNames(){ try { vector seqs = groupMap->getNamesSeqs(); for (int i = 0; i < seqs.size(); i++) { if (m->getControl_pressed()) { return; } string group = groupMap->getGroup(seqs[i]); if (util.inUsersGroups(group, Groups)) { names.insert(seqs[i]); } } } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "fillNames"); exit(1); } } //********************************************************************************************************************** void GetGroupsCommand::fillGroupsFromDesign(){ try { DesignMap designMap(designfile); if (m->getControl_pressed()) { return ; } Groups = designMap.getNamesGroups(Sets); } catch(exception& e) { m->errorOut(e, "GetGroupsCommand", "fillGroupsFromDesign"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getgroupscommand.h000077500000000000000000000033471424121717000217630ustar00rootroot00000000000000#ifndef GETGROUPSCOMMAND_H #define GETGROUPSCOMMAND_H /* * getgroupscommand.h * Mothur * * Created by westcott on 11/10/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" #include "groupmap.h" class GetGroupsCommand : public Command { #ifdef UNIT_TEST friend class TestGetGroupsCommand; #endif public: GetGroupsCommand(string); ~GetGroupsCommand(){} vector<string> setParameters(); string getCommandName() { return "get.groups"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.groups"; } string getDescription() { return "gets sequences from a list, fasta, name, group, shared, design or taxonomy file from a given group or set of groups"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: unordered_set<string> names; map<string, string> uniqueToRedundant; //if a namefile is given and the first column name is not selected //then the other files need to change the unique name in their file to match. //only add the names that need to be changed to keep the map search quick string sets, accnosfile, countfile, fastafile, namefile, groupfile, listfile, designfile, taxfile, groups, sharedfile, phylipfile, columnfile, flowfile; bool abort; vector<string> outputNames, Groups, Sets; GroupMap* groupMap; void readFasta(); void readFlow(); void readName(); void readGroup(); void readList(); void readTax(); void fillNames(); void readShared(); void readDesign(); void readPhylip(); void readColumn(); void fillGroupsFromDesign(); }; #endif mothur-1.48.0/source/commands/getlabelcommand.cpp000077500000000000000000000124711424121717000220540ustar00rootroot00000000000000/* * GetlabelCommand.cpp * Mothur * * Created by Thomas Ryabin on 1/30/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "getlabelcommand.h" //********************************************************************************************************************** vector<string> GetlabelCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false, true); parameters.push_back(plist); CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false, true); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false, true); parameters.push_back(psabund); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector<string> myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetlabelCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetlabelCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.label command parameters are list, sabund and rabund file. 
\n"; helpString += "The get.label command should be in the following format: \n"; helpString += "get.label()\n"; helpString += "Example get.label().\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetlabelCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** GetlabelCommand::GetlabelCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; inputfile = listfile; current->setListFile(listfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { sabundfile = ""; abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } else { format = "sabund"; inputfile = sabundfile; current->setSabundFile(sabundfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { rabundfile = ""; abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } else { format = "rabund"; inputfile = rabundfile; current->setRabundFile(rabundfile); } if ((listfile == "") && (rabundfile == "") && (sabundfile == "")) { //is there are current file available for any of these? //give priority to list, then rabund, then sabund //if there is a current shared file, use it listfile = current->getListFile(); if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { rabundfile = current->getRabundFile(); if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter.\n"); } else { sabundfile = current->getSabundFile(); if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a list, sabund or rabund file.\n"); abort = true; } } } } } } catch(exception& e) { m->errorOut(e, "GetlabelCommand", "GetlabelCommand"); exit(1); } } //********************************************************************************************************************** int GetlabelCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData* input = new InputData(inputfile, format, nullVector); OrderVector* order = input->getOrderVector(); string label = order->getLabel(); while (order != nullptr) { if (m->getControl_pressed()) { delete input; delete order; return 0; } label = order->getLabel(); m->mothurOut(label); m->mothurOutEndLine(); delete order; order = input->getOrderVector(); } delete input; return 0; } catch(exception& e) { m->errorOut(e, "GetlabelCommand", "execute"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getlabelcommand.h000077500000000000000000000016501424121717000215160ustar00rootroot00000000000000#ifndef GETLABELCOMMAND_H #define GETLABELCOMMAND_H /* * getlabelcommand.h * Mothur * * Created by Thomas Ryabin on 1/30/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "ordervector.hpp" #include "inputdata.h" class GetlabelCommand : public Command { public: GetlabelCommand(string); ~GetlabelCommand(){} vector setParameters(); string getCommandName() { return "get.label"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string) { return ""; } string getCitation() { return "http://www.mothur.org/wiki/Get.label"; } string getDescription() { return "outputs labels"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string inputfile, listfile, rabundfile, sabundfile, format; bool abort; vector outputNames; }; #endif mothur-1.48.0/source/commands/getlineagecommand.cpp000066400000000000000000000616051424121717000224010ustar00rootroot00000000000000/* * getlineagecommand.cpp * Mothur * * Created by westcott on 9/24/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "getlineagecommand.h" #include "sequence.hpp" #include "listvector.hpp" #include "counttable.h" #include "inputdata.h" //********************************************************************************************************************** vector GetLineageCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false, true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false, true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false, true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false, true); parameters.push_back(pgroup); CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist); CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","taxonomy",false,false, true); parameters.push_back(ptaxonomy); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy); CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true, true); parameters.push_back(ptaxon); CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["alignreport"] = tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["constaxonomy"] = tempOutNames; outputTypes["shared"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetLineageCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetLineageCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.lineage command reads a taxonomy or constaxonomy file and any of the following file types: fasta, name, group, count, list, shared or alignreport file. 
The constaxonomy can only be used with a shared or list file.\n"; helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n"; helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, shared, taxonomy, alignreport, label and dups. You must provide taxonomy or constaxonomy unless you have a valid current taxonomy file.\n"; helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n"; helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n"; helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n"; helpString += "If they belong to the taxonomy and have confidences below those you provide the sequence will not be selected.\n"; helpString += "The label parameter is used to analyze specific labels in your input. \n"; helpString += "The get.lineage command should be in the following format: get.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n"; helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n"; helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n"; helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "GetLineageCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetLineageCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],pick,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "alignreport") { pattern = "[filename],pick.[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetLineageCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetLineageCommand::GetLineageCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { 
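/* a valid fasta file is remembered as the session's current fasta, so later commands can default to it when no fasta parameter is given */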
current->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } alignfile = validParameter.validFile(parameters, "alignreport"); if (alignfile == "not open") { abort = true; } else if (alignfile == "not found") { alignfile = ""; } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not open") { taxfile = ""; abort = true; } else if (taxfile == "not found") { taxfile = ""; } else { current->setTaxonomyFile(taxfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } constaxonomy = validParameter.validFile(parameters, "constaxonomy"); if (constaxonomy == "not open") { constaxonomy = ""; abort = true; } else if (constaxonomy == "not found") { constaxonomy = ""; } if ((constaxonomy == "") && (taxfile == "")) { taxfile = current->getTaxonomyFile(); if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter.\n"); } else { m->mothurOut("You have no current taxonomy file and did not provide a constaxonomy file. The taxonomy or constaxonomy parameter is required.\n"); abort = true; } } string usedDups = "true"; string temp = validParameter.valid(parameters, "dups"); if (temp == "not found") { if (namefile != "") { temp = "true"; } else { temp = "false"; usedDups = ""; } } dups = util.isTrue(temp); countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } taxons = validParameter.valid(parameters, "taxon"); if (taxons == "not found") { taxons = ""; m->mothurOut("No taxons given, please correct.\n"); abort = true; } else { //rip off quotes if (taxons[0] == '\'') { taxons = taxons.substr(1); } if (taxons[(taxons.length()-1)] == '\'') { taxons = taxons.substr(0, (taxons.length()-1)); } } util.splitAtChar(taxons, listOfTaxons, '-'); if ((fastafile == "") && (constaxonomy == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, constaxonomy, shared or listfile.\n"); abort = true; } if ((constaxonomy != "") && ((fastafile != "") || (namefile != "") || (groupfile != "") || (alignfile != "") || (taxfile != "") || (countfile != ""))) { m->mothurOut("[ERROR]: can only use constaxonomy file with a list or shared file, aborting.\n"); abort = true; } if ((constaxonomy != "") && (taxfile 
!= "")) { m->mothurOut("[ERROR]: Choose only one: taxonomy or constaxonomy, aborting.\n"); abort = true; } if ((sharedfile != "") && (taxfile != "")) { m->mothurOut("[ERROR]: sharedfile can only be used with constaxonomy file, aborting.\n"); abort = true; } if ((sharedfile != "") || (listfile != "")) { label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; m->mothurOut("[WARNING]: You did not provide a label, I will use the first label in your inputfile.\n"); } } } } catch(exception& e) { m->errorOut(e, "GetLineageCommand", "GetLineageCommand"); exit(1); } } //********************************************************************************************************************** int GetLineageCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (m->getControl_pressed()) { return 0; } if (countfile != "") { if ((fastafile != "") || (listfile != "") || (taxfile != "")) { //m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); } } //read through the correct file and output lines you want to keep if (taxfile != "") { string accnosFileName = readTax(); //fills the set of names to get if (!util.isBlank(accnosFileName)) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); runGetSeqs(accnosFileName); }else { util.mothurRemove(accnosFileName); } }else { string accnosFileName = readConsTax(); if (!util.isBlank(accnosFileName)) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); runGetOTUs(accnosFileName); } else { util.mothurRemove(accnosFileName); } } if (m->getControl_pressed()) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } //set constaxonomy file as new current constaxonomyfile itTypes = outputTypes.find("constaxonomy"); if 
(itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setConsTaxonomyFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "GetLineageCommand", "execute"); exit(1); } } //********************************************************************************************************************** int GetLineageCommand::runGetSeqs(string accnosFileName){ try { //use remove.seqs to create new list and shared files if ((namefile != "") || (fastafile != "") || (countfile != "") || (groupfile != "") || (alignfile != "") || (listfile != "")) { string inputString = "accnos=" + accnosFileName; if (namefile != "") { inputString += ", name=" + namefile; } if (countfile != "") { inputString += ", count=" + countfile; } if (fastafile != "") { inputString += ", fasta=" + fastafile; } if (groupfile != "") { inputString += ", group=" + groupfile; } if (alignfile != "") { inputString += ", alignreport=" + alignfile; } if (listfile != "") { inputString += ", list=" + listfile; } m->mothurOut("\n/******************************************/\n"); m->mothurOut("Running command: get.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* getCommand = new GetSeqsCommand(inputString); getCommand->execute(); map > filenames = getCommand->getOutputFiles(); delete getCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); outputTypes.insert(filenames.begin(), filenames.end()); if (listfile != "") { vector files = filenames["list"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (namefile != "") { vector files = filenames["name"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (countfile != "") { vector files = filenames["count"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (fastafile != "") { vector files = filenames["fasta"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (groupfile != "") { vector files = filenames["group"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (alignfile != "") { vector files = filenames["alignreport"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } } return 0; } catch(exception& e) { m->errorOut(e, "GetLineageCommand", "runGetSeqs"); exit(1); } } //********************************************************************************************************************** int GetLineageCommand::runGetOTUs(string accnosFileName){ try { //use remove.otus to create new list and shared files if ((listfile != "") || (sharedfile != "")) { string inputString = "accnos=" + accnosFileName; if (listfile != "") { inputString += ", list=" + listfile; } if (sharedfile != "") { inputString += ", shared=" + sharedfile; } m->mothurOut("\n/******************************************/\n"); m->mothurOut("Running command: get.otus(" + inputString + ")\n"); current->setMothurCalling(true); Command* getCommand = new GetOtusCommand(inputString); getCommand->execute(); map > filenames = getCommand->getOutputFiles(); delete getCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); outputTypes.insert(filenames.begin(), filenames.end()); if (listfile != "") { vector files = filenames["list"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (sharedfile != "") { vector files = filenames["shared"]; outputNames.insert(outputNames.end(), files.begin(), 
files.end()); } } return 0; } catch(exception& e) { m->errorOut(e, "GetLineageCommand", "runGetOTUs"); exit(1); } } //********************************************************************************************************************** string GetLineageCommand::readTax(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile)); variables["[extension]"] = util.getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); string accnosFileName = outputFileName + ".accnos"; ofstream out, outAccnos; util.openOutputFile(outputFileName, out); util.openOutputFile(accnosFileName, outAccnos); ifstream in; util.openInputFile(taxfile, in); string name, tax; bool wroteSomething = false; vector taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false); vector< vector > searchTaxons; searchTaxons.resize(listOfTaxons.size()); for (int i = 0; i < listOfTaxons.size(); i++) { bool hasCon = false; searchTaxons[i] = util.getTaxons(listOfTaxons[i], hasCon); taxonsHasConfidence[i] = hasCon; } while(!in.eof()){ if (m->getControl_pressed()) { break; } in >> name; gobble(in); tax = util.getline(in); gobble(in); Taxonomy thisSeq(name, tax); vector otuTax = thisSeq.getTaxons(); util.removeQuotes(otuTax); if (util.searchTax(otuTax, taxonsHasConfidence, searchTaxons)) { out << name << '\t' << tax << endl; outAccnos << name << endl; wroteSomething = true; } } in.close(); out.close(); outAccnos.close(); if (!wroteSomething) { m->mothurOut("Your taxonomy file does not contain any sequences from " + taxons + ".\n"); } outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName); return accnosFileName; } catch(exception& e) { m->errorOut(e, "GetLineageCommand", "readTax"); exit(1); } } //********************************************************************************************************************** string GetLineageCommand::readConsTax(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(constaxonomy); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxonomy)); variables["[extension]"] = util.getExtension(constaxonomy); string outputFileName = getOutputFileName("constaxonomy", variables); string accnosFileName = outputFileName + ".accnos"; ofstream out, outAccnos; util.openOutputFile(outputFileName, out); util.openOutputFile(accnosFileName, outAccnos); ifstream in; util.openInputFile(constaxonomy, in); //read headers string headers = util.getline(in); out << headers << endl; bool wroteSomething = false; vector taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false); vector< vector > searchTaxons; searchTaxons.resize(listOfTaxons.size()); for (int i = 0; i < listOfTaxons.size(); i++) { bool hasCon = false; searchTaxons[i] = util.getTaxons(listOfTaxons[i], hasCon); taxonsHasConfidence[i] = hasCon; } while(!in.eof()){ if (m->getControl_pressed()) { break; } Taxonomy thisOtu(in); vector otuTax = thisOtu.getTaxons(); util.removeQuotes(otuTax); if (util.searchTax(otuTax, taxonsHasConfidence, searchTaxons)) { wroteSomething = true; outAccnos << thisOtu.getName() << endl; thisOtu.printConsTax(out); } } in.close(); out.close(); outAccnos.close(); if (!wroteSomething) { m->mothurOut("Your taxonomy file does not contain any OTUs from " + taxons + ".\n"); } outputNames.push_back(outputFileName); 
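/* registering the file under the constaxonomy output type lets execute() promote it to the current constaxonomy file; a call to this code path might look like get.lineage(constaxonomy=final.cons.taxonomy, shared=final.shared, taxon='Bacteria;Firmicutes;'), where the file names are only illustrative */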
outputTypes["constaxonomy"].push_back(outputFileName); return accnosFileName; } catch(exception& e) { m->errorOut(e, "GetLineageCommand", "readConsTax"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getlineagecommand.h000077500000000000000000000025111424121717000220400ustar00rootroot00000000000000#ifndef GETLINEAGECOMMAND_H #define GETLINEAGECOMMAND_H /* * getlineagecommand.h * Mothur * * Created by westcott on 9/24/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "sharedrabundvectors.hpp" #include "listvector.hpp" #include "getseqscommand.h" #include "getotuscommand.h" #include "taxonomy.hpp" class GetLineageCommand : public Command { public: GetLineageCommand(string); ~GetLineageCommand(){} vector setParameters(); string getCommandName() { return "get.lineage"; } string getCommandCategory() { return "Phylotype Analysis"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.lineage"; } string getDescription() { return "gets sequences from a list, fasta, name, group, alignreport or taxonomy file from a given taxonomy or set of taxonomies"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames, listOfTaxons; string fastafile, namefile, groupfile, alignfile, countfile, listfile, taxfile, taxons, sharedfile, constaxonomy, label; bool abort, dups; string readTax(); string readConsTax(); int runGetOTUs(string); int runGetSeqs(string); }; #endif mothur-1.48.0/source/commands/getlistcountcommand.cpp000077500000000000000000000174121424121717000230210ustar00rootroot00000000000000/* * getlistcountcommand.cpp * Mothur * * Created by westcott on 10/12/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "getlistcountcommand.h" //********************************************************************************************************************** vector GetListCountCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","otu",false,true, true); parameters.push_back(plist); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter parasort("sort", "Multiple", "name-otu", "otu", "", "", "","",false,false); parameters.push_back(parasort); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["otu"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetListCountCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetListCountCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.otulist command parameters are list, sort and label. 
list is required, unless you have a valid current list file.\n"; helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and are separated by dashes.\n"; helpString += "The sort parameter allows you to select how you want the output displayed. Options are otu and name.\n"; helpString += "If otu is selected the output will be otu number followed by the list of names in that otu.\n"; helpString += "If name is selected the output will be a sequence name followed by its otu number.\n"; helpString += "The get.otulist command should be in the following format: get.otulist(list=yourlistFile, label=yourLabels).\n"; helpString += "Example get.otulist(list=amazon.fn.list, label=0.10).\n"; helpString += "The default value for label is all lines in your inputfile.\n"; helpString += "The get.otulist command outputs a .otu file for each distance you specify listing the bin number and the names of the sequences in that bin.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetListCountCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetListCountCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "otu") { pattern = "[filename],[tag],otu"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetListCountCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetListCountCommand::GetListCountCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not found") { listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { m->mothurOut("You have no current list file and the list parameter is required.\n"); abort = true; } } else if (listfile == "not open") { abort = true; } else { current->setListFile(listfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... sort = validParameter.valid(parameters, "sort"); if (sort == "not found") { sort = "otu"; } if ((sort != "otu") && (sort != "name")) { m->mothurOut( sort + " is not a valid sort option. Options are otu and name. 
I will use otu.\n"); sort = "otu"; } label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } } } catch(exception& e) { m->errorOut(e, "GetListCountCommand", "GetListCountCommand"); exit(1); } } //********************************************************************************************************************** int GetListCountCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(listfile, "list", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } process(list); delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } if (m->getControl_pressed()) { return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetListCountCommand", "execute"); exit(1); } } //********************************************************************************************************************** //return 1 if error, 0 otherwise void GetListCountCommand::process(ListVector* list) { try { string binnames; if (outputdir == "") { outputdir += util.hasPath(listfile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); variables["[tag]"] = list->getLabel(); string outputFileName = getOutputFileName("otu", variables); util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["otu"].push_back(outputFileName); m->mothurOut(list->getLabel()); m->mothurOutEndLine(); //for each bin in the list vector vector binLabels = list->getLabels(); for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { break; } binnames = list->get(i); if (sort == "otu") { out << binLabels[i] << '\t' << binnames << endl; }else{ //sort = name vector names; util.splitAtComma(binnames, names); for (int j = 0; j < names.size(); j++) { out << names[j] << '\t' << binLabels[i] << endl; } } } out.close(); } catch(exception& e) { m->errorOut(e, "GetListCountCommand", "process"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getlistcountcommand.h000077500000000000000000000022301424121717000224560ustar00rootroot00000000000000#ifndef GETLISTCOUNTCOMMAND_H #define GETLISTCOUNTCOMMAND_H /* * getlistcountcommand.h * Mothur * * Created by westcott on 10/12/09. * Copyright 2009 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" #include "inputdata.h" #include "listvector.hpp" /**********************************************************/ class GetListCountCommand : public Command { public: GetListCountCommand(string); ~GetListCountCommand(){} vector setParameters(); string getCommandName() { return "get.otulist"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getCitation() { return "http://www.mothur.org/wiki/Get.otulist"; } string getDescription() { return "lists each OTU number and the sequence contained in that OTU"; } string getHelpString(); string getOutputPattern(string); int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines; set labels; //holds labels to be used string label, listfile, sort; ofstream out; vector outputNames; void process(ListVector*); }; /**********************************************************/ #endif mothur-1.48.0/source/commands/getmetacommunitycommand.cpp000066400000000000000000001177531424121717000236760ustar00rootroot00000000000000// // getmetacommunitycommand.cpp // Mothur // // Created by SarahsWork on 4/9/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "getmetacommunitycommand.h" #include "communitytype.h" #include "kmeans.h" #include "validcalculator.h" #include "subsample.h" //********************************************************************************************************************** vector GetMetaCommunityCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "rjsd", "", "", "","",false,false,true); parameters.push_back(pcalc); CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions); CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions); CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap); CommandParameter pwithreplacement("withreplacement", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pwithreplacement); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter pmethod("method", "Multiple", 
"dmm-kmeans-pam", "dmm", "", "", "","",false,false,true); parameters.push_back(pmethod); abort = false; calledHelp = false; allLines=true; vector tempOutNames; outputTypes["fit"] = tempOutNames; outputTypes["relabund"] = tempOutNames; outputTypes["matrix"] = tempOutNames; outputTypes["design"] = tempOutNames; outputTypes["parameters"] = tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "NewCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetMetaCommunityCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.communitytype command parameters are shared, method, label, groups, minpartitions, maxpartitions, optimizegap, subsample, withreplacement. The shared file is required. \n"; helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n"; helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n"; helpString += "The method parameter allows you to select the method you would like to use. Options are dmm, kmeans and pam. Default=dmm.\n"; helpString += "The calc parameter allows you to select the calculator you would like to use to calculate the distance matrix used by the pam and kmeans method. By default the rjsd calculator is used.\n"; helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample while calculating the distance matrix for the pam and kmeans method.\n"; helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group while calculating the distance matrix for the pam and kmeans methods.\n"; helpString += "The withreplacement parameter allows you to indicate you want to subsample your data allowing for the same read to be included multiple times. Default=f. \n"; helpString += "The minpartitions parameter is used to .... Default=5.\n"; helpString += "The maxpartitions parameter is used to .... Default=10.\n"; helpString += "The optimizegap parameter is used to .... 
Default=3.\n"; helpString += "The get.communitytype command should be in the following format: get.communitytype(shared=yourSharedFile).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetMetaCommunityCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetMetaCommunityCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fit") { pattern = "[filename],[distance],[method],mix.fit"; } else if (type == "relabund") { pattern = "[filename],[distance],[method],[tag],mix.relabund"; } else if (type == "design") { pattern = "[filename],[distance],[method],mix.design"; } else if (type == "matrix") { pattern = "[filename],[distance],[method],[tag],mix.posterior"; } else if (type == "parameters") { pattern = "[filename],[distance],[method],mix.parameters"; } else if (type == "summary") { pattern = "[filename],[distance],[method],mix.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetMetaCommunityCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetMetaCommunityCommand::GetMetaCommunityCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } string temp = validParameter.valid(parameters, "minpartitions"); if (temp == "not found"){ temp = "5"; } util.mothurConvert(temp, minpartitions); temp = validParameter.valid(parameters, "maxpartitions"); if (temp == "not found"){ temp = "10"; } util.mothurConvert(temp, maxpartitions); temp = validParameter.valid(parameters, "optimizegap"); if (temp == "not found"){ temp = "3"; } util.mothurConvert(temp, optimizegap); string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } method = validParameter.valid(parameters, "method"); if (method == "not found") { method = "dmm"; } if ((method == "dmm") || (method == "kmeans") || (method == "pam")) { } else { m->mothurOut("[ERROR]: " + method + " is not a valid method. 
Valid algorithms are dmm, kmeans and pam.\n"); abort = true; } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "rjsd"; } else { if (calc == "default") { calc = "rjsd"; } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } if (Estimators.size() != 1) { abort = true; m->mothurOut("[ERROR]: only one calculator is allowed.\n"); } temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "subsample"); if (temp == "not found") { temp = "F"; } if (util.isNumeric1(temp)) { util.mothurConvert(temp, subsampleSize); subsample = true; } else { if (util.isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later else { subsample = false; } } if (subsample == false) { iters = 0; } temp = validParameter.valid(parameters, "withreplacement"); if (temp == "not found"){ temp = "f"; } withReplacement = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand"); exit(1); } } //********************************************************************************************************************** int GetMetaCommunityCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); if (subsample) { if (subsampleSize == -1) { //user has not set size, set size = smallest samples size subsampleSize = lookup->getNumSeqsSmallestGroup(); }else { lookup->removeGroups(subsampleSize); Groups = lookup->getNamesGroups(); } if (lookup->size() < 2) { m->mothurOut("[ERROR]: You have not provided enough valid groups. I cannot run the command.\n"); m->setControl_pressed(true); return 0; } } //check minpartitions and maxpartitions to ensure if (lookup->size() < maxpartitions) { m->mothurOut("\n\n[NOTE]: This command is designed to be run with datasets containing > 50 samples.\n\n"); m->mothurOut("[WARNING]: You have not provided enough valid groups, for maxpartitions=" + toString(maxpartitions) + ". Reducing maxpartitions to " + toString(lookup->size()) + ".\n"); maxpartitions = lookup->size(); if (minpartitions > lookup->size()) { minpartitions = lookup->size(); m->mothurOut("[WARNING]: You have not provided enough valid groups, for minpartitions=" + toString(minpartitions) + ". Reducing minpartitions to " + toString(lookup->size()) + ".\n"); } m->mothurOut("\n\n"); }else if (lookup->size() < minpartitions) { m->mothurOut("[NOTE]: This command is designed to be run with datasets containing > 50 samples.\n"); minpartitions = lookup->size(); m->mothurOut("[WARNING]: You have not provided enough valid groups, for minpartitions=" + toString(minpartitions) + ". Reducing minpartitions to " + toString(lookup->size()) + ".\n"); if (maxpartitions > lookup->size()) { maxpartitions = lookup->size(); m->mothurOut("[WARNING]: You have not provided enough valid groups, for maxpartitions=" + toString(maxpartitions) + ". 
Reducing maxpartitions to " + toString(lookup->size()) + ".\n"); } m->mothurOut("\n\n"); } while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; return 0; } createProcesses(lookup); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetMetaCommunityCommand", "execute"); exit(1); } } //********************************************************************************************************************** int GetMetaCommunityCommand::createProcesses(SharedRAbundVectors*& thislookup){ try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[distance]"] = thislookup->getLabel(); variables["[method]"] = method; string outputFileName = getOutputFileName("fit", variables); outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName); //force 1 processor vector< vector > dividedPartitions; vector< vector > rels, matrix; vector doneFlags; dividedPartitions.resize(1); rels.resize(1); matrix.resize(1); int minPartition = 0; for (int i=1; i<=maxpartitions; i++) { int processToAssign = 1; dividedPartitions[(processToAssign-1)].push_back(i); variables["[tag]"] = toString(i); string relName = getOutputFileName("relabund", variables); string mName = getOutputFileName("matrix", variables); rels[(processToAssign-1)].push_back(relName); matrix[(processToAssign-1)].push_back(mName); } m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0); if (m->getControl_pressed()) { return 0; } if (m->getDebug()) { m->mothurOut("[DEBUG]: minPartition = " + toString(minPartition) + "\n"); } //run generate Summary function for smallest minPartition variables["[tag]"] = toString(minPartition); vector piValues = generateDesignFile(minPartition, variables); if (method == "dmm") { generateSummaryFile(minPartition, variables, piValues); } //pam doesn't make a relabund file return 0; } catch(exception& e) { m->errorOut(e, "GetMetaCommunityCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** int GetMetaCommunityCommand::processDriver(SharedRAbundVectors*& thislookup, vector& parts, string outputFileName, vector relabunds, vector matrix, vector doneFlags, int processID){ try { double minLaplace = 1e10; int minPartition = 1; vector minSilhouettes; minSilhouettes.resize(thislookup->size(), 0); vector namesOfGroups = thislookup->getNamesGroups(); ofstream fitData, silData; if (method == "dmm") { util.openOutputFile(outputFileName, fitData); fitData.setf(ios::fixed, ios::floatfield); fitData.setf(ios::showpoint); fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl; }else if((method == "pam") || (method == "kmeans")) { //because ch is looking of maximal value minLaplace = 0; util.openOutputFile(outputFileName, silData); silData.setf(ios::fixed, ios::floatfield); silData.setf(ios::showpoint); silData << "K\tCH"; for (int i = 0; i < namesOfGroups.size(); i++) { silData << '\t' << namesOfGroups[i]; } silData << endl; } cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); vector< vector > sharedMatrix; vector 
thisGroups = namesOfGroups; for (int i = 0; i < thisGroups.size(); i++) { RAbundVector rav = thislookup->getRAbundVector(thisGroups[i]); sharedMatrix.push_back(rav.get()); } vector< vector > dists; //do we want to output this matrix?? if ((method == "pam") || (method == "kmeans")) { dists = generateDistanceMatrix(thislookup); } if (m->getDebug()) { m->mothurOut("[DEBUG]: dists = \n"); for (int i = 0; i < dists.size(); i++) { if (m->getControl_pressed()) { break; } m->mothurOut("[DEBUG]: i = " + toString(i) + '\t'); for (int j = 0; j < i; j++) { m->mothurOut(toString(dists[i][j]) +"\t"); } m->mothurOut("\n"); } } for(int i=0;igetDebug()) { m->mothurOut("[DEBUG]: running partition " + toString(numPartitions) + "\n"); } if (m->getControl_pressed()) { break; } //check to see if anyone else is done for (int j = 0; j < doneFlags.size(); j++) { if (!util.isBlank(doneFlags[j])) { //another process has finished //are they done at a lower partition? ifstream in; util.openInputFile(doneFlags[j], in); int tempNum; in >> tempNum; in.close(); if (tempNum < numPartitions) { break; } //quit, because someone else has finished } } CommunityTypeFinder* finder = nullptr; if (method == "dmm") { finder = new qFinderDMM(sharedMatrix, numPartitions); } else if (method == "kmeans") { finder = new KMeans(sharedMatrix, numPartitions); } else if (method == "pam") { finder = new Pam(sharedMatrix, dists, numPartitions); } else { if (i == 0) { m->mothurOut(method + " is not a valid method option. I will run the command using dmm.\n"); } finder = new qFinderDMM(sharedMatrix, numPartitions); } string relabund = relabunds[i]; string matrixName = matrix[i]; outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName); finder->printZMatrix(matrixName, thisGroups); double chi; vector silhouettes; if (method == "dmm") { double laplace = finder->getLaplace(); if(laplace < minLaplace){ minPartition = numPartitions; minLaplace = laplace; } }else { chi = finder->calcCHIndex(dists); silhouettes = finder->calcSilhouettes(dists); if (chi > minLaplace) { //save partition with maximum ch index score minPartition = numPartitions; minLaplace = chi; minSilhouettes = silhouettes; } } if (method == "dmm") { finder->printFitData(cout, minLaplace); finder->printFitData(fitData); vector currentLabels = thislookup->getOTUNames(); finder->printRelAbund(relabund, currentLabels); outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund); }else if ((method == "pam") || (method == "kmeans")) { //print silouettes and ch values finder->printSilData(cout, chi, silhouettes); finder->printSilData(silData, chi, silhouettes); if (method == "kmeans") { vector currentLabels = thislookup->getOTUNames(); finder->printRelAbund(relabund, currentLabels); outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund); } } delete finder; if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ string tempDoneFile = util.getRootName(util.getSimpleName(sharedfile)) + toString(processID) + ".done.temp"; ofstream outDone; util.openOutputFile(tempDoneFile, outDone); outDone << minPartition << endl; outDone.close(); break; } } if (method == "dmm") { fitData.close(); } if (m->getControl_pressed()) { return 0; } return minPartition; } catch(exception& e) { m->errorOut(e, "GetMetaCommunityCommand", "processDriver"); exit(1); } } /**************************************************************************************************/ vector 
GetMetaCommunityCommand::generateDesignFile(int numPartitions, map variables){ try { vector piValues(numPartitions, 0); ifstream postFile; variables["[tag]"] = toString(numPartitions); string input = getOutputFileName("matrix", variables); util.openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file variables.erase("[tag]"); string outputFileName = getOutputFileName("design", variables); ofstream designFile; util.openOutputFile(outputFileName, designFile); outputNames.push_back(outputFileName); outputTypes["design"].push_back(outputFileName); vector titles(numPartitions); for(int i=0;i> titles[i]; } double posterior; string sampleName; int numSamples = 0; while(postFile){ if (m->getControl_pressed()) { break; } double maxPosterior = 0.0000; int maxPartition = -1; postFile >> sampleName; for(int i=0;i> posterior; if(posterior > maxPosterior){ maxPosterior = posterior; maxPartition = i; } piValues[i] += posterior; } designFile << sampleName << '\t' << titles[maxPartition] << endl; numSamples++; gobble(postFile); } for(int i=0;ierrorOut(e, "GetMetaCommunityCommand", "generateDesignFile"); exit(1); } } /**************************************************************************************************/ inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; } /**************************************************************************************************/ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map v, vector piValues){ try { vector summary; vector pMean(numPartitions, 0); vector pLCI(numPartitions, 0); vector pUCI(numPartitions, 0); string name, header; double mean, lci, uci; ifstream referenceFile; map variables; variables["[filename]"] = v["[filename]"]; variables["[distance]"] = v["[distance]"]; variables["[method]"] = method; variables["[tag]"] = "1"; string reference = getOutputFileName("relabund", variables); util.openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str()); variables["[tag]"] = toString(numPartitions); string partFile = getOutputFileName("relabund", variables); ifstream partitionFile; util.openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str()); header = util.getline(referenceFile); header = util.getline(partitionFile); stringstream head(header); string dummy, label; head >> dummy; vector thetaValues(numPartitions, ""); for(int i=0;i> label >> dummy >> dummy; thetaValues[i] = label.substr(label.find_last_of('_')+1); } vector partitionDiff(numPartitions, 0.0000); while(referenceFile){ if (m->getControl_pressed()) { break; } referenceFile >> name >> mean >> lci >> uci; summaryData tempData; tempData.name = name; tempData.refMean = mean; double difference = 0.0000; partitionFile >> name; for(int j=0;j> pMean[j] >> pLCI[j] >> pUCI[j]; difference += abs(mean - pMean[j]); partitionDiff[j] += abs(mean - pMean[j]);; } tempData.partMean = pMean; tempData.partLCI = pLCI; tempData.partUCI = pUCI; tempData.difference = difference; summary.push_back(tempData); gobble(referenceFile); gobble(partitionFile); } referenceFile.close(); partitionFile.close(); if (m->getControl_pressed()) { return 0; } int numOTUs = (int)summary.size(); sort(summary.begin(), summary.end(), summaryFunction); variables.erase("[tag]"); string outputFileName = getOutputFileName("parameters", variables); outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName); ofstream 
parameterFile; util.openOutputFile(outputFileName, parameterFile); //((fileRoot + "mix.parameters").c_str()); parameterFile.setf(ios::fixed, ios::floatfield); parameterFile.setf(ios::showpoint); double totalDifference = 0.0000; parameterFile << "Part\tDif2Ref_i\ttheta_i\tpi_i\n"; for(int i=0;igetControl_pressed()) { break; } parameterFile << i+1 << '\t' << setprecision(2) << partitionDiff[i] << '\t' << thetaValues[i] << '\t' << piValues[i] << endl; totalDifference += partitionDiff[i]; } parameterFile.close(); if (m->getControl_pressed()) { return 0; } string summaryFileName = getOutputFileName("summary", variables); outputNames.push_back(summaryFileName); outputTypes["summary"].push_back(summaryFileName); ofstream summaryFile; util.openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str()); summaryFile.setf(ios::fixed, ios::floatfield); summaryFile.setf(ios::showpoint); summaryFile << "OTU\tP0.mean"; for(int i=0;igetControl_pressed()) { break; } summaryFile << summary[i].name << setprecision(2) << '\t' << summary[i].refMean; for(int j=0;jerrorOut(e, "GetMetaCommunityCommand", "generateSummaryFile"); exit(1); } } //********************************************************************************************************************** vector > GetMetaCommunityCommand::generateDistanceMatrix(SharedRAbundVectors*& thisLookup){ try { vector > results; Calculator* matrixCalculator; ValidCalculators validCalculator; int i = 0; if (validCalculator.isValidCalculator("matrix", Estimators[i]) ) { if (Estimators[i] == "sharedsobs") { matrixCalculator = new SharedSobsCS(); }else if (Estimators[i] == "sharedchao") { matrixCalculator = new SharedChao1(); }else if (Estimators[i] == "sharedace") { matrixCalculator = new SharedAce(); }else if (Estimators[i] == "jabund") { matrixCalculator = new JAbund(); }else if (Estimators[i] == "sorabund") { matrixCalculator = new SorAbund(); }else if (Estimators[i] == "jclass") { matrixCalculator = new Jclass(); }else if (Estimators[i] == "sorclass") { matrixCalculator = new SorClass(); }else if (Estimators[i] == "jest") { matrixCalculator = new Jest(); }else if (Estimators[i] == "sorest") { matrixCalculator = new SorEst(); }else if (Estimators[i] == "thetayc") { matrixCalculator = new ThetaYC(); }else if (Estimators[i] == "thetan") { matrixCalculator = new ThetaN(); }else if (Estimators[i] == "kstest") { matrixCalculator = new KSTest(); }else if (Estimators[i] == "sharednseqs") { matrixCalculator = new SharedNSeqs(); }else if (Estimators[i] == "ochiai") { matrixCalculator = new Ochiai(); }else if (Estimators[i] == "anderberg") { matrixCalculator = new Anderberg(); }else if (Estimators[i] == "kulczynski") { matrixCalculator = new Kulczynski(); }else if (Estimators[i] == "kulczynskicody") { matrixCalculator = new KulczynskiCody(); }else if (Estimators[i] == "lennon") { matrixCalculator = new Lennon(); }else if (Estimators[i] == "morisitahorn") { matrixCalculator = new MorHorn(); }else if (Estimators[i] == "braycurtis") { matrixCalculator = new BrayCurtis(); }else if (Estimators[i] == "whittaker") { matrixCalculator = new Whittaker(); }else if (Estimators[i] == "odum") { matrixCalculator = new Odum(); }else if (Estimators[i] == "canberra") { matrixCalculator = new Canberra(); }else if (Estimators[i] == "structeuclidean") { matrixCalculator = new StructEuclidean(); }else if (Estimators[i] == "structchord") { matrixCalculator = new StructChord(); }else if (Estimators[i] == "hellinger") { matrixCalculator = new Hellinger(); }else if (Estimators[i] == 
"manhattan") { matrixCalculator = new Manhattan(); }else if (Estimators[i] == "structpearson") { matrixCalculator = new StructPearson(); }else if (Estimators[i] == "soergel") { matrixCalculator = new Soergel(); }else if (Estimators[i] == "spearman") { matrixCalculator = new Spearman(); }else if (Estimators[i] == "structkulczynski") { matrixCalculator = new StructKulczynski(); }else if (Estimators[i] == "speciesprofile") { matrixCalculator = new SpeciesProfile(); }else if (Estimators[i] == "hamming") { matrixCalculator = new Hamming(); }else if (Estimators[i] == "structchi2") { matrixCalculator = new StructChi2(); }else if (Estimators[i] == "gower") { matrixCalculator = new Gower(); }else if (Estimators[i] == "memchi2") { matrixCalculator = new MemChi2(); }else if (Estimators[i] == "memchord") { matrixCalculator = new MemChord(); }else if (Estimators[i] == "memeuclidean") { matrixCalculator = new MemEuclidean(); }else if (Estimators[i] == "mempearson") { matrixCalculator = new MemPearson(); }else if (Estimators[i] == "jsd") { matrixCalculator = new JSD(); }else if (Estimators[i] == "rjsd") { matrixCalculator = new RJSD(); }else { m->mothurOut("[ERROR]: " + Estimators[i] + " is not a valid calculator, please correct.\n"); m->setControl_pressed(true); return results; } } //calc distances vector< vector< vector > > calcDistsTotals; //each iter, then each groupCombos dists. this will be used to make .dist files vector< vector > calcDists; calcDists.resize(1); SubSample sample; for (int thisIter = 0; thisIter < iters+1; thisIter++) { SharedRAbundVectors* thisItersLookup = new SharedRAbundVectors(*thisLookup); vector namesOfGroups = thisItersLookup->getNamesGroups(); if (subsample && (thisIter != 0)) { if (withReplacement) { sample.getSampleWithReplacement(thisItersLookup, subsampleSize); } else { sample.getSample(thisItersLookup, subsampleSize); } } driver(thisItersLookup, calcDists, matrixCalculator); if (subsample && (thisIter != 0)) { if((thisIter) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter)+"\n"); } calcDistsTotals.push_back(calcDists); for (int i = 0; i < calcDists.size(); i++) { for (int j = 0; j < calcDists[i].size(); j++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + namesOfGroups[calcDists[i][j].seq1] + " - " + namesOfGroups[calcDists[i][j].seq2] + " distance = " + toString(calcDists[i][j].dist) + ".\n"); } } } }else { //print results for whole dataset for (int i = 0; i < calcDists.size(); i++) { if (m->getControl_pressed()) { break; } //initialize matrix results.resize(thisLookup->size()); for (int k = 0; k < thisLookup->size(); k++) { results[k].resize(thisLookup->size(), 0.0); } for (int j = 0; j < calcDists[i].size(); j++) { int row = calcDists[i][j].seq1; int column = calcDists[i][j].seq2; double dist = calcDists[i][j].dist; results[row][column] = dist; results[column][row] = dist; } } } for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); } delete thisItersLookup; } if (iters != 0) { //we need to find the average distance and standard deviation for each groups distance vector< vector > calcAverages = util.getAverages(calcDistsTotals, "average"); //print results for (int i = 0; i < calcDists.size(); i++) { results.resize(thisLookup->size()); for (int k = 0; k < thisLookup->size(); k++) { results[k].resize(thisLookup->size(), 0.0); } for (int j = 0; j < calcAverages[i].size(); j++) { int row = calcAverages[i][j].seq1; int column = calcAverages[i][j].seq2; float dist = calcAverages[i][j].dist; 
results[row][column] = dist; results[column][row] = dist; } } } delete matrixCalculator; return results; } catch(exception& e) { m->errorOut(e, "GetMetaCommunityCommand", "generateDistanceMatrix"); exit(1); } } /**************************************************************************************************/ int GetMetaCommunityCommand::driver(SharedRAbundVectors*& thisLookup, vector< vector >& calcDists, Calculator* matrixCalculator) { try { vector data = thisLookup->getSharedRAbundVectors(); vector subset; for (int k = 0; k < data.size(); k++) { // pass cdd each set of groups to compare for (int l = 0; l < k; l++) { if (k != l) { //we dont need to similiarity of a groups to itself subset.clear(); //clear out old pair of sharedrabunds //add new pair of sharedrabunds subset.push_back(data[k]); subset.push_back(data[l]); //if this calc needs all groups to calculate the pair load all groups if (matrixCalculator->getNeedsAll()) { //load subset with rest of lookup for those calcs that need everyone to calc for a pair for (int w = 0; w < data.size(); w++) { if ((w != k) && (w != l)) { subset.push_back(data[w]); } } } vector tempdata = matrixCalculator->getValues(subset); //saves the calculator outputs if (m->getControl_pressed()) { for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); return 1; } seqDist temp(l, k, tempdata[0]); calcDists[0].push_back(temp); } } } for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); return 0; } catch(exception& e) { m->errorOut(e, "GetMetaCommunityCommand", "driver"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getmetacommunitycommand.h000077500000000000000000000065731424121717000233430ustar00rootroot00000000000000// // getmetacommunitycommand.h // Mothur // // Created by SarahsWork on 4/9/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
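// Implementation note (illustrative sketch, not part of the mothur build): processDriver() in
// getmetacommunitycommand.cpp selects the "best" number of partitions differently per method.
// For dmm it keeps the partition count with the minimum Laplace approximation of the model fit;
// for pam/kmeans it keeps the count with the maximum Calinski-Harabasz (CH) index. The optimizegap
// parameter stops the search once the best value has not improved for optimizegap consecutive
// partition counts (provided minpartitions has been reached). The helper below only illustrates
// that selection rule in isolation -- the names selectPartitionSketch, scores and minimize are
// assumptions for this sketch and are not mothur API.
#ifndef GETMETACOMMUNITY_SELECTION_SKETCH
#define GETMETACOMMUNITY_SELECTION_SKETCH
#include <vector>
#include <cstddef>

//minimal sketch, assuming scores[k-1] already holds the fit score computed for k partitions
inline int selectPartitionSketch(const std::vector<double>& scores, bool minimize,
                                 int optimizegap, int minpartitions) {
    if (scores.empty()) { return 1; }
    int best = 1; double bestScore = scores[0];
    for (std::size_t i = 1; i < scores.size(); i++) {
        int numPartitions = (int)i + 1;
        bool better = minimize ? (scores[i] < bestScore) : (scores[i] > bestScore);  //dmm minimizes Laplace, pam/kmeans maximize CH
        if (better) { bestScore = scores[i]; best = numPartitions; }
        //same early-out rule as processDriver: stop once no improvement for optimizegap partitions
        if (optimizegap != -1 && (numPartitions - best) >= optimizegap && numPartitions >= minpartitions) { break; }
    }
    return best;
}
#endif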
// #ifndef Mothur_getmetacommunitycommand_h #define Mothur_getmetacommunitycommand_h #include "command.hpp" #include "inputdata.h" #include "qFinderDMM.h" #include "pam.h" #include "sharedsobscollectsummary.h" #include "sharedchao1.h" #include "sharedace.h" #include "sharednseqs.h" #include "sharedjabund.h" #include "sharedsorabund.h" #include "sharedjclass.h" #include "sharedsorclass.h" #include "sharedjest.h" #include "sharedsorest.h" #include "sharedthetayc.h" #include "sharedthetan.h" #include "sharedkstest.h" #include "whittaker.h" #include "sharedochiai.h" #include "sharedanderbergs.h" #include "sharedkulczynski.h" #include "sharedkulczynskicody.h" #include "sharedlennon.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" //#include "sharedjackknife.h" #include "whittaker.h" #include "odum.h" #include "canberra.h" #include "structeuclidean.h" #include "structchord.h" #include "hellinger.h" #include "manhattan.h" #include "structpearson.h" #include "soergel.h" #include "spearman.h" #include "structkulczynski.h" #include "structchi2.h" #include "speciesprofile.h" #include "hamming.h" #include "gower.h" #include "memchi2.h" #include "memchord.h" #include "memeuclidean.h" #include "mempearson.h" #include "sharedjsd.h" #include "sharedrjsd.h" /**************************************************************************************************/ class GetMetaCommunityCommand : public Command { public: GetMetaCommunityCommand(string); ~GetMetaCommunityCommand(){} vector setParameters(); string getCommandName() { return "get.communitytype"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "Holmes I, Harris K, Quince C (2012) Dirichlet Multinomial Mixtures: Generative Models for Microbial Metagenomics. PLoS ONE 7(2): e30126. doi:10.1371/journal.pone.0030126 http://www.mothur.org/wiki/get.communitytype"; } string getDescription() { return "Assigns samples to bins using a Dirichlet multinomial mixture model"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines, subsample, withReplacement; vector outputNames; string sharedfile, method, calc; int minpartitions, maxpartitions, optimizegap, iters, subsampleSize; vector Groups, Estimators; set labels; vector > generateDistanceMatrix(SharedRAbundVectors*& lookup); int driver(SharedRAbundVectors*& thisLookup, vector< vector >& calcDists, Calculator*); int processDriver(SharedRAbundVectors*&, vector&, string, vector, vector, vector, int); int createProcesses(SharedRAbundVectors*&); vector generateDesignFile(int, map); int generateSummaryFile(int, map, vector); }; /**************************************************************************************************/ struct summaryData { string name; double refMean, difference; vector partMean, partLCI, partUCI; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/getmimarkspackagecommand.cpp000077500000000000000000005026561424121717000237650ustar00rootroot00000000000000// // getmimarkspackagecommand.cpp // Mothur // // Created by Sarah Westcott on 3/25/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. 
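// Output format note (simplified sketch, not the routine mothur uses): execute() below writes a
// tab-delimited MIMARKS template consisting of several '#' comment lines, a '#' line describing
// each field, a '#' line giving each field's expected value format, and a header row of field
// names in which required fields are prefixed with '*'; a row per group/sample then follows.
// The hypothetical helper below, writeMimarksSketch, and its hard-coded three-column layout are
// assumptions made for illustration only; per the template's own instructions, unknown or
// inapplicable values are filled with "missing".
#include <fstream>
#include <set>
#include <string>

//minimal sketch of the tab-delimited layout, assuming only the three leading required fields
inline void writeMimarksSketch(const std::string& fileName, const std::set<std::string>& groups) {
    std::ofstream out(fileName.c_str());
    out << "#This is a tab-delimited file.\n";
    out << "#Please fill all the required fields indicated with '*'\n";
    out << "*sample_name\t*description\t*sample_title\n";                 //required fields only, for brevity
    for (std::set<std::string>::const_iterator it = groups.begin(); it != groups.end(); ++it) {
        out << *it << "\tmissing\tmissing\n";                             //unknown fields default to "missing"
    }
}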
// #include "getmimarkspackagecommand.h" #include "groupmap.h" //********************************************************************************************************************** vector GetMIMarksPackageCommand::setParameters(){ try { //files that have dependancies CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pgroup); CommandParameter pfile("file", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pfile); CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos); CommandParameter ppackage("package", "Multiple", "air-host_associated-human_associated-human_gut-human_oral-human_skin-human_vaginal-microbial-miscellaneous-plant_associated-sediment-soil-wastewater-water", "miscellaneous", "", "", "","",false,false,true); parameters.push_back(ppackage); CommandParameter prequiredonly("requiredonly", "Boolean", "", "F", "", "", "","",false,false, true); parameters.push_back(prequiredonly); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["tsv"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetMIMarksPackageCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetMIMarksPackageCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.mimarkspackage command creates a mimarks package form with your groups. The required fields are flagged with * characters. \n"; helpString += "Further documentation on the different packages and required formats can be found here, http://www.mothur.org/wiki/MIMarks_Data_Packages.\n"; helpString += "The get.mimarkspackage command parameters are: oligos, group, package and requiredonly. oligos or group is required.\n"; helpString += "The oligos parameter is used to provide your oligos file so mothur can extract your group names.\n"; helpString += "The group parameter is used to provide your group file so mothur can extract your group names.\n"; helpString += "The package parameter is used to select the mimarks package you would like to use. The choices are: air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or waterc. Default=miscellaneous.\n"; helpString += "The requiredonly parameter is used to indicate you only want the required mimarks feilds printed. 
Default=F.\n"; helpString += "The get.mimarkspackage command should be in the following format: get.mimarkspackage(oligos=yourOligosFile, package=yourPackage)\n"; helpString += "get.mimarkspackage(oligos=GQY1XT001.oligos, package=human_gut)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetMIMarksPackageCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetMIMarksPackageCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "tsv") { pattern = "[filename],tsv"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetMIMarksPackageCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetMIMarksPackageCommand::GetMIMarksPackageCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); inputfile = groupfile; } oligosfile = validParameter.validFile(parameters, "oligos"); if (oligosfile == "not found") { oligosfile = ""; setOligosParameter = false; } else if(oligosfile == "not open") { abort = true; } else { current->setOligosFile(oligosfile); inputfile = oligosfile; setOligosParameter = true; } file = validParameter.validFile(parameters, "file"); if (file == "not open") { file = ""; abort = true; } else if (file == "not found") { file = ""; } else { inputfile = file; } if ((groupfile == "") && (oligosfile == "") && (file == "")) { oligosfile = current->getOligosFile(); if (oligosfile != "") { inputfile = oligosfile; m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n"); } else { groupfile = current->getGroupFile(); if (groupfile != "") { inputfile = groupfile; m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { m->mothurOut("[ERROR]: You must provide file, groupfile or oligos file for the get.mimarkspackage command.\n"); abort = true; } } } package = validParameter.valid(parameters, "package"); if (package == "not found") { package = "miscellaneous"; } for (int i = 0; i < package.length(); i++) { package[i] = tolower(package[i]); } if ((package == "air") || (package == "host_associated") || (package == "human_associated") || (package == "human_gut") || (package == "human_oral") || (package == "human_skin") || (package == "human_vaginal") || (package == "microbial") || (package == "miscellaneous") || (package == "plant_associated") || (package == "sediment") || (package == "soil") || (package == "wastewater") || (package == "water") ) {} else { m->mothurOut("[ERROR]: " + package + " is not a valid package selection. 
Choices are: air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water. Aborting.\n."); abort = true; } string temp = validParameter.valid(parameters, "requiredonly"); if(temp == "not found"){ temp = "F"; } requiredonly = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "GetMIMarksPackageCommand", "GetMIMarksPackageCommand"); exit(1); } } //********************************************************************************************************************** int GetMIMarksPackageCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if ((oligosfile != "") && (file != "")) { Oligos oligos(oligosfile); createGroupNames(oligos); } else if (file != "") { readFile(); } else if (oligosfile != "") { Oligos oligos(oligosfile); createGroupNames(oligos); } //createGroupNames fills in group names else { GroupMap groupmap(groupfile); groupmap.readMap(); vector tempGroups = groupmap.getNamesOfGroups(); for (int i = 0; i < tempGroups.size(); i++) { Groups.insert(tempGroups[i]); } } if (outputdir == "") { outputdir += util.hasPath(inputfile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); string outputFileName = getOutputFileName("tsv", variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["tsv"].push_back(outputFileName); out << "#This is a tab-delimited file. Additional Documentation can be found at http://www.mothur.org/wiki/MIMarks_Data_Packages." << endl; out << "#Please fill all the required fields indicated with '*'" << endl; out << "#Unknown or inapplicable fields can be assigned 'missing' value." << endl; out << "#You may add extra custom fields to this template. Make sure all the fields are separated by tabs." << endl; out << "#You may remove any fields not required (marked with '*'). Make sure all the fields are separated by tabs." << endl; out << "#You can edit this template using Microsoft Excel or any other editor. But while saving the file please make sure to save them as 'TAB-DELIMITED' TEXT FILE." << endl; if (package == "air") { out << "#MIMARKS.survey.air.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {The altitude of the sample is the vertical distance between Earth's surface above Sea Level and the sampled position in the air.} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. 
EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{float} m} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *altitude *collection_date *env_biome *env_feature *env_material *geo_loc_name *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {The altitude of the sample is the vertical distance between Earth's surface above Sea Level and the sampled position in the air.} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {force per unit area exerted against a surface by the weight of air above that surface} {carbon dioxide (gas) amount or concentration at the time of sampling} {carbon monoxide (gas) amount or concentration at the time of sampling} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. 
For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {amount of water vapour in the air, at the time of sampling} {methane (gas) amount or concentration at the time of sampling} {any other measurement performed or parameter collected, that is not listed here} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {oxygen (gas) amount or concentration at the time of sampling} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant} {Aerobic or anaerobic} {concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {none} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {the amount of solar energy that arrives at a specific area of a surface during a specific time interval} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples.} {temperature of the sample at time of sampling} {ventilation rate of the system in the sampled premises} {ventilation system used in the sampled premises} {concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound} {wind direction is the direction from which a wind originates} {speed of wind measured at the time of sampling}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{float} m} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{term}; {timestamp}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{float} {unit}} {{text};{interval}} {{text};{float} {unit}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text};{float} {unit}} {{text}} {{text|term}} {{none}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{float} {unit}} {{text}} {{float} {unit}} {{float} {unit}} {{text}} {{text};{float} {unit}} {{text}} {{float} {unit}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *altitude *collection_date *env_biome *env_feature *env_material *geo_loc_name *lat_lon barometric_press carb_dioxide carb_monoxide chem_administration elev humidity methane misc_param organism_count oxy_stat_samp oxygen perturbation pollutants rel_to_oxygen resp_part_matter samp_collect_device samp_mat_process samp_salinity samp_size samp_store_dur samp_store_loc samp_store_temp 
samp_vol_we_dna_ext solar_irradiance source_material_id temp ventilation_rate ventilation_type volatile_org_comp wind_direction wind_speed" << endl; } }else if (package == "built") { out << "#MIMARKS.survey.built.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {actual mass of water vapor - mh20 - present in the air water vapor mixture} {temperature of the air at the time of sampling} {primary function for which a building or discrete part of a building is intended to be used} {location (geography) where a building is set} {carbon dioxide (gas) amount or concentration at the time of sampling} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {device which removes solid particulates or airborne molecular contaminants} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {methods of conditioning or heating a room or building} {a distinguishable space within a structure, the purpose for which discrete areas of a building is used} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light} {number of occupants present at time of sample within the given space} {average number of occupants at time of sampling per square footage} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air} {customary or normal state of the space} {customary or normal density of occupants} {ventilation system used in the sampled premises}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{float} {unit} [kg|lb]} {{float} {unit} [deg C]} {['', 'office', 'market', 'restaurant', 'residence', 'school', 'residential', 'commercial', 'low rise', 'high rise', 'wood framed', 'health care', 'airport', 'sports complex', 'missing', 'not applicable', 'not collected']} {['', 'urban', 'suburban', 'exurban', 'rural', 'missing', 'not applicable', 'not collected']} {{float} {unit}} {{timestamp}} {{term}} {{term}} {{term}} {['', 'particulate air filter', 'chemical air filter', 'low-MERV pleated media', 'HEPA', 'electrostatic', 'gas-phase or ultraviolet air treatments', 'missing', 'not applicable', 'not collected']} {{term}:{term}:{text}} {['', 'radiant system', 'heat pump', 'forced air system', 'steam forced heat', 'wood stove', 'missing', 'not applicable', 'not collected']} {['', 'bedroom', 'office', 'bathroom', 'foyer', 'kitchen', 'locker room', 'hallway', 'elevator', 'missing', 'not applicable', 'not collected']} {{float} {float}} {['', 'natural light', 'electric light', 'no light', 'missing', 'not applicable', 'not collected']} {{integer}} {{float}} {{text};{float} {unit}} {{float} {unit} [%]} {['', 'typical occupied', 'typically unoccupied', 'missing', 'not applicable', 'not collected']} {{float}} {{text}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *abs_air_humidity *air_temp *build_occup_type *building_setting *carb_dioxide *collection_date *env_biome *env_feature *env_material *filter_type *geo_loc_name *heat_cool_type *indoor_space *lat_lon *light_type *occup_samp *occupant_dens_samp *organism_count *rel_air_humidity *space_typ_state *typ_occupant_dens *ventilation_type" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {actual mass of water vapor - mh20 - present in the air water vapor mixture} {temperature of the air at the time of sampling} {primary function for which a building or discrete part of a building is intended to be used} {location (geography) where a building is set} {carbon dioxide (gas) amount or concentration at the time of sampling} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. 
EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {device which removes solid particulates or airborne molecular contaminants} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {methods of conditioning or heating a room or building} {a distinguishable space within a structure, the purpose for which discrete areas of a building is used} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. Can also include absence of light} {number of occupants present at time of sample within the given space} {average number of occupants at time of sampling per square footage} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air} {customary or normal state of the space} {customary or normal density of occupants} {ventilation system used in the sampled premises} {temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water.} {type of indoor surface} {Aerobic or anaerobic} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {Amount or size of sample (volume, mass or area) that was collected} {method by which samples are sorted} {volume (mL) or weight (g) of sample processed for DNA extraction} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level} {contaminant identified on surface} {surfaces: water activity as a function of air and material moisture} {surface materials at the point of sampling} {water held on a surface} {pH measurement of surface} {temperature of the surface at the time of sampling}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{float} {unit} [kg|lb]} {{float} {unit} [deg C]} {['', 'office', 'market', 'restaurant', 'residence', 'school', 'residential', 'commercial', 'low rise', 'high rise', 'wood framed', 'health care', 'airport', 'sports complex', 'missing', 'not applicable', 'not collected']} {['', 'urban', 'suburban', 'exurban', 'rural', 'missing', 'not applicable', 'not collected']} {{float} {unit}} {{timestamp}} {{term}} {{term}} {{term}} {['', 'particulate air filter', 'chemical air filter', 'low-MERV pleated media', 'HEPA', 'electrostatic', 'gas-phase or ultraviolet air treatments', 'missing', 'not applicable', 'not collected']} {{term}:{term}:{text}} {['', 'radiant system', 'heat pump', 'forced air system', 'steam forced heat', 'wood stove', 'missing', 'not applicable', 'not collected']} {['', 'bedroom', 'office', 'bathroom', 'foyer', 'kitchen', 'locker room', 'hallway', 'elevator', 'missing', 'not applicable', 'not collected']} {{float} {float}} {['', 'natural light', 'electric light', 'no light', 'missing', 'not applicable', 'not collected']} {{integer}} {{float}} {{text};{float} {unit}} {{float} {unit} [%]} {['', 'typical occupied', 'typically unoccupied', 'missing', 'not applicable', 'not collected']} {{float}} {{text}} {{float} {unit} [deg C]} {['', 'counter top', 'window', 'wall', 'cabinet', 'ceiling', 'door', 'shelving', 'vent cover']} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text}} {{text|term}} {{float} {unit}} {{text}} {{float} {unit}} {{text}} {['', 'crawlspace', 'slab on grade', 'basement']} {['', 'dust', 'organic matter', 'particulate matter', 'volatile organic compounds', 'biological contaminants', 'radon', 'nutrients', 'biocides']} {{float} {unit} [%]} {['', 'concrete', 'wood', 'stone', 'tile', 'plastic', 'glass', 'vinyl', 'metal', 'carpet', 'stainless steel', 'paint', 'cinder blocks', 'hay bales', 'stucco', 'adobe']} {{float} {unit}} {{integer [0-14]}} {{float} {unit} [deg C]}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *abs_air_humidity *air_temp *build_occup_type *building_setting *carb_dioxide *collection_date *env_biome *env_feature *env_material *filter_type *geo_loc_name *heat_cool_type *indoor_space *lat_lon *light_type *occup_samp *occupant_dens_samp *organism_count *rel_air_humidity *space_typ_state *typ_occupant_dens *ventilation_type dew_point indoor_surf rel_to_oxygen samp_collect_device samp_mat_process samp_size samp_sort_meth samp_vol_we_dna_ext source_material_id substructure_type surf_air_cont surf_humidity surf_material surf_moisture surf_moisture_ph surf_temp" << endl; } }else if (package == "host_associated") { out << "#MIMARKS.survey.host-associated.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} 
{http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. 
Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {The altitude of the sample is the vertical distance between Earth's surface above Sea Level and the sampled position in the air.} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used} {Age of host at the time of sampling} {resting diastolic blood pressureof the host, measured as mm mercury} {resting systolic blood pressure of the host, measured as mm mercury} {original body habitat where the sample was obtained from} {substance produced by the host, e.g. stool, mucus, where the sample was obtained from} {core body temperature of the host when sample was collected} {the color of host} {type of diet depending on the sample for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types} {Name of relevant disease, e.g. Salmonella gastroenteritis. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1009 or http://www.ncbi.nlm.nih.gov/mesh} {measurement of dry mass} {none} {none} {literature reference giving growth conditions of the host} {the height of subject} {taxonomic information subspecies level} {taxonomic rank information below subspecies level, such as variety, form, rank etc.} {content of last meal and time since feeding; can include multiple values} {the length of subject} {description of host life stage} {none} {Gender or physical sex of the host} {morphological shape of host} {a unique identifier by which each subject can be referred to, de-identified, e.g. #131} {the growth substrate of the host} {NCBI taxonomy ID of the host, e.g. 9606} {Type of tissue the initial sample was taken from. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1005)} {total mass of the host at collection, the unit depends on host} {any other measurement performed or parameter collected, that is not listed here} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {Aerobic or anaerobic} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {none} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {temperature of the sample at time of sampling}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}} {{float} m} {{term}; {timestamp}} {{float} m} {{float} {unit}} {{boolean};{timestamp}} {{none}} {{float} {unit}} {{float} {unit}} {{term}} {{text}} {{float} {unit}} {{text}} {{text}} {{none}} {{float} {unit}} {{none}} {{none}} {{PMID|DOI|URL}} {{float} {unit}} {{text}} {{text}} {{text};{period}} {{float} {unit}} {{text}} {{none}} {['', 'male', 'female', 'pooled male and female', 'neuter', 'hermaphrodite', 'not determined', 'missing', 'not applicable', 'not collected']} {{text}} {{text}} {{text}} {{integer}} {{none}} {{float} {unit}} {{text};{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{text};{interval}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text}} {{text|term}} {{none}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{text}} {{float} {unit}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon altitude chem_administration depth elev gravidity host_age host_blood_press_diast host_blood_press_syst host_body_habitat host_body_product host_body_temp host_color host_diet host_disease host_dry_mass host_family_relationship host_genotype host_growth_cond host_height host_infra_specific_name host_infra_specific_rank host_last_meal host_length host_life_stage host_phenotype host_sex host_shape host_subject_id host_substrate host_taxid host_tissue_sampled host_tot_mass misc_param organism_count oxy_stat_samp perturbation rel_to_oxygen samp_collect_device samp_mat_process samp_salinity samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext source_material_id temp" << endl; } }else if (package == "human_associated") { out << "#MIMARKS.survey.human-associated.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. 
Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {specification of the color of the amniotic fluid sample} {history of blood disorders; can include multiple disorders} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {specification of major diet changes in the last six months, if yes the change should be specified} {any drug used by subject and the frequency of usage; can include multiple drugs used} {ethnicity of the subject} {specification of foetal health status, should also include abortion} {specification of the gestation state} {Age of host at the time of sampling} {body mass index of the host, calculated as weight/(height)squared} {substance produced by the host, e.g. 
stool, mucus, where the sample was obtained from} {core body temperature of the host when sample was collected} {type of diet depending on the sample for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types} {Name of relevant disease, e.g. Salmonella gastroenteritis. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1009 or http://www.ncbi.nlm.nih.gov/mesh} {none} {none} {the height of subject} {HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]} {content of last meal and time since feeding; can include multiple values} {most frequent job performed by subject} {none} {resting pulse of the host, measured as beats per minute} {Gender or physical sex of the host} {a unique identifier by which each subject can be referred to, de-identified, e.g. #131} {Type of tissue the initial sample was taken from. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1005)} {total mass of the host at collection, the unit depends on host} {can include multiple medication codes} {history of kidney disorders; can include multiple disorders} {specification of the maternal health status} {whether full medical history was collected} {any other measurement performed or parameter collected, that is not listed here} {history of nose-throat disorders; can include multiple disorders} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present} {history of pulmonary disorders; can include multiple disorders} {Aerobic or anaerobic} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {none} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {specification of smoking status} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {specification of study completion status, if no the reason should be specified} {temperature of the sample at time of sampling} {specification of the countries travelled in the last six months; can include multiple travels} {specification of twin sibling presence} {specification of urine collection method} {history of urogenitaltract disorders; can include multiple disorders} {specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}} {{text}} {{text}} {{term}; {timestamp}} {{boolean};{text}} {{text};{integer}/[year|month|week|day|hour]} {{integer|text}} {{text}} {{text}} {{none}} {{float} {unit}} {{text}} {{float} {unit}} {{text}} {{none}} {{none}} {{none}} {{float} {unit}} {{boolean};{boolean}} {{text};{period}} {{none}} {{none}} {{float} {unit}} {['', 'male', 'female', 'pooled male and female', 'neuter', 'hermaphrodite', 'not determined', 'missing', 'not applicable', 'not collected']} {{text}} {{none}} {{float} {unit}} {{integer}} {{text}} {{text}} {{boolean}} {{text};{float} {unit}} {{text}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{text};{interval}} {{boolean};{text}} {{text}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text}} {{text|term}} {{none}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{boolean}} {{text}} {{boolean};[adverse event|non-compliance|lost to follow up|other-specify]} {{float} {unit}} {{text}} {{boolean}} {['', 'clean catch', 'catheter']} {{text}} {{boolean};{float} {unit}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon amniotic_fluid_color blood_blood_disord chem_administration diet_last_six_month drug_usage ethnicity foetal_health_stat gestation_state host_age host_body_mass_index host_body_product host_body_temp host_diet host_disease host_family_relationship host_genotype host_height host_hiv_stat host_last_meal host_occupation host_phenotype host_pulse host_sex host_subject_id host_tissue_sampled host_tot_mass ihmc_medication_code kidney_disord maternal_health_stat medic_hist_perform misc_param nose_throat_disord organism_count oxy_stat_samp perturbation pet_farm_animal pulmonary_disord rel_to_oxygen samp_collect_device samp_mat_process samp_salinity samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext smoker source_material_id study_complt_stat temp travel_out_six_month twin_sibling urine_collect_meth urogenit_tract_disor weight_loss_3_month" << endl; } }else if (package == "human_gut") { out << "#MIMARKS.survey.human-gut.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. 
Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. 
For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {ethnicity of the subject} {history of gastrointestinal tract disorders; can include multiple disorders} {Age of host at the time of sampling} {body mass index of the host, calculated as weight/(height)squared} {substance produced by the host, e.g. stool, mucus, where the sample was obtained from} {core body temperature of the host when sample was collected} {type of diet depending on the sample for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types} {Name of relevant disease, e.g. Salmonella gastroenteritis. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1009 or http://www.ncbi.nlm.nih.gov/mesh} {none} {none} {the height of subject} {content of last meal and time since feeding; can include multiple values} {most frequent job performed by subject} {none} {resting pulse of the host, measured as beats per minute} {Gender or physical sex of the host} {a unique identifier by which each subject can be referred to, de-identified, e.g. #131} {Type of tissue the initial sample was taken from. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1005)} {total mass of the host at collection, the unit depends on host} {can include multiple medication codes} {history of liver disorders; can include multiple disorders} {whether full medical history was collected} {any other measurement performed or parameter collected, that is not listed here} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {Aerobic or anaerobic} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {none} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {specification of special diet; can include multiple special diets} {temperature of the sample at time of sampling}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}} {{term}; {timestamp}} {{integer|text}} {{text}} {{none}} {{float} {unit}} {{text}} {{float} {unit}} {{text}} {{none}} {{none}} {{none}} {{float} {unit}} {{text};{period}} {{none}} {{none}} {{float} {unit}} {['', 'male', 'female', 'pooled male and female', 'neuter', 'hermaphrodite', 'not determined', 'missing', 'not applicable', 'not collected']} {{text}} {{none}} {{float} {unit}} {{integer}} {{text}} {{boolean}} {{text};{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{text};{interval}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text}} {{text|term}} {{none}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{text}} {['', 'low carb', 'reduced calorie', 'vegetarian', 'other(to be specified)']} {{float} {unit}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon chem_administration ethnicity gastrointest_disord host_age host_body_mass_index host_body_product host_body_temp host_diet host_disease host_family_relationship host_genotype host_height host_last_meal host_occupation host_phenotype host_pulse host_sex host_subject_id host_tissue_sampled host_tot_mass ihmc_medication_code liver_disord medic_hist_perform misc_param organism_count oxy_stat_samp perturbation rel_to_oxygen samp_collect_device samp_mat_process samp_salinity samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext source_material_id special_diet temp" << endl; } }else if (package == "human_oral") { out << "#MIMARKS.survey.human-oral.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. 
Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {ethnicity of the subject} {Age of host at the time of sampling} {body mass index of the host, calculated as weight/(height)squared} {substance produced by the host, e.g. stool, mucus, where the sample was obtained from} {core body temperature of the host when sample was collected} {type of diet depending on the sample for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types} {Name of relevant disease, e.g. Salmonella gastroenteritis. 
Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1009 or http://www.ncbi.nlm.nih.gov/mesh} {none} {none} {the height of subject} {content of last meal and time since feeding; can include multiple values} {most frequent job performed by subject} {none} {resting pulse of the host, measured as beats per minute} {Gender or physical sex of the host} {a unique identifier by which each subject can be referred to, de-identified, e.g. #131} {Type of tissue the initial sample was taken from. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1005)} {total mass of the host at collection, the unit depends on host} {can include multiple medication codes} {whether full medical history was collected} {any other measurement performed or parameter collected, that is not listed here} {history of nose/mouth/teeth/throat disorders; can include multiple disorders} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {Aerobic or anaerobic} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {none} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples.} {temperature of the sample at time of sampling} {specification of the time since last toothbrushing}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}} {{term}; {timestamp}} {{integer|text}} {{none}} {{float} {unit}} {{text}} {{float} {unit}} {{text}} {{none}} {{none}} {{none}} {{float} {unit}} {{text};{period}} {{none}} {{none}} {{float} {unit}} {['', 'male', 'female', 'pooled male and female', 'neuter', 'hermaphrodite', 'not determined', 'missing', 'not applicable', 'not collected']} {{text}} {{none}} {{float} {unit}} {{integer}} {{boolean}} {{text};{float} {unit}} {{text}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{text};{interval}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text}} {{text|term}} {{none}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{text}} {{float} {unit}} {{timestamp}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon chem_administration ethnicity host_age host_body_mass_index host_body_product host_body_temp host_diet host_disease host_family_relationship host_genotype host_height host_last_meal host_occupation host_phenotype host_pulse host_sex host_subject_id host_tissue_sampled host_tot_mass ihmc_medication_code medic_hist_perform misc_param nose_mouth_teeth_throat_disord organism_count oxy_stat_samp perturbation rel_to_oxygen samp_collect_device samp_mat_process samp_salinity samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext source_material_id temp time_last_toothbrush" << endl; } }else if (package 
== "human_skin") { out << "#MIMARKS.survey.human-skin.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. 
Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {history of dermatology disorders; can include multiple disorders} {dominant hand of the subject} {ethnicity of the subject} {Age of host at the time of sampling} {body mass index of the host, calculated as weight/(height)squared} {substance produced by the host, e.g. stool, mucus, where the sample was obtained from} {core body temperature of the host when sample was collected} {type of diet depending on the sample for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types} {Name of relevant disease, e.g. Salmonella gastroenteritis. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1009 or http://www.ncbi.nlm.nih.gov/mesh} {none} {none} {the height of subject} {content of last meal and time since feeding; can include multiple values} {most frequent job performed by subject} {none} {resting pulse of the host, measured as beats per minute} {Gender or physical sex of the host} {a unique identifier by which each subject can be referred to, de-identified, e.g. #131} {Type of tissue the initial sample was taken from. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1005)} {total mass of the host at collection, the unit depends on host} {can include multiple medication codes} {whether full medical history was collected} {any other measurement performed or parameter collected, that is not listed here} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {Aerobic or anaerobic} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {none} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {temperature of the sample at time of sampling} {specification of the time since last wash}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}} {{term}; {timestamp}} {{text}} {['', 'left', 'right', 'ambidextrous']} {{integer|text}} {{none}} {{float} {unit}} {{text}} {{float} {unit}} {{text}} {{none}} {{none}} {{none}} {{float} {unit}} {{text};{period}} {{none}} {{none}} {{float} {unit}} {['', 'male', 'female', 'pooled male and female', 'neuter', 'hermaphrodite', 'not determined', 'missing', 'not applicable', 'not collected']} {{text}} {{none}} {{float} {unit}} {{integer}} {{boolean}} {{text};{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{text};{interval}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text}} {{text|term}} {{none}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{text}} {{float} {unit}} {{timestamp}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon chem_administration dermatology_disord dominant_hand ethnicity host_age host_body_mass_index host_body_product host_body_temp host_diet host_disease host_family_relationship host_genotype host_height host_last_meal host_occupation host_phenotype host_pulse host_sex host_subject_id host_tissue_sampled host_tot_mass ihmc_medication_code medic_hist_perform misc_param organism_count oxy_stat_samp perturbation rel_to_oxygen samp_collect_device samp_mat_process samp_salinity samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext source_material_id temp time_since_last_wash" << endl; } }else if (package == "human_vaginal") { out << "#MIMARKS.survey.human-vaginal.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. 
Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {specification of birth control medication used} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {date of most recent douche} {ethnicity of the subject} {history of gynecological disorders; can include multiple disorders} {Age of host at the time of sampling} {body mass index of the host, calculated as weight/(height)squared} {substance produced by the host, e.g. stool, mucus, where the sample was obtained from} {core body temperature of the host when sample was collected} {type of diet depending on the sample for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types} {Name of relevant disease, e.g. Salmonella gastroenteritis. 
Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1009 or http://www.ncbi.nlm.nih.gov/mesh} {none} {none} {the height of subject} {content of last meal and time since feeding; can include multiple values} {most frequent job performed by subject} {none} {resting pulse of the host, measured as beats per minute} {Gender or physical sex of the host} {a unique identifier by which each subject can be referred to, de-identified, e.g. #131} {Type of tissue the initial sample was taken from. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1005)} {total mass of the host at collection, the unit depends on host} {whether subject had hormone replacement theraphy, and if yes start date} {specification of whether hysterectomy was performed} {can include multiple medication codes} {whether full medical history was collected} {date of most recent menstruation} {date of onset of menopause} {any other measurement performed or parameter collected, that is not listed here} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {date due of pregnancy} {Aerobic or anaerobic} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {none} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {current sexual partner and frequency of sex} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {temperature of the sample at time of sampling} {history of urogenital disorders, can include multiple disorders}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}} {{text}} {{term}; {timestamp}} {{timestamp}} {{integer|text}} {{text}} {{none}} {{float} {unit}} {{text}} {{float} {unit}} {{text}} {{none}} {{none}} {{none}} {{float} {unit}} {{text};{period}} {{none}} {{none}} {{float} {unit}} {['', 'male', 'female', 'pooled male and female', 'neuter', 'hermaphrodite', 'not determined', 'missing', 'not applicable', 'not collected']} {{text}} {{none}} {{float} {unit}} {{timestamp}} {{boolean}} {{integer}} {{boolean}} {{timestamp}} {{timestamp}} {{text};{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{text};{interval}} {{timestamp}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text}} {{text|term}} {{none}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{text}} {{text}} {{float} {unit}} {{text}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon birth_control chem_administration douche ethnicity gynecologic_disord host_age host_body_mass_index host_body_product host_body_temp host_diet host_disease host_family_relationship host_genotype host_height host_last_meal host_occupation host_phenotype host_pulse host_sex host_subject_id host_tissue_sampled host_tot_mass hrt hysterectomy ihmc_medication_code medic_hist_perform menarche menopause misc_param organism_count oxy_stat_samp perturbation pregnancy rel_to_oxygen samp_collect_device samp_mat_process samp_salinity samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext sexual_act source_material_id temp urogenit_disord" << endl; } }else if (package == "microbial") { out << "#MIMARKS.survey.microbial.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. 
EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{float} m} {{float} {unit}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *depth *elev *env_biome *env_feature *env_material *geo_loc_name *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate} {concentration of alkyl diethers} {The altitude of the sample is the vertical distance between Earth's surface above Sea Level and the sampled position in the air.} {measurement of aminopeptidase activity} {concentration of ammonium} {measurement of bacterial carbon production} {amount of biomass; should include the name for the part of biomass measured, e.g. microbial, total. 
can include multiple measurements} {concentration of bishomohopanol} {concentration of bromide} {concentration of calcium} {ratio of amount or concentrations of carbon to nitrogen} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {concentration of chloride} {concentration of chlorophyll} {concentration of diether lipids; can include multiple types of diether lipids} {concentration of dissolved carbon dioxide} {concentration of dissolved hydrogen} {dissolved inorganic carbon concentration} {concentration of dissolved organic carbon} {dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2} {concentration of dissolved oxygen} {measurement of glucosidase activity} {concentration of magnesium} {measurement of mean friction velocity} {measurement of mean peak friction velocity} {methane (gas) amount or concentration at the time of sampling} {any other measurement performed or parameter collected, that is not listed here} {concentration of n-alkanes; can include multiple n-alkanes} {concentration of nitrate} {concentration of nitrite} {concentration of nitrogen (total)} {concentration of organic carbon} {concentration of organic matter} {concentration of organic nitrogen} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {concentration of particulate organic carbon} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {concentration of petroleum hydrocarbon} {pH measurement} {concentration of phaeopigments; can include multiple phaeopigments} {concentration of phosphate} {concentration of phospholipid fatty acids; can include multiple values} {concentration of potassium} {pressure to which the sample is subject, in atmospheres} {redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential} {Aerobic or anaerobic} {salinity measurement} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {concentration of silicate} {sodium concentration} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples.} {concentration of sulfate} {concentration of sulfide} {temperature of the sample at time of sampling} {total carbon content} {total nitrogen content of the sample} {Definition for soil: total organic C content of the soil units of g C/kg soil. 
Definition otherwise: total organic carbon content} {turbidity measurement} {water content measurement}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{float} m} {{float} {unit}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}} {{float} {unit}} {{float} {unit}} {{float} m} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{term}; {timestamp}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{float} {unit}} {{text};{interval}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{float} {unit}} {{text}} {{text|term}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *depth *elev *env_biome *env_feature *env_material *geo_loc_name *lat_lon alkalinity alkyl_diethers altitude aminopept_act ammonium bacteria_carb_prod biomass bishomohopanol bromide calcium carb_nitro_ratio chem_administration chloride chlorophyll diether_lipids diss_carb_dioxide diss_hydrogen diss_inorg_carb diss_org_carb diss_org_nitro diss_oxygen glucosidase_act magnesium mean_frict_vel mean_peak_frict_vel methane misc_param n_alkanes nitrate nitrite nitro org_carb org_matter org_nitro organism_count oxy_stat_samp part_org_carb perturbation petroleum_hydrocarb ph phaeopigments phosphate phosplipid_fatt_acid potassium pressure redox_potential rel_to_oxygen salinity samp_collect_device samp_mat_process samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext silicate sodium source_material_id sulfate sulfide temp tot_carb tot_nitro tot_org_carb turbidity water_content" << endl; } }else if (package == "miscellaneous") { out << "#MIMARKS.survey.miscellaneous.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. 
EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate} {The altitude of the sample is the vertical distance between Earth's surface above Sea Level and the sampled position in the air.} {concentration of ammonium} {amount of biomass; should include the name for the part of biomass measured, e.g. microbial, total. can include multiple measurements} {concentration of bromide} {concentration of calcium} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. 
For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {concentration of chloride} {concentration of chlorophyll} {density of sample} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {concentration of diether lipids; can include multiple types of diether lipids} {concentration of dissolved carbon dioxide} {concentration of dissolved hydrogen} {dissolved inorganic carbon concentration} {dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2} {concentration of dissolved oxygen} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {any other measurement performed or parameter collected, that is not listed here} {concentration of nitrate} {concentration of nitrite} {concentration of nitrogen (total)} {concentration of organic carbon} {concentration of organic matter} {concentration of organic nitrogen} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {pH measurement} {concentration of phosphate} {concentration of phospholipid fatty acids; can include multiple values} {concentration of potassium} {pressure to which the sample is subject, in atmospheres} {Aerobic or anaerobic} {salinity measurement} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {concentration of silicate} {sodium concentration} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {concentration of sulfate} {concentration of sulfide} {temperature of the sample at time of sampling} {measurement of magnitude and direction of flow within a fluid}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}} {{float} {unit}} {{float} m} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{term}; {timestamp}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} m} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{text};{interval}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{float} {unit}} {{text}} {{text|term}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *lat_lon alkalinity altitude ammonium biomass bromide calcium chem_administration chloride chlorophyll density depth diether_lipids diss_carb_dioxide diss_hydrogen diss_inorg_carb diss_org_nitro diss_oxygen elev misc_param nitrate nitrite nitro org_carb org_matter org_nitro organism_count oxy_stat_samp perturbation ph phosphate phosplipid_fatt_acid potassium pressure rel_to_oxygen salinity samp_collect_device samp_mat_process samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext silicate sodium source_material_id sulfate sulfide temp water_current" << endl; } }else if (package == "plant_associated") { out << "#MIMARKS.survey.plant-associated.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. 
Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, \"Homo sapiens\".} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {information about treatment involving an exposure to varying temperatures; should include the temperature, treatment duration, interval and total experimental duration; can include different temperature regimens} {The altitude of the sample is the vertical distance between Earth's surface above Sea Level and the sampled position in the air.} {information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment duration, interval and total experimental duration; can include multiple antibiotic regimens} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. 
For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment duration, interval and total experimental duration; can include multiple mutagen regimens} {treatment involving an exposure to a particular climate; can include multiple climates} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {information about treatment involving the use of fertilizers; should include the name fertilizer, amount administered, treatment duration, interval and total experimental duration; can include multiple fertilizer regimens} {information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment duration, interval and total experimental duration; can include multiple fungicide regimens} {use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens} {information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; can include multiple treatments} {information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment duration, interval and total experimental duration; can include multiple growth hormone regimens} {information about growth media for growing the plants or tissue cultured samples} {information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment duration, interval and total experimental duration; can include multiple regimens} {Age of host at the time of sampling} {Name of relevant disease, e.g. Salmonella gastroenteritis. Controlled vocabulary, http://bioportal.bioontology.org/ontologies/1009 or http://www.ncbi.nlm.nih.gov/mesh} {measurement of dry mass} {none} {the height of subject} {taxonomic information subspecies level} {taxonomic rank information below subspecies level, such as variety, form, rank etc.} {the length of subject} {description of host life stage} {none} {NCBI taxonomy ID of the host, e.g. 
9606} {total mass of the host at collection, the unit depends on host} {measurement of wet mass} {information about treatment involving an exposure to varying degree of humidity; information about treatment involving use of growth hormones; should include amount of humidity administered, treatment duration, interval and total experimental duration; can include multiple regimens} {information about any mechanical damage exerted on the plant; can include multiple damages and sites} {information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment duration, interval and total experimental duration; can include multiple mineral nutrient regimens} {any other measurement performed or parameter collected, that is not listed here} {information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment duration, interval and total experimental duration; can include multiple non-mineral nutrient regimens} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment duration, interval and total experimental duration; can include multiple pesticide regimens} {information about treatment involving exposure of plants to varying levels of pH of the growth media; can include multiple regimen} {name of body site that the sample was obtained from. For Plant Ontology (PO) (v 20) terms, see http://purl.bioontology.org/ontology/PO} {substance produced by the plant, where the sample was obtained from} {information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment duration, interval and total experimental duration; can include multiple radiation regimens} {information about treatment involving an exposure to a given amount of rainfall; can include multiple regimens} {Aerobic or anaerobic} {information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment duration, interval and total experimental duration; can include multiple salt regimens} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {none} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {treatment involving an exposure to a particular season (e.g. winter, summer, rabi, rainy etc.)} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water; can include multiple regimens} {temperature of the sample at time of sampling} {description of plant tissue culture growth media used} {information about treatment involving an exposure to water with varying degree of temperature; can include multiple regimens} {information about treatment involving an exposure to watering frequencies; can include multiple regimens}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{none}} {{float} {float}} {{float} {unit};{period};{interval};{period}} {{float} m} {{text};{float} {unit};{period};{interval};{period}} {{term}; {timestamp}} {{text};{float} {unit};{period};{interval};{period}} {{text};{period};{interval};{period}} {{float} m} {{float} {unit}} {{text};{float} {unit};{period};{interval};{period}} {{text};{float} {unit};{period};{interval};{period}} {{text};{float} {unit};{period};{interval};{period}} {{float} {unit};{period};{interval};{period}} {{text};{float} {unit};{period};{interval};{period}} {['', 'soil', 'liquid']} {{text};{float} {unit};{period};{interval};{period}} {{none}} {{none}} {{float} {unit}} {{none}} {{float} {unit}} {{text}} {{text}} {{float} {unit}} {{text}} {{none}} {{integer}} {{float} {unit}} {{float} {unit}} {{float} {unit};{period};{interval};{period}} {{text};{text}} {{text};{float} {unit};{period};{interval};{period}} {{text};{float} {unit}} {{text};{float} {unit};{period};{interval};{period}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{text};{interval}} {{text};{float} {unit};{period};{interval};{period}} {{float} {unit};{period};{interval};{period}} {{term}} {{text}} {{text};{float} {unit};{period};{interval};{period}} {{float} {unit};{period};{interval};{period}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text};{float} {unit};{period};{interval};{period}} {{text}} {{text|term}} {{none}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{text};{period};{interval};{period}} {{text}} {{text};{period};{interval};{period}} {{float} {unit}} {{PMID|DOI|URL}} {{float} {unit};{period};{interval};{period}} {{float} {unit};{period};{interval};{period}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *host *lat_lon air_temp_regm altitude antibiotic_regm chem_administration chem_mutagen climate_environment depth elev fertilizer_regm fungicide_regm gaseous_environment gravity growth_hormone_regm growth_med herbicide_regm host_age host_disease host_dry_mass host_genotype host_height host_infra_specific_name host_infra_specific_rank host_length host_life_stage host_phenotype host_taxid host_tot_mass host_wet_mass humidity_regm mechanical_damage mineral_nutr_regm misc_param non_mineral_nutr_regm organism_count oxy_stat_samp perturbation pesticide_regm ph_regm plant_body_site plant_product radiation_regm rainfall_regm rel_to_oxygen salt_regm samp_collect_device samp_mat_process samp_salinity samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext season_environment source_material_id standing_water_regm temp tiss_cult_growth_med water_temp_regm watering_regm" << endl; } }else if (package == "sediment") { out << 
"#MIMARKS.survey.sediment.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{float} m} {{float} {unit}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *depth *elev *env_biome *env_feature *env_material *geo_loc_name *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. 
EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate} {concentration of alkyl diethers} {The altitude of the sample is the vertical distance between Earth's surface above Sea Level and the sampled position in the air.} {measurement of aminopeptidase activity} {concentration of ammonium} {measurement of bacterial carbon production} {amount of biomass; should include the name for the part of biomass measured, e.g. microbial, total. can include multiple measurements} {concentration of bishomohopanol} {concentration of bromide} {concentration of calcium} {ratio of amount or concentrations of carbon to nitrogen} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {concentration of chloride} {concentration of chlorophyll} {density of sample} {concentration of diether lipids; can include multiple types of diether lipids} {concentration of dissolved carbon dioxide} {concentration of dissolved hydrogen} {dissolved inorganic carbon concentration} {concentration of dissolved organic carbon} {dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2} {concentration of dissolved oxygen} {measurement of glucosidase activity} {concentration of magnesium} {measurement of mean friction velocity} {measurement of mean peak friction velocity} {methane (gas) amount or concentration at the time of sampling} {any other measurement performed or parameter collected, that is not listed here} {concentration of n-alkanes; can include multiple n-alkanes} {concentration of nitrate} {concentration of nitrite} {concentration of nitrogen (total)} {concentration of organic carbon} {concentration of organic matter} {concentration of organic nitrogen} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {concentration of particulate organic carbon} {particles are classified, based on their size, into six general categories:clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values} {type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {concentration of petroleum hydrocarbon} {pH measurement} {concentration of phaeopigments; can include multiple phaeopigments} {concentration of phosphate} {concentration of phospholipid fatty acids; can include multiple values} {porosity of deposited sediment is volume of voids divided by the total volume of sample} {concentration of potassium} {pressure to which the sample is subject, in atmospheres} {redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential} {Aerobic or anaerobic} {salinity measurement} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {information about the sediment type based on major constituents} {concentration of silicate} {sodium concentration} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples.} {concentration of sulfate} {concentration of sulfide} {temperature of the sample at time of sampling} {stage of tide} {total carbon content} {total nitrogen content of the sample} {Definition for soil: total organic C content of the soil units of g C/kg soil. Definition otherwise: total organic carbon content} {turbidity measurement} {water content measurement}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{float} m} {{float} {unit}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}} {{float} {unit}} {{float} {unit}} {{float} m} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{term}; {timestamp}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{float} {unit}} {{text};{float} {unit}} {{text};{interval}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{float} {unit}} {{text}} {{text|term}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {['', 'biogenous', 'cosmogenous', 'hydrogenous', 'lithogenous']} {{float} {unit}} {{float} {unit}} {{text}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {['', 'low', 'high']} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *depth *elev *env_biome *env_feature *env_material *geo_loc_name *lat_lon alkalinity alkyl_diethers altitude aminopept_act ammonium bacteria_carb_prod biomass bishomohopanol bromide calcium carb_nitro_ratio chem_administration 
chloride chlorophyll density diether_lipids diss_carb_dioxide diss_hydrogen diss_inorg_carb diss_org_carb diss_org_nitro diss_oxygen glucosidase_act magnesium mean_frict_vel mean_peak_frict_vel methane misc_param n_alkanes nitrate nitrite nitro org_carb org_matter org_nitro organism_count oxy_stat_samp part_org_carb particle_class perturbation petroleum_hydrocarb ph phaeopigments phosphate phosplipid_fatt_acid porosity potassium pressure redox_potential rel_to_oxygen salinity samp_collect_device samp_mat_process samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext sediment_type silicate sodium source_material_id sulfate sulfide temp tidal_stage tot_carb tot_nitro tot_org_carb turbidity water_content" << endl; } }else if (package == "soil") { out << "#MIMARKS.survey.soil.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. 
Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{float} m} {{float} {unit}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *depth *elev *env_biome *env_feature *env_material *geo_loc_name *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {addition of fertilizers, pesticides, etc. - amount and time of applications} {aluminum saturation (esp. 
for tropical soils)} {reference or method used in determining Al saturation} {The altitude of the sample is the vertical distance between Earth's surface above Sea Level and the sampled position in the air.} {mean annual and seasonal precipitation (mm)} {mean annual and seasonal temperature (oC)} {whether or not crop is rotated, and if yes, rotation schedule} {present state of sample site} {vegetation classification from one or more standard classification systems, or agricultural crop} {reference or method used in vegetation classification} {drainage classification from a standard system such as the USDA system} {unusual physical events that may have affected microbial populations} {measured salinity} {soil classification from the FAO World Reference Database for Soil Resources} {historical and/or physical evidence of fire} {historical and/or physical evidence of flooding} {heavy metals present and concentrationsany drug used by subject and the frequency of usage; can include multiple heavy metals and concentrations} {reference or method used in determining heavy metals} {specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath} {reference or method used in determining the horizon} {none} {link to digitized soil maps or other soil classification information} {link to climate resource} {soil classification based on local soil classification system} {reference or method used in determining the local soil classification} {the part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 µm. IF you keep this, you would need to have correction factors used for conversion to the final units, which should be mg C (or N)/kg soil).} {reference or method used in determining microbial biomass} {any other measurement performed or parameter collected, that is not listed here} {pH measurement} {reference or method used in determining pH} {were multiple DNA extractions mixed? how many?} {previous land use and dates} {reference or method used in determining previous land use and dates} {cross-sectional position in the hillslope where sample was collected.sample area position in relation to surrounding areas} {Aerobic or anaerobic} {reference or method used in determining salinity} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {Amount or size of sample (volume, mass or area) that was collected} {volume (mL) or weight (g) of sample processed for DNA extraction} {collection design of pooled samples and/or sieve size and amount of sample sieved} {the direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., NW or 315°. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration.} {commonly called “slope.” The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer.} {soil series name or other lower-level classification} {reference or method used in determining soil series name or other lower-level classification} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {explain how and for how long the soil sample was stored before DNA extraction.} {the relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (} {reference or method used in determining soil texture} {note method(s) used for tilling} {reference or method used in determining the total N} {total nitrogen content of the sample} {reference or method used in determining total organic C} {Definition for soil: total organic C content of the soil units of g C/kg soil. Definition otherwise: total organic carbon content} {water content (g/g or cm3/cm3)} {reference or method used in determining the water content of soil}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{float} m} {{float} {unit}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}} {{text};{float} {unit};{timestamp}} {{float} {unit}} {{PMID|DOI|URL}} {{float} m} {{float} {unit}} {{float} {unit}} {{boolean};Rn/{timestamp}/{period}} {['', 'cities', 'farmstead', 'industrial areas', 'roads/railroads', 'rock', 'sand', 'gravel', 'mudflats', 'salt flats', 'badlands', 'permanent snow or ice', 'saline seeps', 'mines/quarries', 'oil waste areas', 'small grains', 'row crops', 'vegetable crops', 'horticultural plants (e.g. tulips)', 'marshlands (grass,sedges,rushes)', 'tundra (mosses,lichens)', 'rangeland', 'pastureland (grasslands used for livestock grazing)', 'hayland', 'meadows (grasses,alfalfa,fescue,bromegrass,timothy)', 'shrub land (e.g. mesquite,sage-brush,creosote bush,shrub oak,eucalyptus)', 'successional shrub land (tree saplings,hazels,sumacs,chokecherry,shrub dogwoods,blackberries)', 'shrub crops (blueberries,nursery ornamentals,filberts)', 'vine crops (grapes)', 'conifers (e.g. pine,spruce,fir,cypress)', 'hardwoods (e.g. oak,hickory,elm,aspen)', 'intermixed hardwood and conifers', 'tropical (e.g. 
mangrove,palms)', 'rainforest (evergreen forest receiving <} {{text}} {{PMID|DOI|URL}} {['', 'very poorly', 'poorly', 'somewhat poorly', 'moderately well', 'well', 'excessively drained']} {{timestamp}} {{float} {unit}} {{term}} {{timestamp}} {{timestamp}} {{text};{float} {unit}} {{PMID|DOI|URL}} {['', 'O horizon', 'A horizon', 'E horizon', 'B horizon', 'C horizon', 'R layer', 'Permafrost']} {{PMID|DOI|URL}} {{PMID|DOI|URL}} {{PMID|DOI|URL}} {{PMID|DOI|URL}} {{text}} {{PMID|DOI|URL}} {{float} {unit}} {{PMID|DOI|URL}} {{text};{float} {unit}} {{float} {unit}} {{PMID|DOI|URL}} {{boolean};{float} {unit}} {{text};{timestamp}} {{PMID|DOI|URL}} {['', 'summit', 'shoulder', 'backslope', 'footslope', 'toeslope']} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{PMID|DOI|URL}} {{text}} {{text|term}} {{float} {unit}} {{float} {unit}} {{{text}|{float} {unit}};{float} {unit}} {{float} {unit}} {{float} {unit}} {{text}} {{PMID|DOI|URL}} {{text}} {{text};{period}} {{float} {unit}} {{PMID|DOI|URL}} {['', 'drill', 'cutting disc', 'ridge till', 'strip tillage', 'zonal tillage', 'chisel', 'tined', 'mouldboard', 'disc plough']} {{PMID|DOI|URL}} {{float} {unit}} {{PMID|DOI|URL}} {{float} {unit}} {{float} [g/g|cm3/cm3]} {{PMID|DOI|URL}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *depth *elev *env_biome *env_feature *env_material *geo_loc_name *lat_lon agrochem_addition al_sat al_sat_meth altitude annual_season_precpt annual_season_temp crop_rotation cur_land_use cur_vegetation cur_vegetation_meth drainage_class extreme_event extreme_salinity fao_class fire flooding heavy_metals heavy_metals_meth horizon horizon_meth link_addit_analys link_class_info link_climate_info local_class local_class_meth microbial_biomass microbial_biomass_meth misc_param ph ph_meth pool_dna_extracts previous_land_use previous_land_use_meth profile_position rel_to_oxygen salinity_meth samp_collect_device samp_mat_process samp_size samp_vol_we_dna_ext sieving slope_aspect slope_gradient soil_type soil_type_meth source_material_id store_cond texture texture_meth tillage tot_n_meth tot_nitro tot_org_c_meth tot_org_carb water_content_soil water_content_soil_meth" << endl; } }else if (package == "wastewater") { out << "#MIMARKS.survey.wastewater.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. 
EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate} {a measure of the relative oxygen-depletion effect of a waste contaminant} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {a measure of the relative oxygen-depletion effect of a waste contaminant} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. 
Depth can be reported as an interval for subsurface samples.} {percentage of volatile solids removed from the anaerobic digestor} {amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types} {amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances} {percentage of industrial effluents received by wastewater treatment plant} {concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles} {any other measurement performed or parameter collected, that is not listed here} {concentration of nitrate} {concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc.} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {pH measurement} {concentration of phosphate} {the process of pre-treatment removes materials that can be easily collected from the raw wastewater} {the process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed} {anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage} {Aerobic or anaerobic} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {none} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {the process for substantially degrading the biological content of the sewage} {type of wastewater treatment plant as municipial or industrial} {the time activated sludge remains in reactor} {sodium concentration} {concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc.} {concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc.} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples.} {concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter, etc,; can include multiple substances} {temperature of the sample at time of sampling} {the process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment} {total nitrogen content of the sample} {total amount or concentration of phosphate} {the origin of wastewater such as human waste, rainfall, storm drains, etc.}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}} {{float} {unit}} {{float} {unit}} {{term}; {timestamp}} {{float} {unit}} {{float} m} {{float} {unit}} {{text};{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{text};{interval}} {{float} {unit}} {{float} {unit}} {{text}} {{text}} {{text}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{text}} {{text|term}} {{none}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{text}} {{text}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{text};{float} {unit}} {{text}} {{text};{float} {unit}} {{float} {unit}} {{text}} {{float} {unit}} {{float} {unit}} {{text}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *env_biome *env_feature *env_material *geo_loc_name *lat_lon alkalinity biochem_oxygen_dem chem_administration chem_oxygen_dem depth efficiency_percent emulsions gaseous_substances indust_eff_percent inorg_particles misc_param nitrate org_particles organism_count oxy_stat_samp perturbation ph phosphate pre_treatment primary_treatment reactor_type rel_to_oxygen samp_collect_device samp_mat_process samp_salinity samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext secondary_treatment sewage_type sludge_retent_time sodium soluble_inorg_mat soluble_org_mat source_material_id suspend_solids temp tertiary_treatment tot_nitro tot_phosphate wastewater_type" << endl; } }else if (package == "water") { out << "#MIMARKS.survey.water.4.0" << endl; if (requiredonly) { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. 
EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{float} m} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *depth *env_biome *env_feature *env_material *geo_loc_name *lat_lon" << endl; }else { out << "#{sample name} {description of sample} {sample title} {description of library_construction_protocol} {http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock} {Date of sampling, in \"DD-Mmm-YYYY\", \"Mmm-YYYY\" or \"YYYY\" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard \"YYYY-mm-dd\", \"YYYY-mm\" or \"YYYY-mm-ddThh:mm:ss\" (eg., 1990-10-30, 1990-10 or 1990-10-30T14:41:36)} {Depth is defined as the vertical distance below surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectivly. Depth can be reported as an interval for subsurface samples.} {descriptor of the broad ecological context of a sample. Examples include: desert, taiga, deciduous woodland, or coral reef. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {descriptor of the local environment. Examples include: harbor, cliff, or lake. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {material that was displaced by the sample, or material in which a sample was embedded, prior to the sampling event. Examples include: air, soil, or water. EnvO (v 2013-06-14) terms can be found via the link: www.environmentontology.org/Browse-EnvO} {Geographical origin of the sample; use the appropriate name from this list http://www.insdc.org/documents/country-qualifier-vocabulary. Use a colon to separate the country or ocean from more detailed information about the location, eg \"Canada: Vancouver\" or \"Germany: halfway down Zugspitze, Alps\"} {The geographical coordinates of the location where the sample was collected. 
Specify as degrees latitude and longitude in format \"d[d.dddd] N|S d[dd.dddd] W|E\", eg, 38.98 N 77.11 W} {alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate} {concentration of alkyl diethers} {The altitude of the sample is the vertical distance between Earth's surface above Sea Level and the sampled position in the air.} {measurement of aminopeptidase activity} {concentration of ammonium} {measurement of atmospheric data; can include multiple data} {bacterial production in the water column measured by isotope uptake} {measurement of bacterial respiration in the water column} {measurement of bacterial carbon production} {amount of biomass; should include the name for the part of biomass measured, e.g. microbial, total. can include multiple measurements} {concentration of bishomohopanol} {concentration of bromide} {concentration of calcium} {ratio of amount or concentrations of carbon to nitrogen} {list of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. For Chemical Entities of Biological Interest ontology (CHEBI) (v1.72), please see http://bioportal.bioontology.org/visualize/44603} {concentration of chloride} {concentration of chlorophyll} {electrical conductivity of water} {density of sample} {concentration of diether lipids; can include multiple types of diether lipids} {concentration of dissolved carbon dioxide} {concentration of dissolved hydrogen} {dissolved inorganic carbon concentration} {concentration of dissolved inorganic nitrogen} {concentration of dissolved inorganic phosphorus} {concentration of dissolved organic carbon} {dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2} {concentration of dissolved oxygen} {visible waveband radiance and irradiance measurements in the water column} {The elevation of the sampling site as measured by the vertical distance from mean sea level.} {raw or converted fluorescence of water} {measurement of glucosidase activity} {measurement of light intensity} {concentration of magnesium} {measurement of mean friction velocity} {measurement of mean peak friction velocity} {any other measurement performed or parameter collected, that is not listed here} {concentration of n-alkanes; can include multiple n-alkanes} {concentration of nitrate} {concentration of nitrite} {concentration of nitrogen (total)} {concentration of organic carbon} {concentration of organic matter} {concentration of organic nitrogen} {total count of any organism per gram or volume of sample,should include name of organism followed by count; can include multiple organism counts} {oxygenation status of sample} {concentration of particulate organic carbon} {concentration of particulate organic nitrogen} {type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with time that perturbation occurred; can include multiple perturbation types} {concentration of petroleum hydrocarbon} {pH measurement} {concentration of phaeopigments; can include multiple phaeopigments} {concentration of phosphate} {concentration of phospholipid fatty acids; can include multiple values} {measurement of photon flux} {concentration of potassium} {pressure to which the sample is subject, in atmospheres} {measurement of primary production} {redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential} {Aerobic or anaerobic} {salinity measurement} {Method or device employed for collecting sample} {Processing applied to the sample during or after isolation} {Amount or size of sample (volume, mass or area) that was collected} {none} {none} {none} {volume (mL) or weight (g) of sample processed for DNA extraction} {concentration of silicate} {sodium concentration} {concentration of soluble reactive phosphorus} {unique identifier assigned to a material sample used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples.} {concentration of sulfate} {concentration of sulfide} {concentration of suspended particulate matter} {temperature of the sample at time of sampling} {stage of tide} {measurement of total depth of water column} {total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen} {total inorganic nitrogen content} {total nitrogen content of the sample} {total particulate carbon content} {total phosphorus concentration, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus. 
Can also be measured without filtering, reported as phosphorus} {measurement of magnitude and direction of flow within a fluid}" << endl; out << "#{text} {text} {text} {text} {controlled vacabulary} {{timestamp}} {{float} m} {{term}} {{term}} {{term}} {{term}:{term}:{text}} {{float} {float}} {{float} {unit}} {{float} {unit}} {{float} m} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{term}; {timestamp}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {['', 'aerobic', 'anaerobic']} {{float} {unit}} {{float} {unit}} {{text};{interval}} {{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{text};{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {['', 'aerobe', 'anaerobe', 'facultative', 'microaerophilic', 'microanaerobe', 'obligate aerobe', 'obligate anaerobe']} {{float} {unit}} {{text}} {{text|term}} {{float} {unit}} {{none}} {{none}} {{none}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{text}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {['', 'low', 'high']} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}} {{float} {unit}}" << endl; out << "*sample_name *description *sample_title *seq_methods *organism *collection_date *depth *env_biome *env_feature *env_material *geo_loc_name *lat_lon alkalinity alkyl_diethers altitude aminopept_act ammonium atmospheric_data bac_prod bac_resp bacteria_carb_prod biomass bishomohopanol bromide calcium carb_nitro_ratio chem_administration chloride chlorophyll conduc density diether_lipids diss_carb_dioxide diss_hydrogen diss_inorg_carb diss_inorg_nitro diss_inorg_phosp diss_org_carb diss_org_nitro diss_oxygen down_par elev fluor glucosidase_act light_intensity magnesium mean_frict_vel mean_peak_frict_vel misc_param n_alkanes nitrate nitrite nitro org_carb org_matter org_nitro organism_count oxy_stat_samp part_org_carb part_org_nitro perturbation petroleum_hydrocarb ph phaeopigments phosphate phosplipid_fatt_acid photon_flux potassium pressure primary_prod redox_potential rel_to_oxygen salinity samp_collect_device samp_mat_process samp_size samp_store_dur samp_store_loc samp_store_temp samp_vol_we_dna_ext silicate sodium soluble_react_phosp source_material_id sulfate sulfide suspend_part_matter temp tidal_stage tot_depth_water_col tot_diss_nitro tot_inorg_nitro tot_nitro tot_part_carb tot_phosp water_current" << endl; } } for (set::iterator it = Groups.begin(); it != Groups.end(); it++) { out << *it << endl; } out.close(); //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetMIMarksPackageCommand", "execute"); exit(1); } } 
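// Shape of the form written by execute() above, for any package choice:
//   line 1: the package identifier (e.g. "#MIMARKS.survey.water.4.0")
//   line 2: "#"-prefixed descriptions of each field (required fields only when requiredonly is true)
//   line 3: "#"-prefixed expected value formats such as {text}, {{timestamp}} or {{float} {unit}}
//   line 4: the field names, with required fields marked by a leading "*"
//   remaining lines: one line per group name gathered from the oligos/group/file input,
//   left blank for the user to fill in before the file is passed to the sra command.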
//*************************************************************************************************************** // going to have to rework this to allow for other options -- /* file option 1 sfffile1 oligosfile1 sfffile2 oligosfile2 ... file option 2 fastqfile1 oligosfile1 fastqfile2 oligosfile2 ... file option 3 ffastqfile1 rfastqfile1 ffastqfile2 rfastqfile2 ... file option 4 group fastqfile fastqfile group fastqfile fastqfile group fastqfile fastqfile ... file option 5 My.forward.fastq My.reverse.fastq none My.rindex.fastq //none is an option is no forward or reverse index file ... ***** We are just looking for the group names, so we only care about option 4 and 1 or 2. ***** */ int GetMIMarksPackageCommand::readFile(){ try { inputfile = file; FileFile dataFile(file, "mimarks"); vector< vector > files = dataFile.getFiles(); int fileOption = dataFile.getFileFormat(); if (m->getControl_pressed()) { return 0; } if (fileOption == 2) { // 3 column file with group names map fileIndex2GroupName = dataFile.getFile2Group(); for (map::iterator it = fileIndex2GroupName.begin(); it != fileIndex2GroupName.end(); it++) { Groups.insert(it->second); } }else if (fileOption == 1) { //2 column format, extract names from oligos file for (int i = 0; i < files.size(); i++) { oligosfile = files[i][1]; //second column file Oligos oligos; oligos.read(oligosfile); createGroupNames(oligos); // adding in groupNames from this file } }else if (fileOption == 3) { //4 column format, make sure oligos parameter was set if (!setOligosParameter) { m->mothurOut("[ERROR]: You must have an oligosfile with the index file option. Aborting. \n"); m->setControl_pressed(true); } } return 0; } catch(exception& e) { m->errorOut(e, "GetMIMarksPackageCommand", "readFile"); exit(1); } } //********************************************************************************************************************** void GetMIMarksPackageCommand::createGroupNames(Oligos& oligos) { try { vector groupNames = oligos.getSRAGroupNames(); if (groupNames.size() == 0) { m->mothurOut("[ERROR]: your oligos file does not contain any group names.\n"); m->setControl_pressed(true); } else { for (int i = 0; i < groupNames.size(); i++) { Groups.insert(groupNames[i]); } } } catch(exception& e) { m->errorOut(e, "GetMIMarksPackageCommand", "createGroupNames"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getmimarkspackagecommand.h000077500000000000000000000026031424121717000234150ustar00rootroot00000000000000// // getmimarkspackagecommand.h // Mothur // // Created by Sarah Westcott on 3/25/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. 
// #ifndef Mothur_getmimarkspackagecommand_h #define Mothur_getmimarkspackagecommand_h #include "command.hpp" #include "oligos.h" #include "filefile.hpp" /**************************************************************************************************/ class GetMIMarksPackageCommand : public Command { public: GetMIMarksPackageCommand(string); ~GetMIMarksPackageCommand(){} vector setParameters(); string getCommandName() { return "get.mimarkspackage"; } string getCommandCategory() { return "Sequence Processing"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "http://www.mothur.org/wiki/get.mimarkspackage"; } string getDescription() { return "create blank mimarks package form for sra command"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, requiredonly, setOligosParameter; string oligosfile, groupfile, package, inputfile, file, inputDir; vector outputNames; set Groups; void createGroupNames(Oligos& oligos); int readFile(); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/getoturepcommand.cpp000066400000000000000000001377371424121717000223250ustar00rootroot00000000000000/* * getoturepcommand.cpp * Mothur * * Created by Sarah Westcott on 4/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "getoturepcommand.h" #include "readphylip.h" #include "readcolumn.h" //******************************************************************************************************************** //sorts lowest to highest inline bool compareName(repStruct left, repStruct right){ return (left.name < right.name); } //******************************************************************************************************************** //sorts lowest to highest inline bool compareBin(repStruct left, repStruct right){ return (left.simpleBin < right.simpleBin); } //******************************************************************************************************************** //sorts lowest to highest inline bool compareSize(repStruct left, repStruct right){ return (left.size < right.size); } //******************************************************************************************************************** //sorts lowest to highest inline bool compareGroup(repStruct left, repStruct right){ return (left.group < right.group); } //********************************************************************************************************************** vector GetOTURepCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","name",false,true, true); parameters.push_back(plist); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,false, true); parameters.push_back(pfasta); CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false, true); parameters.push_back(pphylip); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","",false,false, true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "ColumnName","count",false,false, true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false, true); parameters.push_back(pgroup); CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", 
"ColumnName","",false,false, true); parameters.push_back(pcolumn); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); CommandParameter pweighted("weighted", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pweighted); CommandParameter psorted("sorted", "Multiple", "none-name-bin-size-group", "none", "", "", "","",false,false); parameters.push_back(psorted); CommandParameter pmethod("method", "Multiple", "distance-abundance", "distance", "", "", "","",false,false); parameters.push_back(pmethod); CommandParameter prename("rename", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prename); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetOTURepCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, count, large, weighted, cutoff, precision, groups, sorted, method and rename. The list parameter is required, as well as phylip or column and name if you are using method=distance. If method=abundance a name or count file is required.\n"; helpString += "The rename parameter allows you to indicate you want the OTU label to replace the representative sequence name. Default=F. \n"; helpString += "The phylip or column parameter is required for method=distance, but only one may be used. If you use a column file the name or count filename is required. \n"; helpString += "The method parameter allows you to select the method of selecting the representative sequence. Choices are distance and abundance. The distance method finds the sequence with the largest number of close sequences in the OTU. If tie occurs, a sequence is randomly selected from the ties. The abundance method chooses the most abundant sequence in the OTU as the representative.\n"; helpString += "If you do not provide a cutoff value 0.03 is assumed. If you do not provide a precision value then 100 is assumed.\n"; helpString += "Multiple cutoffs can be entered as follows cutoff=0.01-0.03. \n"; helpString += "The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile).\n"; helpString += "Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n"; helpString += "The sorted parameter allows you to indicate you want the output sorted. 
You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n"; helpString += "The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true. The default value is false.\n"; helpString += "The representative is found by selecting the sequence with the most \"close\" sequences in the OTU. If a tie occurs a seqeunce is chosen at random from the ties.\n"; helpString += "For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n"; helpString += "For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n"; helpString += "If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n"; helpString += "The group parameter allows you provide a group file.\n"; helpString += "The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n"; helpString += "The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n"; helpString += "If you provide a groupfile, then it also appends the names of the groups present in that bin.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetOTURepCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],[tag],rep.fasta-[filename],[tag],[group],rep.fasta"; } else if (type == "name") { pattern = "[filename],[tag],rep.names-[filename],[tag],[group],rep.names"; } else if (type == "count") { pattern = "[filename],count_table-[filename],[tag],rep.count_table-[filename],[tag],[group],rep.count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetOTURepCommand::GetOTURepCommand(string option) : Command() { try{ //allow user to run help if (option == "help") { help(); abort = true; calledHelp = true; }else if(option == "citation") { citation(); abort = true; calledHelp = true; } else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = ""; } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } 
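// Note: fasta is optional; when it is not supplied, execute() writes only the .rep.names or
// .rep.count_table output and skips the .rep.fasta files.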
listfile = validParameter.validFile(parameters, "list"); if (listfile == "not found") { listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { m->mothurOut("You have no current list file and the list parameter is required.\n"); abort = true; } } else if (listfile == "not open") { abort = true; } else { current->setListFile(listfile); } phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not found") { phylipfile = ""; } else if (phylipfile == "not open") { abort = true; } else { distFile = phylipfile; format = "phylip"; current->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not found") { columnfile = ""; } else if (columnfile == "not open") { abort = true; } else { distFile = columnfile; format = "column"; current->setColumnFile(columnfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } hasGroups = false; countfile = validParameter.validFile(parameters, "count"); if (countfile == "not found") { countfile = ""; } else if (countfile == "not open") { abort = true; countfile = ""; } else { current->setCountFile(countfile); ct.readTable(countfile, true, false); if (ct.hasGroupInfo()) { hasGroups = true; } } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } method = validParameter.valid(parameters, "method"); if (method == "not found"){ method = "distance"; } if ((method != "distance") && (method != "abundance")) { m->mothurOut(method + " is not a valid option for the method parameter. The only options are: distance and abundance, aborting.\n"); abort = true; } if (method == "distance") { if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these? //give priority to column, then phylip columnfile = current->getColumnFile(); if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { phylipfile = current->getPhylipFile(); if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a phylip or column file before you can use the get.oturep command.\n"); abort = true; } } }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column.\n"); abort = true; } if (columnfile != "") { if ((namefile == "") && (countfile == "")) { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format.\n"); abort = true; } } } } }else if (method == "abundance") { if ((namefile == "") && (countfile == "")) { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("You need to provide a namefile or countfile if you are going to use the abundance method.\n"); abort = true; } } } if ((phylipfile != "") || (columnfile != "")) { m->mothurOut("[WARNING]: A phylip or column file is not needed to use the abundance method, ignoring.\n"); phylipfile = ""; columnfile = ""; } } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } cutoffSet = false; string temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "0.03"; } else { cutoffSet = true; } int pos = temp.find('-'); if (pos != string::npos) { //multiple cutoffs given util.splitAtDash(temp, cutoffs); temp = *cutoffs.begin(); }else { cutoffs.insert(temp); } util.mothurConvert(temp, cutoff); sorted = validParameter.valid(parameters, "sorted"); if (sorted == "not found"){ sorted = ""; } if (sorted == "none") { sorted=""; } if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) { m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort.\n"); sorted = ""; } if ((sorted == "group") && ((groupfile == "")&& !hasGroups)) { m->mothurOut("You must provide a groupfile or have a count file with group info to sort by group. I will not sort.\n"); sorted = ""; } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { if ((groupfile == "") && (!hasGroups)) { m->mothurOut("You must provide a groupfile to use groups.\n"); abort = true; }else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } } temp = validParameter.valid(parameters, "weighted"); if (temp == "not found") { temp = "f"; } weighted = util.isTrue(temp); temp = validParameter.valid(parameters, "rename"); if (temp == "not found") { temp = "f"; } rename = util.isTrue(temp); if ((rename) && (namefile != "")) { m->mothurOut("[WARNING]: You cannot use the rename option when you provide a namesfile due to downstream issues. 
Setting rename to false.\n"); rename = false; } if ((weighted) && (namefile == "")) { m->mothurOut("[ERROR]: You cannot set weighted to true unless you provide a namesfile.\n"); abort = true; } temp = validParameter.valid(parameters, "precision"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, precision); matrix = nullptr; } } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand"); exit(1); } } //********************************************************************************************************************** int GetOTURepCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (method == "distance") { if ((namefile != "") && (groupfile != "")) { createCount(); } //create count file for simplicity } if (namefile != "") { nameMap = util.readNames(namefile); } if (m->getControl_pressed()) { return 0; } GroupMap groupMap; if (groupfile != "") { //read in group map info. GroupMap groupMap(groupfile); int error = groupMap.readMap(); if (error == 1) { m->mothurOut("Error reading your groupfile. Proceeding without groupfile.\n"); groupfile = ""; } } if (!cutoffSet) { InputData input(listfile, "list", Groups); ListVector* list = input.getListVector(); string lastLabel = list->getLabel(); m->mothurOut("You did not provide a label, using " + lastLabel + ".\n"); if (lastLabel == "unique") { cutoff = 0.0; } if (method == "distance") { readDist(); } process(list, groupMap); delete list; } else { //multiple cutoffs for (set::iterator it = cutoffs.begin(); it != cutoffs.end(); it++) { if (*it == "unique") { cutoff = 0.0; } else { util.mothurConvert(*it, cutoff); } if (method == "distance") { readDist(); } InputData input(listfile, "list", Groups); ListVector* list = input.getListVector(*it); if (list != nullptr) { string lastLabel = list->getLabel(); process(list, groupMap); delete list; } } } //handles multiple labels if (fastafile != "") { //read fastafile FastaMap fasta; fasta.readFastaFile(fastafile); //if user gave a namesfile then use it if (namefile != "") { readNamesFile(fasta); } //output create and output the .rep.fasta files map::iterator itNameFile; for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) { processFastaNames(itNameFile->first, itNameFile->second, fasta, groupMap); } }else { //output create and output the .rep.fasta files map::iterator itNameFile; for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) { processNames(itNameFile->first, itNameFile->second); } } if (m->getControl_pressed()) { return 0; } //set fasta file as new current fastafile - use first one?? 
string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "execute"); exit(1); } } //********************************************************************************************************************** int GetOTURepCommand::readDist() { try { string nameOrCount = ""; string thisNamefile = ""; map counts; if (countfile != "") { nameOrCount = "count"; thisNamefile = countfile; CountTable ct; ct.readTable(countfile, false, false); counts = ct.getNameMap(); } else if (namefile != "") { nameOrCount = "name"; thisNamefile = namefile; } string distfile = columnfile; if (format == "phylip") { distfile = phylipfile; } if (matrix != nullptr) { delete matrix; } matrix = new OptiMatrix(distfile, thisNamefile, nameOrCount, format, cutoff, false); if (m->getControl_pressed()) { return 0; } return 0; } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "readDist"); exit(1); } } //********************************************************************************************************************** void GetOTURepCommand::createCount() { try { CountTable ct; ct.createTable(namefile, groupfile, nullVector); if (outputdir == "") { outputdir = util.hasPath(namefile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(namefile)); countfile = getOutputFileName("count", variables); ct.printCompressedTable(countfile); current->setCountFile(countfile); if (ct.hasGroupInfo()) { hasGroups = true; } vector uniqueNames = ct.getNamesOfSeqs(); util.printAccnos("temp.accnos", uniqueNames); string inputString = "list=" + listfile + ", accnos=temp.accnos"; m->mothurOut("/******************************************/\n"); m->mothurOut("\nRunning command: get.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* getSeqsCommand = new GetSeqsCommand(inputString); getSeqsCommand->execute(); string templistfile = getSeqsCommand->getOutputFiles()["list"][0]; string newName = util.getRootName(listfile) + "unique.list"; util.renameFile(templistfile, newName); listfile = newName; namefile = ""; groupfile = ""; util.mothurRemove("temp.accnos"); delete getSeqsCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "readDist"); exit(1); } } //********************************************************************************************************************** void GetOTURepCommand::readNamesFile(FastaMap& fasta) { try { vector dupNames; ifstream in; util.openInputFile(namefile, in); string name, names, sequence; while(!in.eof()){ in >> name; //read from first column A in >> names; //read from second column A,B,C,D dupNames.clear(); //parse names into vector util.splitAtComma(names, dupNames); //store names in fasta map sequence = 
fasta.getSequence(name); for (int i = 0; i < dupNames.size(); i++) { fasta.push_back(dupNames[i], sequence); } gobble(in); } in.close(); } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "readNamesFile"); exit(1); } } //********************************************************************************************************************** string GetOTURepCommand::findRepAbund(vector names, string group) { try{ vector reps; string rep = "notFound"; Utils util; if (m->getDebug()) { m->mothurOut("[DEBUG]: group=" + group + " names.size() = " + toString(names.size()) + " " + names[0] + "\n"); } if ((names.size() == 1)) { return names[0]; }else{ //fill seqIndex and initialize sums int maxAbund = 0; for (int i = 0; i < names.size(); i++) { if (m->getControl_pressed()) { return "control"; } if (countfile != "") { //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone. int numRep = 0; if (group != "") { numRep = ct.getGroupCount(names[i], group); } else { numRep = ct.getNumSeqs(names[i]); } if (numRep > maxAbund) { reps.clear(); reps.push_back(names[i]); maxAbund = numRep; }else if(numRep == maxAbund) { //tie reps.push_back(names[i]); } }else { //name file used, we assume list file contains all sequences map::iterator itNameMap = nameMap.find(names[i]); if (itNameMap == nameMap.end()) {} //assume that this sequence is not a unique else { if (itNameMap->second > maxAbund) { reps.clear(); reps.push_back(names[i]); maxAbund = itNameMap->second; }else if(itNameMap->second == maxAbund) { //tie reps.push_back(names[i]); } } } } if (reps.size() == 0) { m->mothurOut("[ERROR]: no rep found, file mismatch?? Quitting.\n"); m->setControl_pressed(true); } else if (reps.size() == 1) { rep = reps[0]; } else { //tie int maxIndex = reps.size()-1; int index = util.getRandomIndex(maxIndex); rep = reps[index]; } } return rep; } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "findRepAbund"); exit(1); } } //********************************************************************************************************************** string GetOTURepCommand::findRep(vector names, map& matrixNameIndexes, string group) { try{ //if using abundance if (method == "abundance") { return (findRepAbund(names, group)); } else { //find rep based on distance // if only 1 sequence in bin or processing the "unique" label, then // the first sequence of the OTU is the representative one if ((names.size() == 1)) { return names[0]; }else{ //unique sequence with greatest number of "close" seqs in the OTU vector binTranslated; //fill seqIndex and initialize sums for (size_t i = 0; i < names.size(); i++) { if (m->getControl_pressed()) { return names[0]; } map::iterator itNameIndex = matrixNameIndexes.find(names[i]); if (itNameIndex == matrixNameIndexes.end()) { } //no distances in matrix, or not unique else { //you have a distance in the matrix, do we need to inflate the otu for weighted option? long long matrixIndex = itNameIndex->second; if (weighted) { binTranslated.push_back(matrixIndex); int numRep = 0; if (countfile != "") { //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone. 
if (group != "") { numRep = ct.getGroupCount(names[i], group); } else { numRep = ct.getNumSeqs(names[i]); } }else if (namefile != "") { map::iterator itNameFile = nameMap.find(names[i]); if (itNameFile == nameMap.end()) { m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct.\n"); m->setControl_pressed(true); } else{ numRep = itNameFile->second; } } for (int j = 1; j < numRep; j++) { binTranslated.push_back(matrixIndex); } //inflate redundants }else { if (namefile == "") { binTranslated.push_back(matrixIndex); } //will be unique and in matrix, possible rep else {//name file, no group because if group file was present we could be usingthe count file map::iterator itNameFile = nameMap.find(names[i]); if (itNameFile == nameMap.end()) { m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct.\n"); m->setControl_pressed(true); } else{ binTranslated.push_back(matrixIndex); } } } } } //True Negative - far, cluster apart //True Positive - close, cluster together //False Negative - close, cluster apart //False Positve - far, cluster together vector numCloseInBin; numCloseInBin.resize(binTranslated.size(), 0); for (size_t i=0; i < binTranslated.size(); i++) { if (m->getControl_pressed()) { return "control"; } for (size_t j = 0; j < i; j++) { if (matrix->isClose(binTranslated[i], binTranslated[j])) { numCloseInBin[i]++; numCloseInBin[j]++; }else if (binTranslated[i] == binTranslated[j]) { //you have inflated the otu and need to count this as a close match numCloseInBin[i]++; numCloseInBin[j]++; } } } string repName = ""; if (binTranslated.size() == 0) { repName = names[0]; } //when names file is used singleton OTUs may contain multiple read names, but no dists in matrix else { long long minIndex = binTranslated[0]; int min = numCloseInBin[0]; vector ties; ties.push_back(binTranslated[0]); for (size_t i=1; i < numCloseInBin.size(); i++) { if (m->getControl_pressed()) { return "control"; } if (numCloseInBin[i] > min) { ties.clear(); min = numCloseInBin[i]; minIndex = binTranslated[i]; ties.push_back(binTranslated[i]); }else if (numCloseInBin[i] == min) { ties.push_back(binTranslated[i]); } } if (ties.size() > 0) { long long numTies = ties.size()-1; long long randomIndex = util.getRandomIndex(numTies); repName = matrix->getName(ties[randomIndex]); }else { repName = matrix->getName(minIndex); } if (namefile != "") { vector redundNames; util.splitAtComma(repName, redundNames); repName = redundNames[0]; } } return repName; } } } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "FindRep"); exit(1); } } //********************************************************************************************************************** int GetOTURepCommand::process(ListVector* processList, GroupMap& groupMap) { try{ m->mothurOut(processList->getLabel() + "\t" + toString(processList->getNumBins()) + "\n"); string name, sequence; string nameRep; //create output file if (outputdir == "") { outputdir += util.hasPath(listfile); } ofstream newNamesOutput; string outputNamesFile; map files; //group -> filenameAW map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); if (Groups.size() == 0) { //you don't want to use groups variables["[tag]"] = processList->getLabel(); if (countfile == "") { outputNamesFile = getOutputFileName("name", variables); outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); }else { outputNamesFile = getOutputFileName("count", variables); 
outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); } outputNameFiles[outputNamesFile] = processList->getLabel(); util.openOutputFile(outputNamesFile, newNamesOutput); newNamesOutput << "noGroup" << endl; }else{ //you want to use groups for (int i=0; igetLabel(); variables["[group]"] = Groups[i]; outputNamesFile = outputdir + util.getRootName(util.getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + "."; if (countfile == "") { outputNamesFile = getOutputFileName("name", variables); outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); }else { outputNamesFile = getOutputFileName("count", variables); outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); } files[Groups[i]] = outputNamesFile; ofstream temp; util.openOutputFile(outputNamesFile, temp); temp << Groups[i] << endl; temp.close(); outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i]; } } map matrixNameIndexes; if (method != "abundance") { matrix->getNameIndexMap(); } //maps unique names to index in matrix //for each bin in the list vector vector binLabels = processList->getLabels(); for (int i = 0; i < processList->getNumBins(); i++) { if (m->getControl_pressed()) { out.close(); if (Groups.size() == 0) { newNamesOutput.close(); } return 0; } string temp = processList->get(i); vector namesInBin; util.splitAtComma(temp, namesInBin); if (Groups.size() == 0) { nameRep = findRep(namesInBin, matrixNameIndexes, ""); newNamesOutput << binLabels[i] << '\t' << nameRep << '\t'; //put rep at first position in names line string outputString = nameRep + ","; for (int k=0; k > NamesInGroup; for (int j=0; jmothurOut(namesInBin[j] + " is not in your groupfile, please correct.\n"); m->setControl_pressed(true); } //add this name to correct group if (util.inUsersGroups(thisgroup, Groups)) { NamesInGroup[thisgroup].push_back(namesInBin[j]); } }else { vector thisSeqsGroups = ct.getGroups(namesInBin[j]); for (int k = 0; k < thisSeqsGroups.size(); k++) { if (util.inUsersGroups(thisSeqsGroups[k], Groups)) { NamesInGroup[thisSeqsGroups[k]].push_back(namesInBin[j]); } } } } //get rep for each group in otu for (int j=0; jerrorOut(e, "GetOTURepCommand", "process"); exit(1); } } //********************************************************************************************************************** int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap& fasta, GroupMap& groupMap) { try{ //create output file if (outputdir == "") { outputdir += util.hasPath(listfile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); variables["[tag]"] = label; string outputFileName = getOutputFileName("fasta",variables); util.openOutputFile(outputFileName, out); vector reps; outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); ofstream out2; string tempNameFile = filename + ".temp"; util.openOutputFile(tempNameFile, out2); ifstream in; util.openInputFile(filename, in); string tempGroup = ""; in >> tempGroup; gobble(in); CountTable thisCt; if (countfile != "") { thisCt.readTable(countfile, true, false); if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; } } int thistotal = 0; while (!in.eof()) { string rep, binnames, binLabel; in >> binLabel >> rep >> binnames; gobble(in); string repName = rep; if (rename) { repName = binLabel; } vector names; util.splitAtComma(binnames, names); int 
binsize = names.size(); if (countfile == "") { out2 << repName << '\t' << binnames << endl; } else { if (tempGroup == "noGroup") { for (int j = 0; j < names.size(); j++) { if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); } } binsize = thisCt.getNumSeqs(rep); if (rename) { thisCt.renameSeq(rep, repName); } }else { int total = 0; for (int j = 0; j < names.size(); j++) { total += thisCt.getGroupCount(names[j], tempGroup); } out2 << repName << '\t' << total << '\t' << total << endl; binsize = total; } } thistotal += binsize; //if you have a groupfile string group = ""; map groups; map::iterator groupIt; if (groupfile != "") { //find the groups that are in this bin for (int i = 0; i < names.size(); i++) { string groupName = groupMap.getGroup(names[i]); if (groupName == "not found") { m->mothurOut(names[i] + " is missing from your group file. Please correct.\n"); groupError = true; } else { groups[groupName] = groupName; } } //turn the groups into a string for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) { group += groupIt->first + "-"; } //rip off last dash group = group.substr(0, group.length()-1); }else if (hasGroups) { map groups; for (int i = 0; i < names.size(); i++) { vector thisSeqsGroups = ct.getGroups(names[i]); for (int j = 0; j < thisSeqsGroups.size(); j++) { groups[thisSeqsGroups[j]] = thisSeqsGroups[j]; } } //turn the groups into a string for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) { group += groupIt->first + "-"; } //rip off last dash group = group.substr(0, group.length()-1); } else{ group = ""; } //print out name and sequence for that bin string sequence = fasta.getSequence(rep); if (sequence != "not found") { if (sorted == "") { //print them out repName = repName + "\t" + binLabel; repName = repName + "|" + toString(binsize); if (group != "") { repName = repName + "|" + group; } out << ">" << repName << endl; out << sequence << endl; }else { //save them int simpleLabel; util.mothurConvert(util.getSimpleLabel(binLabel), simpleLabel); repStruct newRep(repName, sequence, binLabel, simpleLabel, binsize, group); reps.push_back(newRep); } }else { m->mothurOut(rep + " is missing from your fasta or name file, ignoring. 
Please correct.\n"); } } if (sorted != "") { //then sort them and print them if (sorted == "name") { sort(reps.begin(), reps.end(), compareName); } else if (sorted == "bin") { sort(reps.begin(), reps.end(), compareBin); } else if (sorted == "size") { sort(reps.begin(), reps.end(), compareSize); } else if (sorted == "group") { sort(reps.begin(), reps.end(), compareGroup); } //print them for (int i = 0; i < reps.size(); i++) { string outputName = reps[i].name + "\t" + reps[i].bin; outputName = outputName + "|" + toString(reps[i].size); if (reps[i].group != "") { outputName = outputName + "|" + reps[i].group; } out << ">" << outputName << endl; out << reps[i].sequence << endl; } } in.close(); out.close(); out2.close(); util.mothurRemove(filename); util.renameFile(tempNameFile.c_str(), filename.c_str()); if ((countfile != "") && (tempGroup == "noGroup")) { if (rename) { //want otu order not order from old count thisCt.printSortedTable(filename); }else { thisCt.printTable(filename); } } return 0; } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "processFastaNames"); exit(1); } } //********************************************************************************************************************** int GetOTURepCommand::processNames(string filename, string label) { try{ //create output file if (outputdir == "") { outputdir += util.hasPath(listfile); } ofstream out2; string tempNameFile = filename + ".temp"; util.openOutputFile(tempNameFile, out2); ifstream in; util.openInputFile(filename, in); string rep, binnames; string tempGroup = ""; in >> tempGroup; gobble(in); CountTable thisCt; if (countfile != "") { thisCt.readTable(countfile, true, false); if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; } } while (!in.eof()) { if (m->getControl_pressed()) { break; } string binLabel; in >> binLabel >> rep >> binnames; gobble(in); string repName = rep; if (rename) { repName = binLabel; } if (countfile == "") { out2 << repName << '\t' << binnames << endl; } else { vector names; util.splitAtComma(binnames, names); if (tempGroup == "noGroup") { for (int j = 0; j < names.size(); j++) { if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); } } if (rename) { thisCt.renameSeq(rep, repName); } }else { int total = 0; for (int j = 0; j < names.size(); j++) { total += thisCt.getGroupCount(names[j], tempGroup); } out2 << repName << '\t' << total << '\t' << total << endl; } } } in.close(); out2.close(); util.mothurRemove(filename); util.renameFile(tempNameFile.c_str(), filename.c_str()); if ((countfile != "") && (tempGroup == "noGroup")) { if (rename) { //want otu order not order from old count thisCt.printSortedTable(filename); }else { thisCt.printTable(filename); } } return 0; } catch(exception& e) { m->errorOut(e, "GetOTURepCommand", "processNames"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getoturepcommand.h000077500000000000000000000046271424121717000217640ustar00rootroot00000000000000#ifndef GETOTUREPCOMMAND_H #define GETOTUREPCOMMAND_H /* * getoturepcommand.h * Mothur * * Created by Sarah Westcott on 4/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin. 
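Two selection methods are supported: method=distance picks the sequence with the most "close"
sequences in the OTU (ties broken at random), and method=abundance picks the most abundant
sequence in the OTU based on the name or count file.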
*/ #include "command.hpp" #include "listvector.hpp" #include "inputdata.h" #include "fastamap.h" #include "groupmap.h" #include "counttable.h" #include "optimatrix.h" #include "nameassignment.hpp" #include "countseqscommand.h" #include "getseqscommand.h" #include "calculator.h" #include "mcc.hpp" typedef map SeqMap; struct repStruct { string name; string sequence; string bin; int simpleBin; int size; string group; repStruct(){} repStruct(string n, string seq, string b, int sb, int s, string g) : name(n), bin(b), size(s), group(g), simpleBin(sb), sequence(seq) { } ~repStruct() = default; }; class GetOTURepCommand : public Command { public: GetOTURepCommand(string); ~GetOTURepCommand(){} vector setParameters(); string getCommandName() { return "get.oturep"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.oturep"; } string getDescription() { return "gets a representative sequence for each OTU"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: map nameMap; OptiData* matrix; CountTable ct; string filename, fastafile, listfile, namefile, groupfile, sorted, phylipfile, countfile, columnfile, distFile, format, groups, method; ofstream out; ifstream in, inNames, inRow; bool abort, allLines, groupError, weighted, hasGroups, rename, cutoffSet; vector outputNames, Groups; map outputNameFiles; set cutoffs; float cutoff; int precision; void readNamesFile(FastaMap&); int process(ListVector*, GroupMap&); string findRep(vector, map&, string); // returns the name of the "representative" sequence of given bin or subset of a bin, for groups string findRepAbund(vector, string); int processNames(string, string); int processFastaNames(string, string, FastaMap&, GroupMap&); int readDist(); void createCount(); }; #endif mothur-1.48.0/source/commands/getotuscommand.cpp000077500000000000000000000570571424121717000220000ustar00rootroot00000000000000// // getotulabelscommand.cpp // Mothur // // Created by Sarah Westcott on 5/21/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
// #include "getotuscommand.h" //********************************************************************************************************************** vector GetOtusCommand::setParameters(){ try { CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true, true); parameters.push_back(paccnos); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy); CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist); CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared); CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none","otucorr",false,false, true); parameters.push_back(potucorr); CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none","corraxes",false,false, true); parameters.push_back(pcorraxes); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["constaxonomy"] = tempOutNames; outputTypes["otucorr"] = tempOutNames; outputTypes["corraxes"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["list"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetOtusCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.otus command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes commands. It can also be used to select a set of otus from a shared or list file.\n"; helpString += "The get.otus parameters are: constaxonomy, otucorr, corraxes, shared, list, label and accnos.\n"; helpString += "The constaxonomy parameter is used to input the results of the classify.otu command.\n"; helpString += "The otucorr parameter is used to input the results of the otu.association command.\n"; helpString += "The corraxes parameter is used to input the results of the corr.axes command.\n"; helpString += "The label parameter is used to analyze specific labels in your input. 
\n"; helpString += "The get.otus commmand should be in the following format: \n"; helpString += "get.otus(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetOtusCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "otucorr") { pattern = "[filename],pick,[extension]"; } else if (type == "corraxes") { pattern = "[filename],pick,[extension]"; } else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetOtusCommand::GetOtusCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = current->getAccnosFile(); if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter.\n"); } else { m->mothurOut("You have no valid accnos file and accnos is required.\n"); abort = true; } }else { current->setAccnosFile(accnosfile); } constaxonomyfile = validParameter.validFile(parameters, "constaxonomy"); if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; } else if (constaxonomyfile == "not found") { constaxonomyfile = ""; } else { current->setConsTaxonomyFile(constaxonomyfile); } corraxesfile = validParameter.validFile(parameters, "corraxes"); if (corraxesfile == "not open") { corraxesfile = ""; abort = true; } else if (corraxesfile == "not found") { corraxesfile = ""; } otucorrfile = validParameter.validFile(parameters, "otucorr"); if (otucorrfile == "not open") { otucorrfile = ""; abort = true; } else if (otucorrfile == "not found") { otucorrfile = ""; } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "") && (sharedfile == "") && (listfile == "")) { m->mothurOut("You must provide one of the following: constaxonomy, corraxes, otucorr, shared or list.\n"); abort = true; } if ((sharedfile != "") || (listfile != "")) { label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; 
m->mothurOut("You did not provide a label, I will use the first label in your inputfile.\n"); label=""; } } } } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "GetOtusCommand"); exit(1); } } //********************************************************************************************************************** int GetOtusCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //get labels you want to keep labels = util.readAccnos(accnosfile); //simplfy labels unordered_set newLabels; for (auto it = labels.begin(); it != labels.end(); it++) { newLabels.insert(util.getSimpleLabel(*it)); } labels = newLabels; if (m->getControl_pressed()) { return 0; } //read through the correct file and output lines you want to keep if (constaxonomyfile != "") { readClassifyOtu(); } if (corraxesfile != "") { readCorrAxes(); } if (otucorrfile != "") { readOtuAssociation(); } if (listfile != "") { readList(); } if (sharedfile != "") { readShared(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } //set constaxonomy file as new current constaxonomyfile itTypes = outputTypes.find("constaxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setConsTaxonomyFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "execute"); exit(1); } } //********************************************************************************************************************** int GetOtusCommand::readClassifyOtu(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(constaxonomyfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxonomyfile)); variables["[extension]"] = util.getExtension(constaxonomyfile); string outputFileName = getOutputFileName("constaxonomy", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(constaxonomyfile, in); bool wroteSomething = false; int selectedCount = 0; //read headers string headers = util.getline(in); out << headers << endl; while (!in.eof()) { if (m->getControl_pressed()) { break; } string otu = ""; string tax = "unknown"; int size = 0; in >> otu >> size; gobble(in); tax = util.getline(in); gobble(in); if (m->getDebug()) { m->mothurOut("Otu=" + otu + ", size=" + toString(size) + ", tax=" + tax + "\n"); } if (labels.count(util.getSimpleLabel(otu)) != 0) { wroteSomething = true; selectedCount++; out << otu << '\t' << size << '\t' << tax << endl; } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " otus from your constaxonomy file.\n"); return 
0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "readClassifyOtu"); exit(1); } } //********************************************************************************************************************** int GetOtusCommand::readOtuAssociation(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(otucorrfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(otucorrfile)); variables["[extension]"] = util.getExtension(otucorrfile); string outputFileName = getOutputFileName("otucorr", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(otucorrfile, in); bool wroteSomething = false; int selectedCount = 0; //read headers string headers = util.getline(in); out << headers << endl; while (!in.eof()) { if (m->getControl_pressed()) { break; } string otu1 = ""; string otu2 = ""; in >> otu1 >> otu2; string line = util.getline(in); gobble(in); if ((labels.count(util.getSimpleLabel(otu1)) != 0) && (labels.count(util.getSimpleLabel(otu2)) != 0)){ wroteSomething = true; selectedCount++; out << otu1 << '\t' << otu2 << '\t' << line << endl; } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " lines from your otu.corr file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "readOtuAssociation"); exit(1); } } //********************************************************************************************************************** int GetOtusCommand::readCorrAxes(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(corraxesfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(corraxesfile)); variables["[extension]"] = util.getExtension(corraxesfile); string outputFileName = getOutputFileName("corraxes", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(corraxesfile, in); bool wroteSomething = false; int selectedCount = 0; //read headers string headers = util.getline(in); out << headers << endl; while (!in.eof()) { if (m->getControl_pressed()) { break; } string otu = ""; in >> otu; string line = util.getline(in); gobble(in); if (labels.count(util.getSimpleLabel(otu)) != 0) { wroteSomething = true; selectedCount++; out << otu << '\t' << line << endl; } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file does not contain any labels from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " lines from your corr.axes file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "readCorrAxes"); exit(1); } } //********************************************************************************************************************** int GetOtusCommand::readShared(){ try { SharedRAbundVectors* lookup = getShared(); if (m->getControl_pressed()) { delete lookup; return 0; } vector newLabels; bool wroteSomething = false; int numSelected = 0; vector binsToRemove; for (int i = 0; i < lookup->getNumBins(); i++) { if (m->getControl_pressed()) { delete lookup; return 0; } //is this otu on the list if 
(labels.count(util.getSimpleLabel(lookup->getOTUNames()[i])) != 0) { numSelected++; wroteSomething = true; }else { binsToRemove.push_back(i); } } lookup->removeOTUs(binsToRemove); string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile)); variables["[extension]"] = util.getExtension(sharedfile); variables["[distance]"] = lookup->getLabel(); string outputFileName = getOutputFileName("shared", variables); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); bool printHeaders = true; lookup->print(out, printHeaders); out.close(); delete lookup; if (wroteSomething == false) { m->mothurOut("Your file does not contain any OTUs from the .accnos file.\n"); } m->mothurOut("Selected " + toString(numSelected) + " OTUs from your shared file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "readShared"); exit(1); } } //********************************************************************************************************************** int GetOtusCommand::readList(){ try { getListVector(); if (m->getControl_pressed()) { delete list; return 0;} ListVector newList; newList.setLabel(list->getLabel()); int selectedCount = 0; bool wroteSomething = false; vector binLabels = list->getLabels(); vector newLabels; for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { delete list; return 0;} if (labels.count(util.getSimpleLabel(binLabels[i])) != 0) { selectedCount++; newList.push_back(list->get(i)); newLabels.push_back(binLabels[i]); } } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); variables["[distance]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; util.openOutputFile(outputFileName, out); delete list; //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newLabels); newList.print(out, false); } out.close(); if (wroteSomething == false) { m->mothurOut("Your file does not contain any OTUs from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " OTUs from your list file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "readList"); exit(1); } } //********************************************************************************************************************** int GetOtusCommand::getListVector(){ try { InputData input(listfile, "list", nullVector); list = input.getListVector(); string lastLabel = list->getLabel(); if (label == "") { label = lastLabel; return 0; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
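//label-matching sketch (descriptive comment): the loop below scans the list file label by label;
//if the requested label is absent but a later label shows it was passed over, the previous
//(next-lowest) label is re-read and used instead, and any labels that still cannot be matched
//are reported after the loop.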
set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((list != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { return 0; } if(labels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((util.anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); delete list; list = input.getListVector(lastLabel); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); //restore real lastlabel to save below list->setLabel(saveLabel); break; } lastLabel = list->getLabel(); //get next line to process //prevent memory leak delete list; list = input.getListVector(); } if (m->getControl_pressed()) { return 0; } //output error messages about any remaining user labels bool needToRun = false; for (set::iterator it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; } else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete list; list = input.getListVector(lastLabel); } return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "getListVector"); exit(1); } } //********************************************************************************************************************** SharedRAbundVectors* GetOtusCommand::getShared(){ try { InputData input(sharedfile, "sharedfile", nullVector); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); string lastLabel = lookup->getLabel(); if (label == "") { label = lastLabel; return lookup; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((lookup != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { delete lookup; return nullptr; } if(labels.count(lookup->getLabel()) == 1){ processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); break; } if ((util.anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup->getLabel(); delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); //restore real lastlabel to save below lookup->setLabels(saveLabel); break; } lastLabel = lookup->getLabel(); //get next line to process //prevent memory leak delete lookup; lookup = input.getSharedRAbundVectors(); } if (m->getControl_pressed()) { return 0; } //output error messages about any remaining user labels bool needToRun = false; for (set::iterator it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; } else { m->mothurOut(". 
Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); } return lookup; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "getShared"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getotuscommand.h000077500000000000000000000030201424121717000214220ustar00rootroot00000000000000#ifndef Mothur_getotulabelscommand_h #define Mothur_getotulabelscommand_h // // getotuscommand.h // Mothur // // Created by Sarah Westcott on 5/21/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "command.hpp" #include "inputdata.h" #include "listvector.hpp" /**************************************************************************************************/ class GetOtusCommand : public Command { public: GetOtusCommand(string); ~GetOtusCommand(){} vector setParameters(); string getCommandName() { return "get.otus"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.otus"; } string getDescription() { return "Can be used with output from classify.otu, otu.association, or corr.axes to select specific otus."; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; string accnosfile, constaxonomyfile, otucorrfile, corraxesfile, listfile, sharedfile, label; vector outputNames; unordered_set labels; ListVector* list; int readClassifyOtu(); int readOtuAssociation(); int readCorrAxes(); int readList(); int readShared(); int getListVector(); SharedRAbundVectors* getShared(); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/getrabundcommand.cpp000077500000000000000000000277701424121717000222600ustar00rootroot00000000000000/* * getrabundcommand.cpp * Mothur * * Created by Sarah Westcott on 6/2/09. * Copyright 2009 Schloss Lab Umass Amherst. All rights reserved. 
* */ #include "getrabundcommand.h" //********************************************************************************************************************** vector GetRAbundCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","rabund",false,false, true); parameters.push_back(pshared); CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","rabund",false,false, true); parameters.push_back(plist); CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none","",false,false, false); parameters.push_back(pcount); CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","rabund",false,false, true); parameters.push_back(psabund); CommandParameter psorted("sorted", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(psorted); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["rabund"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetRAbundCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetRAbundCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.rabund command parameters are list, shared, sabund, count, label, groups and sorted. shared, list or sabund parameters are required, unless you have valid current files.\n"; helpString += "The count parameter allows you to provide a count file associated with your list file. If you clustered with a countfile the list file only contains the unique sequences and you will want to add the redundant counts into the rabund file, providing the count file allows you to do so.\n"; helpString += "The label parameter allows you to select what distance levels you would like included in your .rabund file, and are separated by dashes.\n"; helpString += "The sorted parameters allows you to print the rabund results sorted by abundance or not. 
The default is sorted.\n"; helpString += "The get.rabund command should be in the following format: get.rabund(label=yourLabels, sorted=yourSorted).\n"; helpString += "Example get.rabund(sorted=F).\n"; helpString += "The default value for label is all labels in your inputfile.\n"; helpString += "The get.rabund command outputs a .rabund file containing the lines you selected.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetRAbundCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetRAbundCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "rabund") { pattern = "[filename],rabund"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetRAbundCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetRAbundCommand::GetRAbundCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; inputfile = listfile; current->setListFile(listfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { format = "sharedfile"; inputfile = sharedfile; current->setSharedFile(sharedfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { sabundfile = ""; abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } else { format = "sabund"; inputfile = sabundfile; current->setSabundFile(sabundfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... string temp; temp = validParameter.valid(parameters, "sorted"); if (temp == "not found") { temp = "T"; } sorted = util.isTrue(temp); label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } if ((listfile == "") && (sabundfile == "") && (sharedfile == "")) { //is there are current file available for any of these? 
//give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it listfile = current->getListFile(); if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n");} else { sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { sabundfile = current->getSabundFile(); if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a shared, list or sabund file.\n"); abort = true; } } } } if ((countfile != "") && (listfile == "")) { m->mothurOut("[ERROR]: You can only use the count file with a list file, aborting.\n"); abort = true; } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } } } catch(exception& e) { m->errorOut(e, "GetRAbundCommand", "GetRAbundCommand"); exit(1); } } //********************************************************************************************************************** int GetRAbundCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); filename = getOutputFileName("rabund", variables); util.openOutputFile(filename, out); if (countfile != "") { processList(out); }else { InputData input(inputfile, format, nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; RAbundVector* rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); while (rabund != nullptr) { if (m->getControl_pressed()) { delete rabund; break; } if(sorted) { rabund->print(out); } else { rabund->nonSortedPrint(out); } delete rabund; rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); } } out.close(); if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(filename); return 0; } m->mothurOut("\nOutput File Names:\n"+filename+"\n\n"); outputNames.push_back(filename); outputTypes["rabund"].push_back(filename); //set rabund file as new current rabundfile string currentName = ""; itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "GetRAbundCommand", "execute"); exit(1); } } //********************************************************************************************************************** int GetRAbundCommand::processList(ofstream& out){ try { CountTable ct; ct.readTable(countfile, false, false); InputData input(inputfile, format, nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); if (m->getControl_pressed()) { delete list; return 0; } while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } RAbundVector* rabund = new RAbundVector(); createRabund(ct, list, rabund); if(sorted) { rabund->print(out); } else { rabund->nonSortedPrint(out); } delete rabund; delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } return 0; } catch(exception& e) { m->errorOut(e, "GetRAbundCommand", "processList"); 
exit(1); } } //********************************************************************************************************************** int GetRAbundCommand::createRabund(CountTable& ct, ListVector*& list, RAbundVector*& rabund){ try { rabund->setLabel(list->getLabel()); for(int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { return 0; } vector binNames; string bin = list->get(i); util.splitAtComma(bin, binNames); int total = 0; for (int j = 0; j < binNames.size(); j++) { total += ct.getNumSeqs(binNames[j]); } rabund->push_back(total); } return 0; } catch(exception& e) { m->errorOut(e, "GetRAbundCommand", "createRabund"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getrabundcommand.h000077500000000000000000000022361424121717000217130ustar00rootroot00000000000000#ifndef GETRABUNDCOMMAND_H #define GETRABUNDCOMMAND_H /* * getrabundcommand.h * Mothur * * Created by Sarah Westcott on 6/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" #include "listvector.hpp" class GetRAbundCommand : public Command { public: GetRAbundCommand(string); ~GetRAbundCommand(){} vector setParameters(); string getCommandName() { return "get.rabund"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.rabund"; } string getDescription() { return "creates a rabund file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string filename, listfile, sabundfile, inputfile, format, countfile, sharedfile; ofstream out; vector outputNames, Groups; bool abort, allLines, sorted; set labels; //holds labels to be used string label; int processList(ofstream& out); int createRabund(CountTable& ct, ListVector*& list, RAbundVector*& rabund); }; #endif mothur-1.48.0/source/commands/getrelabundcommand.cpp000077500000000000000000000232401424121717000225650ustar00rootroot00000000000000/* * getrelabundcommand.cpp * Mothur * * Created by westcott on 6/21/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "getrelabundcommand.h" //********************************************************************************************************************** vector GetRelAbundCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","relabund",false,true, true); parameters.push_back(pshared); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pscale("scale", "Multiple", "totalgroup-totalotu-averagegroup-averageotu", "totalgroup", "", "", "","",false,false); parameters.push_back(pscale); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["relabund"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetRelAbundCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetRelAbundCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.relabund command parameters are shared, groups, scale and label. shared is required, unless you have a valid current file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n"; helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; helpString += "The scale parameter allows you to select what scale you would like to use. 
Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n"; helpString += "The get.relabund command should be in the following format: get.relabund(groups=yourGroups, label=yourLabels).\n"; helpString += "Example get.relabund(groups=A-B-C, scale=averagegroup).\n"; helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; helpString += "The get.relabund command outputs a .relabund file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetRelAbundCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetRelAbundCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "relabund") { pattern = "[filename],relabund"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetRelAbundCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetRelAbundCommand::GetRelAbundCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; pickedGroups = false; } else { pickedGroups = true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } scale = validParameter.valid(parameters, "scale"); if (scale == "not found") { scale = "totalgroup"; } if ((scale != "totalgroup") && (scale != "totalotu") && (scale != "averagegroup") && (scale != "averageotu")) { m->mothurOut(scale + " is not a valid scaling option for the get.relabund command. 
Choices are totalgroup, totalotu, averagegroup, averageotu.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "GetRelAbundCommand", "GetRelAbundCommand"); exit(1); } } //********************************************************************************************************************** int GetRelAbundCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); string outputFileName = getOutputFileName("relabund", variables); ofstream out; util.openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); vector binLabels = lookup->getOTUNames(); out << "label\tGroup\tnumOtus"; for (int i = 0; i < binLabels.size(); i++) { out << '\t' << binLabels[i]; } out << endl; while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } getRelAbundance(lookup, out); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outputFileName); return 0; } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["relabund"].push_back(outputFileName); m->mothurOutEndLine(); //set relabund file as new current relabundfile string currentName = ""; itTypes = outputTypes.find("relabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRelAbundFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "GetRelAbundCommand", "execute"); exit(1); } } //********************************************************************************************************************** int GetRelAbundCommand::getRelAbundance(SharedRAbundVectors*& thisLookUp, ofstream& out){ try { vector groups = thisLookUp->getNamesGroups(); vector binLabels = thisLookUp->getOTUNames(); for (int i = 0; i < thisLookUp->size(); i++) { out << thisLookUp->getLabel() << '\t' << groups[i] << '\t' << thisLookUp->getNumBins(); for (int j = 0; j < thisLookUp->getNumBins(); j++) { if (m->getControl_pressed()) { return 0; } int abund = thisLookUp->get(j, groups[i]); float relabund = 0.0; if (scale == "totalgroup") { relabund = abund / (float) thisLookUp->getNumSeqs(groups[i]); }else if (scale == "totalotu") { //calc the total in this otu int totalOtu = thisLookUp->getOTUTotal(j); relabund = abund / (float) totalOtu; }else if (scale == "averagegroup") { relabund = abund / (float) (thisLookUp->getNumSeqs(groups[i]) / (float) thisLookUp->getNumBins()); }else if (scale == "averageotu") { //calc the total in this otu int totalOtu = thisLookUp->getOTUTotal(j); float averageOtu = totalOtu / (float) thisLookUp->size(); relabund = abund / (float) averageOtu; }else{ m->mothurOut(scale + " is not a valid scaling option.\n"); m->setControl_pressed(true); return 0; } out << '\t' << relabund; } out << endl; } return 0; } catch(exception& e) { m->errorOut(e, "GetRelAbundCommand", "getRelAbundance"); exit(1); } } 
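//summary of the relative-abundance scaling computed in getRelAbundance above, for an OTU count 'abund' in group g:
//  totalgroup:   abund / (total seqs in g)
//  totalotu:     abund / (total of that OTU across all groups)
//  averagegroup: abund / (total seqs in g / number of OTUs)
//  averageotu:   abund / (OTU total / number of groups)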
//********************************************************************************************************************** mothur-1.48.0/source/commands/getrelabundcommand.h000077500000000000000000000021101424121717000222230ustar00rootroot00000000000000#ifndef GETRELABUNDCOMMAND_H #define GETRELABUNDCOMMAND_H /* * getrelabundcommand.h * Mothur * * Created by westcott on 6/21/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" class GetRelAbundCommand : public Command { public: GetRelAbundCommand(string); ~GetRelAbundCommand(){} vector setParameters(); string getCommandName() { return "get.relabund"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.relabund"; } string getDescription() { return "calculates the relative abundance of each OTU in a sample"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: SharedRAbundVectors* lookup; bool abort, allLines, pickedGroups; set labels; //holds labels to be used string groups, label, scale, sharedfile; vector Groups, outputNames; int getRelAbundance(SharedRAbundVectors*&, ofstream&); }; #endif mothur-1.48.0/source/commands/getsabundcommand.cpp000077500000000000000000000244101424121717000222450ustar00rootroot00000000000000/* * getsabundcommand.cpp * Mothur * * Created by Sarah Westcott on 6/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "getsabundcommand.h" //********************************************************************************************************************** vector GetSAbundCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","sabund",false,false, true); parameters.push_back(plist); CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none","",false,false, false); parameters.push_back(pcount); CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","sabund",false,false, true); parameters.push_back(prabund); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["sabund"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetSAbundCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetSAbundCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.sabund command parameters is list, rabund, count and label. list or rabund is required unless a valid current file exists.\n"; helpString += "The count parameter allows you to provide a count file associated with your list file. 
If you clustered with a countfile the list file only contains the unique sequences and you will want to add the redundant counts into the sabund file, providing the count file allows you to do so.\n"; helpString += "The label parameter allows you to select what distance levels you would like included in your .sabund file, and are separated by dashes.\n"; helpString += "The get.sabund command should be in the following format: get.sabund(label=yourLabels).\n"; helpString += "Example get.sabund().\n"; helpString += "The default value for label is all labels in your inputfile.\n"; helpString += "The get.sabund command outputs a .sabund file containing the labels you selected.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetSAbundCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetSAbundCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "sabund") { pattern = "[filename],sabund"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetRAbundCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetSAbundCommand::GetSAbundCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; inputfile = listfile; current->setListFile(listfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { rabundfile = ""; abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } else { format = "rabund"; inputfile = rabundfile; current->setRabundFile(rabundfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } if ((listfile == "") && (rabundfile == "")) { //is there are current file available for any of these? //give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it listfile = current->getListFile(); if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { rabundfile = current->getRabundFile(); if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a list or rabund file.\n"); abort = true; } } } if ((countfile != "") && (listfile == "")) { m->mothurOut("[ERROR]: You can only use the count file with a list file, aborting.\n"); abort = true; } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } } } catch(exception& e) { m->errorOut(e, "GetSAbundCommand", "GetSAbundCommand"); exit(1); } } //********************************************************************************************************************** int GetSAbundCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); filename = getOutputFileName("sabund", variables); util.openOutputFile(filename, out); if (countfile != "") { processList(out); }else { InputData input(inputfile, format, nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; SAbundVector* sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); while (sabund != nullptr) { if (m->getControl_pressed()) { delete sabund; break; } sabund->print(out); delete sabund; sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); } } out.close(); if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(filename); return 0; } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(filename); m->mothurOutEndLine(); outputNames.push_back(filename); outputTypes["sabund"].push_back(filename); m->mothurOutEndLine(); //set sabund file as new current sabundfile string currentName = ""; itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "GetSAbundCommand", "execute"); exit(1); } } //********************************************************************************************************************** int GetSAbundCommand::processList(ofstream& out){ try { CountTable ct; ct.readTable(countfile, false, false); InputData input(inputfile, format, nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); if (m->getControl_pressed()) { delete list; return 0; } while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } RAbundVector* rabund = new RAbundVector(); createRabund(ct, list, rabund); SAbundVector sabund = rabund->getSAbundVector(); sabund.print(out); delete rabund; delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } return 0; } catch(exception& e) { m->errorOut(e, "GetSAbundCommand", "processList"); exit(1); } } //********************************************************************************************************************** int GetSAbundCommand::createRabund(CountTable& ct, ListVector*& list, RAbundVector*& rabund){ try { rabund->setLabel(list->getLabel()); for(int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { return 0; } vector binNames; string bin = list->get(i); util.splitAtComma(bin, binNames); int total = 0; for (int j = 0; j < binNames.size(); j++) { total += ct.getNumSeqs(binNames[j]); } rabund->push_back(total); } return 0; } catch(exception& e) { m->errorOut(e, "GetSAbundCommand", "createRabund"); exit(1); } } 
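//illustrative example of the roll-up in createRabund above (hypothetical names and counts):
//a list bin "seqA,seqB,seqC" whose count-table totals are 3, 5 and 1 contributes a single
//rabund entry of 9 (3+5+1).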
//********************************************************************************************************************** mothur-1.48.0/source/commands/getsabundcommand.h000077500000000000000000000022151424121717000217110ustar00rootroot00000000000000#ifndef GETSABUNDCOMMAND_H #define GETSABUNDCOMMAND_H /* * getsabundcommand.h * Mothur * * Created by Sarah Westcott on 6/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" #include "sabundvector.hpp" class GetSAbundCommand : public Command { public: GetSAbundCommand(string); ~GetSAbundCommand() = default; vector setParameters(); string getCommandName() { return "get.sabund"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.sabund"; } string getDescription() { return "creates a sabund file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string filename, format, inputfile, listfile, rabundfile, countfile; ofstream out; vector outputNames; bool abort, allLines; set labels; //holds labels to be used string label; int processList(ofstream& out); int createRabund(CountTable& ct, ListVector*& list, RAbundVector*& rabund); }; #endif mothur-1.48.0/source/commands/getseqscommand.cpp000066400000000000000000001755421424121717000217560ustar00rootroot00000000000000/* * getseqscommand.cpp * Mothur * * Created by Sarah Westcott on 7/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "getseqscommand.h" #include "sequence.hpp" #include "listvector.hpp" #include "counttable.h" #include "fastqread.h" #include "inputdata.h" #include "contigsreport.hpp" #include "alignreport.hpp" //********************************************************************************************************************** vector GetSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup); CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy); CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport); CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","contigsreport",false,false); parameters.push_back(pcontigsreport); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none","qfile",false,false); parameters.push_back(pqfile); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos); CommandParameter 
pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos2); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["fastq"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["alignreport"] = tempOutNames; outputTypes["contigsreport"] = tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["accnosreport"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq, contigsreport or alignreport file.\n"; helpString += "It outputs a file containing only the sequences in the .accnos file.\n"; helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport, contigsreport, fastq and dups. You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n"; helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=true. \n"; helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n"; helpString += "You may enter multiple files of the same type separated by dashes. 
For example: get.seqs(accnos=yourAccnos, fastq=forward.fastq-reverse.fastq).\n"; helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n"; helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n"; getCommonQuestions(); return helpString; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],pick,[extension]"; } else if (type == "fastq") { pattern = "[filename],pick,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "qfile") { pattern = "[filename],pick,[extension]"; } else if (type == "accnosreport") { pattern = "[filename],pick.accnos.report"; } else if (type == "alignreport") { pattern = "[filename],pick.[extension]"; } else if (type == "contigsreport") { pattern = "[filename],pick.[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** string GetSeqsCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string howto = "How do I get the reads present in BOTH my paired fastq files to resolve file mismatches?\n"; howtos.push_back(howto); string hanswer = "\tYou can use a combination of the list.seqs and get.seqs command as follows:\n\n"; hanswer += "\tmothur > list.seqs(fastq=foward.fastq-reverse.fastq)\n\tmothur > get.seqs(fastq=foward.fastq-reverse.fastq, accnos=current)\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** GetSeqsCommand::GetSeqsCommand(unordered_set n, pair ffile, pair lfile, pair dupsFile, string dupsFileType) : Command() { try { names = n; dups = true; abort = false; calledHelp = false; vector tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["list"] = tempOutNames; if (dupsFile.first != "") { if (dupsFileType == "count") { readCount(dupsFile.first, dupsFile.second); } else { readName(dupsFile.first, dupsFile.second); } } if (ffile.first != "") { readFasta(ffile.first, ffile.second); } if (lfile.first != "") { readList(lfile.first, lfile.second); } } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand - mothurRun"); exit(1); } } //********************************************************************************************************************** GetSeqsCommand::GetSeqsCommand(unordered_map > names, 
string ffile, vector ofile, vector g) : Command() { try { abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; readFasta(names, ffile, ofile, g); } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand - mothurRun"); exit(1); } } //********************************************************************************************************************** GetSeqsCommand::GetSeqsCommand(unordered_set n, pair ffile, pair > lfile, pair dupsFile, string dupsFileType) : Command() { try { names = n; dups = true; abort = false; calledHelp = false; vector tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["list"] = tempOutNames; if (dupsFile.first != "") { if (dupsFileType == "count") { readCount(dupsFile.first, dupsFile.second); } else { readName(dupsFile.first, dupsFile.second); } } if (ffile.first != "") { readFasta(ffile.first, ffile.second); } if (lfile.first != "") { readList(lfile.first, lfile.second); } } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand - mothurRun"); exit(1); } } //********************************************************************************************************************** GetSeqsCommand::GetSeqsCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = current->getAccnosFile(); if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter.\n"); } else { m->mothurOut("You have no valid accnos file and accnos is required.\n"); abort = true; } }else { current->setAccnosFile(accnosfile); } if (accnosfile2 == "not found") { accnosfile2 = ""; } fastafiles = validParameter.validFiles(parameters, "fasta"); if (fastafiles.size() != 0) { if (fastafiles[0] == "not open") { abort = true; } else { current->setFastaFile(fastafiles[0]); } } namefiles = validParameter.validFiles(parameters, "name"); if (namefiles.size() != 0) { if (namefiles[0] == "not open") { abort = true; } else { current->setNameFile(namefiles[0]); } } groupfiles = validParameter.validFiles(parameters, "group"); if (groupfiles.size() != 0) { if (groupfiles[0] == "not open") { abort = true; } else { current->setGroupFile(groupfiles[0]); } } alignfiles = validParameter.validFiles(parameters, "alignreport"); if (alignfiles.size() != 0) { if (alignfiles[0] == "not open") { abort = true; } } contigsreportfiles = validParameter.validFiles(parameters, "contigsreport"); if (contigsreportfiles.size() != 0) { if (contigsreportfiles[0] == "not open") { abort = true; } else { current->setContigsReportFile(contigsreportfiles[0]); } } listfiles = validParameter.validFiles(parameters, "list"); if (listfiles.size() != 0) { if (listfiles[0] == "not open") { abort = true; } else { current->setListFile(listfiles[0]); } } taxfiles = validParameter.validFiles(parameters, "taxonomy"); if (taxfiles.size() != 0) { if (taxfiles[0] == "not open") { abort = true; } else { current->setTaxonomyFile(taxfiles[0]); } 
} countfiles = validParameter.validFiles(parameters, "count"); if (countfiles.size() != 0) { if (countfiles[0] == "not open") { abort = true; } else { current->setCountFile(countfiles[0]); } } fastqfiles = validParameter.validFiles(parameters, "fastq"); if (fastqfiles.size() != 0) { if (fastqfiles[0] == "not open") { abort = true; } } qualityfiles = validParameter.validFiles(parameters, "qfile"); if (qualityfiles.size() != 0) { if (qualityfiles[0] == "not open") { abort = true; } else { current->setQualFile(qualityfiles[0]); } } if ((qualityfiles.size() == 0) && (fastqfiles.size() == 0) && (countfiles.size() == 0) && (fastafiles.size() == 0) && (namefiles.size() == 0) && (listfiles.size() == 0) && (groupfiles.size() == 0) && (alignfiles.size() == 0) && (taxfiles.size() == 0) && (contigsreportfiles.size() == 0)) { m->mothurOut("You must provide a file.\n"); abort = true; } string usedDups = "true"; string temp = validParameter.valid(parameters, "dups"); if (temp == "not found") { temp = "true"; usedDups = ""; } dups = util.isTrue(temp); format = validParameter.valid(parameters, "format"); if (format == "not found"){ format = "illumina1.8+"; } if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) { m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting.\n" ); abort=true; } //read accnos file if (!abort) { names = util.readAccnos(accnosfile); } } } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand"); exit(1); } } //********************************************************************************************************************** int GetSeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } string fastafile = ""; string namefile = ""; string qualfile = ""; string taxfile = ""; string groupfile = ""; string listfile = ""; //read through the correct file and output lines you want to keep if (namefiles.size() != 0) { namefile = namefiles[0]; for (int i = 0; i < namefiles.size(); i++) { readName(namefiles[i]); } } if (fastafiles.size() != 0) { fastafile = fastafiles[0]; for (int i = 0; i < fastafiles.size(); i++) { readFasta(fastafiles[i]); } } if (qualityfiles.size() != 0) { qualfile = qualityfiles[0]; for (int i = 0; i < qualityfiles.size(); i++) { readQual(qualityfiles[i]); } } if (groupfiles.size() != 0) { groupfile = groupfiles[0]; for (int i = 0; i < groupfiles.size(); i++) { readGroup(groupfiles[i]); } } if (taxfiles.size() != 0) { taxfile = taxfiles[0]; for (int i = 0; i < taxfiles.size(); i++) { readTax(taxfiles[i]); } } if (listfiles.size() != 0) { listfile = listfiles[0]; for (int i = 0; i < listfiles.size(); i++) { readList(listfiles[i]); } } if (alignfiles.size() != 0) { for (int i = 0; i < alignfiles.size(); i++) { readAlign(alignfiles[i]); } } if (contigsreportfiles.size() != 0) { for (int i = 0; i < contigsreportfiles.size(); i++) { readContigs(contigsreportfiles[i]); } } if (countfiles.size() != 0) { for (int i = 0; i < countfiles.size(); i++) { readCount(countfiles[i]); } } if (fastqfiles.size() != 0) { for (int i = 0; i < fastqfiles.size(); i++) { readFastq(fastqfiles[i]); } } if (accnosfile2 != "") { compareAccnos(namefile); } if (m->getDebug()) { runSanityCheck(fastafile, namefile, qualfile, taxfile, groupfile, listfile); } if (m->getControl_pressed()) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { 
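//list the files that were created and promote the first output of each type (fasta, name, group, list, taxonomy, qfile, count) to the current file so downstream commands pick them up automatically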
m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "execute"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readGZFastq(string fastqfile){ try { #ifdef USE_BOOST string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastqfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastqfile)); variables["[extension]"] = ".fastq" + util.getExtension(fastqfile); string outputFileName = getOutputFileName("fastq", variables); ifstream in; boost::iostreams::filtering_istream inBoost; util.openInputFileBinary(fastqfile, in, inBoost); ofstream file; ostream* out; boost::iostreams::filtering_streambuf outBoost; util.openOutputFileBinary(outputFileName, file, out, outBoost); bool wroteSomething = false; int selectedCount = 0; set uniqueNames; while(!inBoost.eof()){ if (m->getControl_pressed()) { break; } //read sequence name bool ignore; FastqRead fread(inBoost, ignore, format); gobble(inBoost); if (!ignore) { string name = fread.getName(); if (names.count(name) != 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet wroteSomething = true; selectedCount++; fread.printFastq(*out); uniqueNames.insert(name); }else { m->mothurOut("[WARNING]: " + name + " is in your fastq file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); } } } gobble(inBoost); } in.close(); inBoost.pop(); boost::iostreams::close(outBoost); file.close(); delete out; if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return; } if (wroteSomething == false) { m->mothurOut("[WARNING]: " + fastqfile + " does not contain any sequence from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["fastq"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + fastqfile + ".\n"); #else m->mothurOut("[ERROR]: mothur requires the boost libraries to read and write compressed files. Please decompress your files and rerun.\n"); #endif return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readFastq"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readFastq(string fastqfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastqfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastqfile)); variables["[extension]"] = util.getExtension(fastqfile); string outputFileName = getOutputFileName("fastq", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(fastqfile, in); bool wroteSomething = false; int selectedCount = 0; set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } //read sequence name bool ignore; FastqRead fread(in, ignore, format); gobble(in); if (!ignore) { string name = fread.getName(); if (names.count(name) != 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet wroteSomething = true; selectedCount++; fread.printFastq(out); uniqueNames.insert(name); }else { m->mothurOut("[WARNING]: " + name + " is in your fastq file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); } } } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + fastqfile + " does not contain any sequence from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["fastq"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + fastqfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readFastq"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readFasta(string fastafile, string outputFileName){ try { ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(fastafile, in); string name; bool wroteSomething = false; int selectedCount = 0; if (m->getDebug()) { set temp; sanity["fasta"] = temp; } set uniqueNames; int line = 0; int redundNum = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } Sequence currSeq(in); name = currSeq.getName(); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(name); if (it != uniqueMap.end()) { currSeq.setName(it->second); } } name = currSeq.getName(); if (name != "") { line++; //if this name is in the accnos file if (names.count(name) != 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet wroteSomething = true; currSeq.printSequence(out); selectedCount++; uniqueNames.insert(name); if (m->getDebug()) { sanity["fasta"].insert(name); } }else { m->mothurOut("[WARNING]: " + name + " is in your fasta file more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); redundNum++; } } } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + fastafile + " does not contain any sequence from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + fastafile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** //assumes nameToGroup[seq1] -> 1,3 means seq1 should be written to outputFiles[1] and outputFiles[3] void GetSeqsCommand::readFasta(unordered_map > nameToGroups, string fastafile, vector outputFiles, vector groups){ try { unordered_map >::iterator it; vector outputs; vector selectedCounts; selectedCounts.resize(outputFiles.size(), 0); for (string filename : outputFiles) { ofstream* out = new ofstream(); util.openOutputFile(filename, *out); outputs.push_back(out); } ifstream in; util.openInputFile(fastafile, in); string name; bool wroteSomething = false; set uniqueNames; int redundNum = 0; while(!in.eof()){ if (m->getControl_pressed()) { break; } Sequence currSeq(in); name = currSeq.getName(); if (name != "") { it = nameToGroups.find(name); if (it != nameToGroups.end()) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet wroteSomething = true; vector outputIndex = it->second; for (int i : outputIndex) { currSeq.printSequence(*outputs[i]); selectedCounts[i]++; } uniqueNames.insert(name); }else { m->mothurOut("[WARNING]: " + name + " is in your fasta file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); redundNum++; } } } gobble(in); } in.close(); for (ofstream* out : outputs) { out->close(); delete out; } if (wroteSomething == false) { m->mothurOut("[WARNING]: " + fastafile + " does not contain any sequence from the .accnos file.\n"); } for (int i = 0; i < outputFiles.size(); i++) { outputNames.push_back(outputFiles[i]); outputTypes["fasta"].push_back(outputFiles[i]); m->mothurOut("Selected " + toString(selectedCounts[i]) + " sequences from " + groups[i] + ".\n"); } return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readFasta(string fastafile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[extension]"] = util.getExtension(fastafile); string outputFileName = getOutputFileName("fasta", variables); readFasta(fastafile, outputFileName); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readQual(string qualfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(qualfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(qualfile)); variables["[extension]"] = util.getExtension(qualfile); string outputFileName = getOutputFileName("qfile", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(qualfile, in); string name; bool wroteSomething = false; int selectedCount = 0; if (m->getDebug()) { set temp; sanity["qual"] = temp; } set uniqueNames; while(!in.eof()){ QualityScores qual(in); gobble(in); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(qual.getName()); if (it != uniqueMap.end()) { qual.setName(it->second); } } string name = qual.getName(); if (names.count(name) != 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; qual.printQScores(out); selectedCount++; if (m->getDebug()) { sanity["qual"].insert(name); } }else { m->mothurOut("[WARNING]: " + name + " is in your qfile more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); } } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + qualfile + " does not contain any sequence from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["qfile"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + qualfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readQual"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readCount(string countfile, string outputFileName){ try { CountTable ct; ct.readTable(countfile, true, false, names); bool wroteSomething = false; int selectedCount = ct.getNumSeqs(); if (selectedCount != 0) { wroteSomething = true; } ct.printTable(outputFileName); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + countfile + " does not contain any sequence from the .accnos file.\n"); } outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + countfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readCount"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readCount(string countfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string outputFileName = getOutputFileName("count", variables); readCount(countfile, outputFileName); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readCount"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readList(string listfile, string outputFileName){ try { InputData input(listfile, "list", nullVector); ListVector* list = input.getListVector(); bool wroteSomething = false; int selectedCount = processList(list, outputFileName, wroteSomething); delete list; if (wroteSomething == false) { m->mothurOut("[WARNING]: " + listfile + " does not contain any sequence from the .accnos file.\n"); } m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + listfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readList"); exit(1); } } //********************************************************************************************************************** int GetSeqsCommand::processList(ListVector*& list, string outputFileName, bool& wroteSomething){ try { vector binLabels = list->getLabels(); vector newBinLabels; set uniqueNames; int selectedCount = 0; ofstream out; util.openOutputFile(outputFileName, out); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); if (m->getControl_pressed()) { out.close(); return selectedCount; } //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); //for each bin for (int i = 0; i < list->getNumBins(); i++) { //parse out names that are in accnos file string binnames = list->get(i); vector bnames; util.splitAtComma(binnames, bnames); string newNames = ""; for (int j = 0; j < bnames.size(); j++) { string name = bnames[j]; //if that name is in 
the .accnos file, add it if (names.count(name) != 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); newNames += name + ","; selectedCount++; if (m->getDebug()) { sanity["list"].insert(name); } }else { m->mothurOut("[WARNING]: " + name + " is in your list file more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); } } } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.print(out, false); } out.close(); return selectedCount; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "processList"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readList(string listfile, vector outputFileNames){ try { if (outputFileNames.size() == 1) { return (readList(listfile, outputFileNames[0])); } InputData input(listfile, "list", nullVector); ListVector* list = input.getListVector(); bool wroteSomething = false; int selectedCount = 0; int distCount = 0; if (m->getDebug()) { set temp; sanity["list"] = temp; } while((list != nullptr) && (distCount < outputFileNames.size())){ selectedCount = processList(list, outputFileNames[distCount], wroteSomething); distCount++; delete list; list = input.getListVector(); } if (wroteSomething == false) { m->mothurOut("[WARNING]: " + listfile + " does not contain any sequence from the .accnos file.\n"); } m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + listfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readList"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readList(string listfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); InputData input(listfile, "list", nullVector); ListVector* list = input.getListVector(); bool wroteSomething = false; int selectedCount = 0; if (m->getDebug()) { set temp; sanity["list"] = temp; } while(list != nullptr) { variables["[distance]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); selectedCount = processList(list, outputFileName, wroteSomething); delete list; list = input.getListVector(); } if (wroteSomething == false) { m->mothurOut("[WARNING]: " + listfile + " does not contain any sequence from the .accnos file.\n"); } m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + listfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readList"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readName(string namefile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(namefile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[extension]"] = util.getExtension(namefile); string 
outputFileName = getOutputFileName("name", variables); readName(namefile, outputFileName); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readName"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readName(string namefile, string outputFileName){ try { ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(namefile, in); string name, firstCol, secondCol; bool wroteSomething = false; int selectedCount = 0; if (m->getDebug()) { set temp; sanity["name"] = temp; } if (m->getDebug()) { set temp; sanity["dupname"] = temp; } set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> firstCol; gobble(in); in >> secondCol; gobble(in); string hold = ""; if (dups) { hold = secondCol; } vector parsedNames; util.splitAtComma(secondCol, parsedNames); vector validSecond; vector parsedNames2; bool parsedError = false; for (int i = 0; i < parsedNames.size(); i++) { if (names.count(parsedNames[i]) != 0) { if (uniqueNames.count(parsedNames[i]) == 0) { //this name hasn't been seen yet uniqueNames.insert(parsedNames[i]); validSecond.push_back(parsedNames[i]); parsedNames2.push_back(parsedNames[i]); if (m->getDebug()) { sanity["dupname"].insert(parsedNames[i]); } }else { m->mothurOut("[WARNING]: " + parsedNames[i] + " is in your name file more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); parsedError = true; } } } if (parsedError) { parsedNames = parsedNames2; hold = ""; if (parsedNames.size() != 0) { for (int i = 0; i < parsedNames.size()-1; i++) { hold += parsedNames[i] + ','; } hold += parsedNames[parsedNames.size()-1] + '\n'; } } if (dups && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); if (m->getDebug()) { sanity["dupname"].insert(parsedNames[i]); } } out << firstCol << '\t' << hold << endl; wroteSomething = true; selectedCount += parsedNames.size(); if (m->getDebug()) { sanity["name"].insert(firstCol); } }else { if (validSecond.size() != 0) { selectedCount += validSecond.size(); //if the name in the first column is in the set then print it and any other names in second column also in set if (names.count(firstCol) != 0) { wroteSomething = true; out << firstCol << '\t'; //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; if (m->getDebug()) { sanity["name"].insert(firstCol); } //make first name in set you come to first column and then add the remaining names to second column }else { //you want part of this row if (validSecond.size() != 0) { wroteSomething = true; out << validSecond[0] << '\t'; //we are changing the unique name in the fasta file uniqueMap[firstCol] = validSecond[0]; //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; if (m->getDebug()) { sanity["name"].insert(validSecond[0]); } } } } } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + namefile + " does not contain any sequence from the .accnos file.\n"); } outputNames.push_back(outputFileName); 
outputTypes["name"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + namefile + " file.\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readName"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readGroup(string groupfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[extension]"] = util.getExtension(groupfile); string outputFileName = getOutputFileName("group", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(groupfile, in); string name, group; bool wroteSomething = false; int selectedCount = 0; if (m->getDebug()) { set temp; sanity["group"] = temp; } set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> name; gobble(in); //read from first column in >> group; gobble(in); //read from second column if (names.count(name) != 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; out << name << '\t' << group << endl; selectedCount++; if (m->getDebug()) { sanity["group"].insert(name); } }else { m->mothurOut("[WARNING]: " + name + " is in your group file more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); } } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + groupfile + " does not contain any sequence from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + groupfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readGroup"); exit(1); } } //********************************************************************************************************************** void GetSeqsCommand::readTax(string taxfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile)); variables["[extension]"] = util.getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(taxfile, in); string name, tax; bool wroteSomething = false; int selectedCount = 0; if (m->getDebug()) { set temp; sanity["tax"] = temp; } set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> name; gobble(in); tax = util.getline(in); gobble(in); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(name); if (it != uniqueMap.end()) { name = it->second; } } if (names.count(name) != 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; out << name << '\t' << tax << endl; selectedCount++; if (m->getDebug()) { sanity["tax"].insert(name); } }else { m->mothurOut("[WARNING]: " + name + " is in your taxonomy file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); } } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + taxfile + " does not contain any sequence from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + taxfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readTax"); exit(1); } } //********************************************************************************************************************** //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name void GetSeqsCommand::readAlign(string alignfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(alignfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(alignfile)); variables["[extension]"] = util.getExtension(alignfile); string outputFileName = getOutputFileName("alignreport", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(alignfile, in); AlignReport report; report.readHeaders(in); gobble(in); report.printHeaders(out); bool wroteSomething = false; int selectedCount = 0; set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } report.read(in); gobble(in); string name = report.getQueryName(); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(name); if (it != uniqueMap.end()) { name = it->second; } } //if this name is in the accnos file if (names.count(name) != 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; selectedCount++; report.print(out); }else { m->mothurOut("[WARNING]: " + name + " is in your alignreport file more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); } } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + alignfile + " does not contain any sequence from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + alignfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readAlign"); exit(1); } } //********************************************************************************************************************** //contigsreport file has a column header line then all other lines contain 8 columns. 
we just want the first column since that contains the name void GetSeqsCommand::readContigs(string contigsreportfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(contigsreportfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(contigsreportfile)); variables["[extension]"] = util.getExtension(contigsreportfile); string outputFileName = getOutputFileName("contigsreport", variables); ofstream out; util.openOutputFile(outputFileName, out); bool wroteSomething = false; int selectedCount = 0; set uniqueNames; ifstream in; util.openInputFile(contigsreportfile, in); ContigsReport report; report.readHeaders(in); gobble(in); report.printHeaders(out); while(!in.eof()){ if (m->getControl_pressed()) { break; } report.read(in); gobble(in); string name = report.getName(); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(name); if (it != uniqueMap.end()) { name = it->second; } } if (names.count(name) != 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; selectedCount++; report.print(out); }else { m->mothurOut("[WARNING]: " + name + " is in your contigsreport file more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); } } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + contigsreportfile + " does not contain any sequence from the .accnos file.\n"); ofstream out1; util.openOutputFile(outputFileName, out1); out1.close(); } //reopening file clears header line outputNames.push_back(outputFileName); outputTypes["contigsreport"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from " + contigsreportfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "readContigs"); exit(1); } } //********************************************************************************************************************** //just looking at common mistakes. 
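//debug-only check: compares the sets of names selected from each file type (fasta vs. name/qual/taxonomy, name vs. group/list) and writes any mismatches to get.seqs.debug.report; the report is removed if no mismatches are found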
int GetSeqsCommand::runSanityCheck(string fastafile, string namefile, string qualfile, string taxfile, string groupfile, string listfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } string filename = outputdir + "get.seqs.debug.report"; ofstream out; util.openOutputFile(filename, out); //compare fasta, name, qual and taxonomy if given to make sure they contain the same seqs if (fastafile != "") { if (namefile != "") { //compare with fasta if (sanity["fasta"] != sanity["name"]) { //create mismatch file createMisMatchFile(out, fastafile, namefile, sanity["fasta"], sanity["name"]); } } if (qualfile != "") { if (sanity["fasta"] != sanity["qual"]) { //create mismatch file createMisMatchFile(out, fastafile, qualfile, sanity["fasta"], sanity["qual"]); } } if (taxfile != "") { if (sanity["fasta"] != sanity["tax"]) { //create mismatch file createMisMatchFile(out, fastafile, taxfile, sanity["fasta"], sanity["tax"]); } } } //compare dupnames, groups and list if given to make sure they match if (namefile != "") { if (groupfile != "") { if (sanity["dupname"] != sanity["group"]) { //create mismatch file createMisMatchFile(out, namefile, groupfile, sanity["dupname"], sanity["group"]); } } if (listfile != "") { if (sanity["dupname"] != sanity["list"]) { //create mismatch file createMisMatchFile(out, namefile, listfile, sanity["dupname"], sanity["list"]); } } }else{ if ((groupfile != "") && (fastafile != "")) { if (sanity["fasta"] != sanity["group"]) { //create mismatch file createMisMatchFile(out, fastafile, groupfile, sanity["fasta"], sanity["group"]); } } } out.close(); if (util.isBlank(filename)) { util.mothurRemove(filename); } else { m->mothurOut("\n[DEBUG]: " + filename + " contains the file mismatches.\n");outputNames.push_back(filename); outputTypes["debug"].push_back(filename); } return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "runSanityCheck"); exit(1); } } //********************************************************************************************************************** //just looking at common mistakes. 
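//writes the names unique to each of the two files to the debug report; the sets are passed by value so the caller's copies are not modified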
int GetSeqsCommand::createMisMatchFile(ofstream& out, string filename1, string filename2, set set1, set set2){ try { out << "****************************************" << endl << endl; out << "Names unique to " << filename1 << ":\n"; //remove names in set1 that are also in set2 for (set::iterator it = set1.begin(); it != set1.end();) { string name = *it; if (set2.count(name) == 0) { out << name << endl; } //name unique to set1 else { set2.erase(name); } //you are in both so erase set1.erase(it++); } out << "\nNames unique to " << filename2 << ":\n"; //output results for (set::iterator it = set2.begin(); it != set2.end(); it++) { out << *it << endl; } out << "****************************************" << endl << endl; return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "runSanityCheck"); exit(1); } } //********************************************************************************************************************** int GetSeqsCommand::compareAccnos(string namefile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(accnosfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(accnosfile)); string outputFileName = getOutputFileName("accnosreport", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(accnosfile2, in); string name; set namesAccnos2; set namesDups; unordered_set namesAccnos = names; map nameCount; if (namefile != "") { ifstream inName; util.openInputFile(namefile, inName); while(!inName.eof()){ if (m->getControl_pressed()) { inName.close(); return 0; } string thisname, repnames; inName >> thisname; gobble(inName); //read from first column inName >> repnames; //read from second column int num = util.getNumNames(repnames); nameCount[thisname] = num; gobble(inName); } inName.close(); } while(!in.eof()){ in >> name; if (namesAccnos.count(name) == 0){ //name unique to accnos2 int pos = name.find_last_of('_'); string tempName = name; if (pos != string::npos) { tempName = tempName.substr(pos+1); } if (namesAccnos.count(tempName) == 0){ namesAccnos2.insert(name); }else { //you are in both so erase namesAccnos.erase(name); namesDups.insert(name); } }else { //you are in both so erase namesAccnos.erase(name); namesDups.insert(name); } gobble(in); } in.close(); out << "Names in both files : " + toString(namesDups.size()) << endl; m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine(); for (set::iterator it = namesDups.begin(); it != namesDups.end(); it++) { out << (*it); if (namefile != "") { out << '\t' << nameCount[(*it)]; } out << endl; } out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl; m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine(); for (auto it = namesAccnos.begin(); it != namesAccnos.end(); it++) { out << (*it); if (namefile != "") { out << '\t' << nameCount[(*it)]; } out << endl; } out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl; m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine(); for (set::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) { out << (*it); if (namefile != "") { out << '\t' << nameCount[(*it)]; } out << endl; } out.close(); outputNames.push_back(outputFileName); outputTypes["accnosreport"].push_back(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, 
"GetSeqsCommand", "compareAccnos"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getseqscommand.h000077500000000000000000000070431424121717000214140ustar00rootroot00000000000000#ifndef GETSEQSCOMMAND_H #define GETSEQSCOMMAND_H /* * getseqscommand.h * Mothur * * Created by Sarah Westcott on 7/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "sequencedb.h" //********************************************************************************************************************** class GetSeqsCommand : public Command { public: GetSeqsCommand(string); GetSeqsCommand(unordered_set, pair fasta, pair > list, pair dupsFile, string dupsFileType); GetSeqsCommand(unordered_set, pair fasta, pair list, pair dupsFile, string dupsFileType); GetSeqsCommand(unordered_map >, string fastafile, vector outputFiles, vector groups); ~GetSeqsCommand(){} vector setParameters(); string getCommandName() { return "get.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCommonQuestions(); string getCitation() { return "http://www.mothur.org/wiki/Get.seqs"; } string getDescription() { return "gets sequences from a list, fasta, count, name, group, alignreport, quality, fastq, contigsreport or taxonomy file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: unordered_set names; vector fastafiles, namefiles, groupfiles, countfiles, alignfiles, listfiles, taxfiles, fastqfiles, contigsreportfiles, qualityfiles, outputNames; string accnosfile, accnosfile2, format, inputFileName; bool abort, dups; map uniqueMap; map > sanity; //for debug //maps file type to names chosen for file. something like "fasta" -> vector. If running in debug mode this is filled and we check to make sure all the files have the same names. If they don't we output the differences for the user. void readFasta(unordered_map > nameToGroups, string fastafile, vector outputFiles, vector); void readFasta(string); //inputFastaFile, mothur generates output name void readFasta(string, string); //inputFastaFile, outputName (internal use) void readName(string); //inputNameFile, mothur generates output name void readName(string, string); //inputNameFile, outputName (internal use) void readCount(string); //inputCountFile, mothur generates output name void readCount(string, string); //inputCountFile, outputName (internal use) void readList(string); //inputListFile, mothur generates output name void readList(string, string); //inputListFile, outputName (internal use) void readList(string, vector); //inputListFile, outputNames for each distance in list file (internal use) void readFastq(string); void readGZFastq(string); void readGroup(string); void readAlign(string); void readTax(string); void readQual(string); void readContigs(string); int compareAccnos(string); int runSanityCheck(string, string, string, string, string, string); int createMisMatchFile(ofstream&, string, string, set, set); int processList(ListVector*& list, string output, bool&); }; //********************************************************************************************************************** #endif mothur-1.48.0/source/commands/getsharedotucommand.cpp000077500000000000000000000632111424121717000227710ustar00rootroot00000000000000/* * getsharedotucommand.cpp * Mothur * * Created by westcott on 9/22/09. 
* Copyright 2009 Schloss Lab. All rights reserved. * */ #include "getsharedotucommand.h" //********************************************************************************************************************** vector GetSharedOTUCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "sharedFasta", "none", "none","fasta",false,false); parameters.push_back(pfasta); CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "groupList","",false,false,true); parameters.push_back(pgroup); CommandParameter pcount("count", "InputTypes", "", "", "none", "GroupCount", "none","",false,false); parameters.push_back(pcount); CommandParameter plist("list", "InputTypes", "", "", "sharedList", "sharedList", "groupList","sharedseq",false,false,true); parameters.push_back(plist); CommandParameter pshared("shared", "InputTypes", "", "", "sharedList-sharedFasta", "sharedList", "none","sharedseq",false,false,true); parameters.push_back(pshared); CommandParameter poutput("output", "Multiple", "accnos-default", "default", "", "", "","",false,false); parameters.push_back(poutput); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter puniquegroups("uniquegroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(puniquegroups); CommandParameter psharedgroups("sharedgroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(psharedgroups); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; userGroups = ""; unique = true; allLines = true; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["sharedseqs"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string GetSharedOTUCommand::getHelpString(){ try { string helpString = ""; helpString += "The get.sharedseqs command parameters are list, group, shared, label, uniquegroups, sharedgroups, output and fasta. 
The list and group or shared parameters are required, unless you have valid current files.\n"; helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n"; helpString += "The uniquegroups and sharedgroups parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n"; helpString += "If you enter your groups under the uniquegroups parameter mothur will return the otus that contain ONLY sequences from those groups.\n"; helpString += "If you enter your groups under the sharedgroups parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n"; helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group or shared file.\n"; helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified. It can only be used with a list and group file not the shared file input.\n"; helpString += "The count parameter allows you to provide a count file containing the group info for the list file.\n"; helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n"; helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n"; helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n"; helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, uniquegroups=yourGroups, fasta=yourFastafile, output=yourOutput).\n"; helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=amazon.groups, uniquegroups=forest-pasture, fasta=amazon.fasta, output=accnos).\n"; helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n"; helpString += "The default value for label is all labels in your inputfile. 
The default for groups is all groups in your file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string GetSharedOTUCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],[distance],[group],shared.fasta"; } else if (type == "accnos") { pattern = "[filename],[distance],[group],accnos"; } else if (type == "sharedseqs") { pattern = "[filename],[distance],[group],shared.seqs"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** GetSharedOTUCommand::GetSharedOTUCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; current->setListFile(listfile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); CountTable temp; if (!temp.testGroups(countfile)) { m->mothurOut("[ERROR]: Your count file does not have group info, aborting.\n"); abort=true; } } if ((sharedfile == "") && (listfile == "")) { //look for currents //is there are current file available for either of these? //give priority to shared, then list sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a shared or list file.\n"); abort = true; } } }else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("You may enter ONLY ONE of the following: shared or list.\n"); abort = true; } if (listfile != "") { if ((groupfile == "") && (countfile == "")) { groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("You need to provide a groupfile or countfile if you are going to use the list format.\n"); abort = true; } } } } if ((sharedfile != "") && (fastafile != "")) { m->mothurOut("You cannot use the fasta file with the shared file.\n"); abort = true; } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } output = validParameter.valid(parameters, "output"); if (output == "not found") { output = ""; } else if (output == "default") { output = ""; } groups = validParameter.valid(parameters, "uniquegroups"); if (groups == "not found") { groups = ""; } else { userGroups = "unique." + groups; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } if (Groups.size() > 4) { userGroups = "unique.selected_groups"; } //if too many groups then the filename becomes too big. } groups = validParameter.valid(parameters, "sharedgroups"); if (groups == "not found") { groups = ""; } else { userGroups = groups; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } if (Groups.size() > 4) { userGroups = "selected_groups"; } //if too many groups then the filename becomes too big. unique = false; } } } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "GetSharedOTUCommand"); exit(1); } } //********************************************************************************************************************** int GetSharedOTUCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if ( sharedfile != "") { runShared(); } else { if (groupfile != "") { groupMap = new GroupMap(groupfile); int groupError = groupMap->readMap(); if (groupError == 1) { delete groupMap; return 0; } vector allGroups = groupMap->getNamesOfGroups(); }else{ ct = new CountTable(); ct->readTable(countfile, true, false); } if (m->getControl_pressed()) { delete groupMap; return 0; } if (Groups.size() == 0) { if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); } else { Groups = ct->getNamesOfGroups(); } //make string for outputfile name userGroups = "unique."; for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; } userGroups = userGroups.substr(0, userGroups.length()-1); if (Groups.size() > 4) { userGroups = "unique.selected_groups"; } //if too many groups then the filename becomes too big. 
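//index the requested groups for fast lookup, read the fasta file (if given) into memory, then step through the list file label by label calling process() on each list vector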
} //put groups in map to find easier for(int i = 0; i < Groups.size(); i++) { groupFinder[Groups[i]] = Groups[i]; } if (fastafile != "") { ifstream inFasta; util.openInputFile(fastafile, inFasta); while(!inFasta.eof()) { if (m->getControl_pressed()) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; } Sequence seq(inFasta); gobble(inFasta); if (seq.getName() != "") { seqs.push_back(seq); } } inFasta.close(); } InputData input(listfile, "list", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } process(list); delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } if (m->getControl_pressed()) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } if (groupfile != "") { delete groupMap; }else { delete ct; } return 0; } } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } if (output == "accnos") { itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "execute"); exit(1); } } /***********************************************************/ int GetSharedOTUCommand::process(ListVector* shared) { try { map fastaMap; ofstream outNames; string outputFileNames; if (outputdir == "") { outputdir += util.hasPath(listfile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); variables["[distance]"] = shared->getLabel(); variables["[group]"] = userGroups; if (output != "accnos") { outputFileNames = getOutputFileName("sharedseqs", variables); } else { outputFileNames = getOutputFileName("accnos", variables); } util.openOutputFile(outputFileNames, outNames); bool wroteSomething = false; int num = 0; //go through each bin, find out if shared vector binLabels = shared->getLabels(); for (int i = 0; i < shared->getNumBins(); i++) { if (m->getControl_pressed()) { outNames.close(); util.mothurRemove(outputFileNames); return 0; } bool uniqueOTU = true; map atLeastOne; for (int f = 0; f < Groups.size(); f++) { atLeastOne[Groups[f]] = 0; } vector namesOfSeqsInThisBin; string names = shared->get(i); vector binNames; util.splitAtComma(names, binNames); for(int j = 0; j < binNames.size(); j++) { string name = binNames[j]; //find group string seqGroup = "not found"; vector seqsGroups; if (groupfile != "") { seqGroup = groupMap->getGroup(name); } else { seqsGroups = ct->getGroups(name); seqGroup = util.getStringFromVector(seqsGroups, "-"); } if (output != "accnos") { namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + binLabels[i])); }else { namesOfSeqsInThisBin.push_back(name); } if (seqGroup == "not found") { m->mothurOut(name + " is not in your groupfile. 
Please correct.\n"); exit(1); } if (groupfile != "") { //is this seq in one of hte groups we care about it = groupFinder.find(seqGroup); if (it == groupFinder.end()) { uniqueOTU = false; } //you have a sequence from a group you don't want else { atLeastOne[seqGroup]++; } }else { for (int k = 0; k < seqsGroups.size(); k++) { //is this seq in one of hte groups we care about it = groupFinder.find(seqsGroups[k]); if (it == groupFinder.end()) { uniqueOTU = false; } //you have a sequence from a group you don't want else { atLeastOne[seqsGroups[k]]++; } } } } //make sure you have at least one seq from each group you want bool sharedByAll = true; map::iterator it2; for (it2 = atLeastOne.begin(); it2 != atLeastOne.end(); it2++) { if (it2->second == 0) { sharedByAll = false; } } //if the user wants unique bins and this is unique then print //or this the user wants shared bins and this bin is shared then print if ((unique && uniqueOTU && sharedByAll) || (!unique && sharedByAll)) { wroteSomething = true; num++; //output list of names for (int j = 0; j < namesOfSeqsInThisBin.size(); j++) { outNames << namesOfSeqsInThisBin[j] << endl; if (fastafile != "") { if (output != "accnos") { string seqName = namesOfSeqsInThisBin[j].substr(0,namesOfSeqsInThisBin[j].find_last_of('|')); seqName = seqName.substr(0,seqName.find_last_of('|')); fastaMap[seqName] = namesOfSeqsInThisBin[j]; //fastaMap needs to contain just the seq name for output later }else { fastaMap[namesOfSeqsInThisBin[j]] = namesOfSeqsInThisBin[j]; } } } } } outNames.close(); if (!wroteSomething) { util.mothurRemove(outputFileNames); string outputString = "\t" + toString(num) + " - No otus shared by groups"; string groupString = ""; for (int h = 0; h < Groups.size(); h++) { groupString += " " + Groups[h]; } outputString += groupString + "."; m->mothurOut(outputString); m->mothurOutEndLine(); }else { m->mothurOut(shared->getLabel() + "\t" + toString(num)+"\n"); outputNames.push_back(outputFileNames); if (output != "accnos") { outputTypes["sharedseqs"].push_back(outputFileNames); } else { outputTypes["accnos"].push_back(outputFileNames); } } //if fasta file provided output new fasta file if ((fastafile != "") && wroteSomething) { if (outputdir == "") { outputdir += util.hasPath(fastafile); } variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outputFileFasta = getOutputFileName("fasta", variables); ofstream outFasta; util.openOutputFile(outputFileFasta, outFasta); outputNames.push_back(outputFileFasta); outputTypes["fasta"].push_back(outputFileFasta); for (int k = 0; k < seqs.size(); k++) { if (m->getControl_pressed()) { outFasta.close(); return 0; } //if this is a sequence we want, output it it = fastaMap.find(seqs[k].getName()); if (it != fastaMap.end()) { if (output != "accnos") { outFasta << ">" << it->second << endl; }else { outFasta << ">" << it->first << endl; } outFasta << seqs[k].getAligned() << endl; } } outFasta.close(); } return 0; } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "process"); exit(1); } } /***********************************************************/ int GetSharedOTUCommand::runShared() { try { InputData input(sharedfile, "sharedfile", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); if (Groups.size() == 0) { Groups = lookup->getNamesGroups(); } if (userGroups == "") { //make string for outputfile name userGroups = "unique."; for(int 
i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; } userGroups = userGroups.substr(0, userGroups.length()-1); if (Groups.size() > 4) { userGroups = "unique.selected_groups"; } //if too many groups then the filename becomes too big. } //put groups in map to find easier for(int i = 0; i < Groups.size(); i++) { groupFinder[Groups[i]] = Groups[i];} while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } process(lookup); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } if (m->getControl_pressed()) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } return 0; } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "runShared"); exit(1); } } /***********************************************************/ int GetSharedOTUCommand::process(SharedRAbundVectors*& lookup) { try { string outputFileNames; if (outputdir == "") { outputdir += util.hasPath(sharedfile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[distance]"] = lookup->getLabel(); variables["[group]"] = userGroups; if (output != "accnos") { outputFileNames = getOutputFileName("sharedseqs", variables); } else { outputFileNames = getOutputFileName("accnos", variables); } ofstream outNames; util.openOutputFile(outputFileNames, outNames); bool wroteSomething = false; int num = 0; //go through each bin, find out if shared for (int i = 0; i < lookup->getNumBins(); i++) { if (m->getControl_pressed()) { outNames.close(); util.mothurRemove(outputFileNames); return 0; } bool uniqueOTU = true; map atLeastOne; for (int f = 0; f < Groups.size(); f++) { atLeastOne[Groups[f]] = 0; } set namesOfGroupsInThisBin; vector groupNames = lookup->getNamesGroups(); for(int j = 0; j < lookup->size(); j++) { string seqGroup = groupNames[j]; string name = lookup->getOTUName(i); int abund = lookup->get(i, seqGroup); if (abund != 0) { if (output != "accnos") { namesOfGroupsInThisBin.insert(name + "|" + seqGroup + "|" + toString(abund)); }else { namesOfGroupsInThisBin.insert(name); } //is this seq in one of the groups we care about it = groupFinder.find(seqGroup); if (it == groupFinder.end()) { uniqueOTU = false; } //you have sequences from a group you don't want else { atLeastOne[seqGroup]++; } } } //make sure you have at least one seq from each group you want bool sharedByAll = true; map::iterator it2; for (it2 = atLeastOne.begin(); it2 != atLeastOne.end(); it2++) { if (it2->second == 0) { sharedByAll = false; } } //if the user wants unique bins and this is unique then print //or this the user wants shared bins and this bin is shared then print if ((unique && uniqueOTU && sharedByAll) || (!unique && sharedByAll)) { wroteSomething = true; num++; //output list of names for (set::iterator itNames = namesOfGroupsInThisBin.begin(); itNames != namesOfGroupsInThisBin.end(); itNames++) { outNames << (*itNames) << endl; } } } outNames.close(); if (!wroteSomething) { util.mothurRemove(outputFileNames); string outputString = "\t" + toString(num) + " - No otus shared by groups"; string groupString = ""; for (int h = 0; h < Groups.size(); h++) { groupString += " " + Groups[h]; } outputString += groupString + "."; m->mothurOut(outputString); m->mothurOutEndLine(); }else { m->mothurOut(lookup->getLabel() + "\t" + toString(num)+"\n"); outputNames.push_back(outputFileNames); if (output != "accnos") { outputTypes["sharedseqs"].push_back(outputFileNames); 
} else { outputTypes["accnos"].push_back(outputFileNames); } } return 0; } catch(exception& e) { m->errorOut(e, "GetSharedOTUCommand", "process"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/getsharedotucommand.h000077500000000000000000000033621424121717000224370ustar00rootroot00000000000000#ifndef GETSHAREDOTUCOMMAND_H #define GETSHAREDOTUCOMMAND_H /* * getsharedotucommand.h * Mothur * * Created by westcott on 9/22/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "listvector.hpp" #include "sequence.hpp" #include "groupmap.h" #include "counttable.h" #include "inputdata.h" //********************************************************************************************************************** class GetSharedOTUCommand : public Command { public: GetSharedOTUCommand(string); ~GetSharedOTUCommand() = default; vector<string> setParameters(); string getCommandName() { return "get.sharedseqs"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getRequiredCommand() { return "none"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.sharedseqs"; } string getDescription() { return "identifies sequences that are either unique or shared by specific groups"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: ListVector* list; GroupMap* groupMap; CountTable* ct; set<string> labels; string fastafile, label, groups, listfile, groupfile, sharedfile, output, userGroups, format, countfile; bool abort, allLines, unique; vector<string> Groups; map<string, string> groupFinder; map<string, string>::iterator it; vector<Sequence> seqs; vector<string> outputNames; int process(ListVector*); int process(SharedRAbundVectors*&); int runShared(); }; //********************************************************************************************************************** #endif mothur-1.48.0/source/commands/heatmapcommand.cpp000077500000000000000000000325661424121717000217210ustar00rootroot00000000000000/* * heatmapcommand.cpp * Mothur * * Created by Sarah Westcott on 3/25/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
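 *
 * A minimal usage sketch for this command (the file name is hypothetical), following
 * the examples and defaults given in getHelpString() below; one .svg heatmap is
 * written per label found in the input file:
 *
 *   mothur > heatmap.bin(shared=final.shared, scale=log10, sorted=shared, fontsize=24)
 *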
* */ #include "heatmapcommand.h" //********************************************************************************************************************** vector HeatMapCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","svg",false,false,true); parameters.push_back(plist); CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","svg",false,false); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","svg",false,false); parameters.push_back(psabund); CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","svg",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none","svg",false,false); parameters.push_back(prelabund); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pscale("scale", "Multiple", "log10-log2-linear", "log10", "", "", "","",false,false); parameters.push_back(pscale); CommandParameter psorted("sorted", "Multiple", "none-shared-topotu-topgroup", "shared", "", "", "","",false,false); parameters.push_back(psorted); CommandParameter pnumotu("numotu", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pnumotu); CommandParameter pfontsize("fontsize", "Number", "", "24", "", "", "","",false,false); parameters.push_back(pfontsize); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["svg"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "HeatMapCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string HeatMapCommand::getHelpString(){ try { string helpString = ""; helpString += "The heatmap.bin command parameters are shared, relabund, list, rabund, sabund, groups, sorted, scale, numotu, fontsize and label. shared, relabund, list, rabund or sabund is required unless you have a valid current file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n"; helpString += "The sorted parameter allows you to order the otus displayed, default=shared, meaning display the shared otus first. Other options for sorted are none, meaning the exact representation of your otus, \n"; helpString += "topotu, meaning the otus with the greatest abundance when totaled across groups, topgroup, meaning the top otus for each group. \n"; helpString += "The scale parameter allows you to choose the range of color your bin information will be displayed with.\n"; helpString += "The numotu parameter allows you to display only the top N otus, by default all the otus are displayed. You could choose to look at the top 10, by setting numotu=10. 
The default for sorted is topotu when numotu is used.\n"; helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and are also separated by dashes.\n"; helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n"; helpString += "The heatmap.bin command should be in the following format: heatmap.bin(groups=yourGroups, sorted=yourSorted, label=yourLabels).\n"; helpString += "Example heatmap.bin(groups=A-B-C, sorted=none, scale=log10).\n"; helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; helpString += "The default value for scale is log10; your other options are log2 and linear.\n"; helpString += "The heatmap.bin command outputs a .svg file for each label you specify.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "HeatMapCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string HeatMapCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "svg") { pattern = "[filename],svg"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "HeatMapCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** HeatMapCommand::HeatMapCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; inputfile = listfile; current->setListFile(listfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } else { format = "sabund"; inputfile = sabundfile; current->setSabundFile(sabundfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } else { format = "rabund"; inputfile = rabundfile; current->setRabundFile(rabundfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { format = "sharedfile"; inputfile = sharedfile; current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { format = "relabund"; inputfile = relabundfile; current->setRelAbundFile(relabundfile); } if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "") && (relabundfile == "")) { //is there are current file available for any of these? 
//give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { rabundfile = current->getRabundFile(); if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter.\n"); } else { sabundfile = current->getSabundFile(); if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file.\n"); abort = true; } } } } } } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string temp = validParameter.valid(parameters, "numotu"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, numOTU); temp = validParameter.valid(parameters, "fontsize"); if (temp == "not found") { temp = "24"; } util.mothurConvert(temp, fontSize); sorted = validParameter.valid(parameters, "sorted"); if (sorted == "not found") { //if numOTU is used change default if (numOTU != 0) { sorted = "topotu"; } else { sorted = "shared"; } } scale = validParameter.valid(parameters, "scale"); if (scale == "not found") { scale = "log10"; } if ((sorted != "none") && (sorted != "shared") && (sorted != "topotu") && (sorted != "topgroup")) { m->mothurOut(sorted + " is not a valid sorting option. 
Sorted options are: none, shared, topotu, topgroup\n"); abort=true; } } } catch(exception& e) { m->errorOut(e, "HeatMapCommand", "HeatMapCommand"); exit(1); } } //********************************************************************************************************************** int HeatMapCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(inputfile, format, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; HeatMap heatmap(sorted, scale, numOTU, fontSize, outputdir, inputfile); if (format == "sharedfile") { SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } string outputFileName = heatmap.getPic(lookup); delete lookup; outputNames.push_back(outputFileName); outputTypes["svg"].push_back(outputFileName); lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } }else if ((format == "list") || (format == "rabund") || (format == "sabund")) { RAbundVector* rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); while (rabund != nullptr) { if (m->getControl_pressed()) { delete rabund; break; } string outputFileName = heatmap.getPic(rabund); delete rabund; outputNames.push_back(outputFileName); outputTypes["svg"].push_back(outputFileName); rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); } }else if (format == "relabund") { SharedRAbundFloatVectors* lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } string outputFileName = heatmap.getPic(lookup); delete lookup; outputNames.push_back(outputFileName); outputTypes["svg"].push_back(outputFileName); lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { if (outputNames[i] != "control") { util.mothurRemove(outputNames[i]); } } outputTypes.clear(); return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "HeatMapCommand", "execute"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/heatmapcommand.h000077500000000000000000000022461424121717000213600ustar00rootroot00000000000000#ifndef HEATMAPCOMMAND_H #define HEATMAPCOMMAND_H /* * heatmapcommand.h * Mothur * * Created by Sarah Westcott on 3/25/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
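 *
 * A minimal sketch of how command classes like this one are driven (the option
 * string and the cleanup shown here are illustrative assumptions): construct the
 * command with a mothur option string and then call execute(); helpcommand.cpp,
 * for example, constructs each command with the option string "help" so that only
 * its help text is printed:
 *
 *   Command* command = new HeatMapCommand("shared=final.shared, scale=log10");
 *   command->execute();
 *   delete command;
 *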
* */ #include "command.hpp" #include "inputdata.h" #include "sharedlistvector.h" #include "heatmap.h" #include "rabundvector.hpp" class HeatMapCommand : public Command { public: HeatMapCommand(string); ~HeatMapCommand(){} vector setParameters(); string getCommandName() { return "heatmap.bin"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Heatmap.bin"; } string getDescription() { return "generate a heatmap where the color represents the relative abundanceof an OTU"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines; set labels; //holds labels to be used string format, groups, sorted, scale, label, sharedfile, relabundfile, listfile, rabundfile, sabundfile, inputfile; vector Groups, outputNames; int numOTU, fontSize; }; #endif mothur-1.48.0/source/commands/heatmapsimcommand.cpp000077500000000000000000000446641424121717000224360ustar00rootroot00000000000000/* * heatmapsimcommand.cpp * Mothur * * Created by Sarah Westcott on 6/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "heatmapsimcommand.h" #include "sharedjabund.h" #include "sharedsorabund.h" #include "sharedjclass.h" #include "sharedsorclass.h" #include "sharedjest.h" #include "sharedsorest.h" #include "sharedthetayc.h" #include "sharedthetan.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" //********************************************************************************************************************** vector HeatMapSimCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none","svg",false,false,true); parameters.push_back(pshared); CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none","svg",false,false); parameters.push_back(pphylip); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","",false,false); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false); parameters.push_back(pcount); CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName","svg",false,false); parameters.push_back(pcolumn); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pcalc("calc", "Multiple", "jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-morisitahorn-braycurtis", "jest-thetayc", "", "", "","",true,false); parameters.push_back(pcalc); CommandParameter pfontsize("fontsize", "Number", "", "24", "", "", "","",false,false); parameters.push_back(pfontsize); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; format = ""; vector tempOutNames; outputTypes["svg"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } 
catch(exception& e) { m->errorOut(e, "HeatMapSimCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string HeatMapSimCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; helpString += "The heatmap.sim command parameters are shared, phylip, column, name, count, groups, calc, fontsize and label. shared or phylip or column and name are required unless valid current files exist.\n"; helpString += "There are two ways to use the heatmap.sim command. The first is with a shared file, and you may use the groups, label and calc parameter. \n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n"; helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and is also separated by dashes.\n"; helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n"; helpString += "The heatmap.sim command should be in the following format: heatmap.sim(groups=yourGroups, calc=yourCalc, label=yourLabels).\n"; helpString += "Example heatmap.sim(groups=A-B-C, calc=jabund).\n"; helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; helpString += validCalculator.printCalc("heat"); helpString += "The default value for calc is jclass-thetayc.\n"; helpString += "The heatmap.sim command outputs a .svg file for each calculator you choose at each label you specify.\n"; helpString += "The second way to use the heatmap.sim command is with a distance file representing the distance bewteen your groups. \n"; helpString += "Using the command this way, the phylip or column parameter are required, and only one may be used. If you use a column file the name filename is required. 
\n"; helpString += "The heatmap.sim command should be in the following format: heatmap.sim(phylip=yourDistanceFile).\n"; helpString += "Example heatmap.sim(phylip=amazonGroups.dist).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "HeatMapSimCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string HeatMapSimCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "svg") { pattern = "[filename],svg"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "HeatMapCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** HeatMapSimCommand::HeatMapSimCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { abort = true; } else if (phylipfile == "not found") { phylipfile = ""; } else { format = "phylip"; inputfile = phylipfile; current->setPhylipFile(phylipfile); if (outputdir == "") { outputdir += util.hasPath(phylipfile); } } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { abort = true; } else if (columnfile == "not found") { columnfile = ""; } else { format = "column"; inputfile = columnfile; current->setColumnFile(columnfile); if (outputdir == "") { outputdir += util.hasPath(columnfile); } } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { format = "shared"; inputfile = sharedfile; current->setSharedFile(sharedfile); if (outputdir == "") { outputdir += util.hasPath(sharedfile); } } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name.\n"); abort = true; } //error checking on files if ((sharedfile == "") && ((phylipfile == "") && (columnfile == ""))) { sharedfile = current->getSharedFile(); if (sharedfile != "") { format = "shared"; inputfile = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { //is there are current file available for either of these? 
//give priority to column, then phylip columnfile = current->getColumnFile(); if (columnfile != "") { format = "column"; inputfile = columnfile; m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { phylipfile = current->getPhylipFile(); if (phylipfile != "") { format = "phylip"; inputfile = phylipfile; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a shared or phylip or column file.\n"); abort = true; } } } } else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When running the heatmap.sim command with a distance file you may not use both the column and the phylip parameters.\n"); abort = true; } if (columnfile != "") { if (namefile == "") { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("You need to provide a name or count file if you are going to use the column format.\n"); abort = true; } } } } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "jest-thetayc"; } else { if (calc == "default") { calc = "jest-thetayc"; } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string temp = validParameter.valid(parameters, "fontsize"); if (temp == "not found") { temp = "24"; } util.mothurConvert(temp, fontsize); if (!abort) { ValidCalculators validCalculator; int i; for (i=0; ierrorOut(e, "HeatMapSimCommand", "HeatMapSimCommand"); exit(1); } } //********************************************************************************************************************** int HeatMapSimCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } heatmap = new HeatMapSim(outputdir, inputfile, fontsize); if (format == "shared") { runCommandShared(); } else { runCommandDist(); } delete heatmap; if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "HeatMapSimCommand", "execute"); exit(1); } } //********************************************************************************************************************** int HeatMapSimCommand::runCommandShared() { try { //if the users entered no valid calculators don't 
execute command if (heatCalculators.size() == 0) { m->mothurOut("No valid calculators.\n"); return 0; } InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); if (lookup->size() < 2) { m->mothurOut("[ERROR]: You have not provided enough valid groups. I cannot run the command, quitting\n"); return 0;} while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } vector outfilenames = heatmap->getPic(lookup, heatCalculators, lookup->getNamesGroups()); delete lookup; for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } return 0; } catch(exception& e) { m->errorOut(e, "HeatMapSimCommand", "runCommandShared"); exit(1); } } //********************************************************************************************************************** int HeatMapSimCommand::runCommandDist() { try { vector< vector > matrix; vector names; ifstream in; //read distance file and create distance vector and names vector if (format == "phylip") { //read phylip file util.openInputFile(phylipfile, in); string name; int numSeqs; in >> numSeqs >> name; //save name names.push_back(name); //resize the matrix and fill with zeros matrix.resize(numSeqs); for(int i = 0; i < numSeqs; i++) { matrix[i].resize(numSeqs, 0.0); } //determine if matrix is square or lower triangle //if it is square read the distances for the first sequence char d; bool square = false; while((d=in.get()) != EOF){ //is d a number meaning its square if(isalnum(d)){ square = true; in.putback(d); for(int i=0;i> matrix[0][i]; } break; } //is d a line return meaning its lower triangle if(d == '\n'){ square = false; break; } } //read rest of matrix if (square ) { for(int i=1;i> name; names.push_back(name); if (m->getControl_pressed()) { return 0; } for(int j=0;j> matrix[i][j]; } gobble(in); } }else { double dist; for(int i=1;i> name; names.push_back(name); if (m->getControl_pressed()) { return 0; } for(int j=0;j> dist; matrix[i][j] = dist; matrix[j][i] = dist; } gobble(in); } } in.close(); }else { //read names file NameAssignment* nameMap; CountTable ct; if (namefile != "") { nameMap = new NameAssignment(namefile); nameMap->readMap(); //put names in order in vector for (int i = 0; i < nameMap->size(); i++) { names.push_back(nameMap->get(i)); } }else if (countfile != "") { nameMap = nullptr; ct.readTable(countfile, true, false); names = ct.getNamesOfSeqs(); } //resize matrix matrix.resize(names.size()); for (int i = 0; i < names.size(); i++) { matrix[i].resize(names.size(), 0.0); } //read column file string first, second; double dist; util.openInputFile(columnfile, in); while (!in.eof()) { in >> first >> second >> dist; gobble(in); if (m->getControl_pressed()) { return 0; } if (namefile != "") { map::iterator itA = nameMap->find(first); map::iterator itB = nameMap->find(second); if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + first + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + second + "' was not found in the names file, please correct\n"); exit(1); } //save distance matrix[itA->second][itB->second] = dist; matrix[itB->second][itA->second] = dist; }else if 
(countfile != "") { int itA = ct.get(first); int itB = ct.get(second); matrix[itA][itB] = dist; matrix[itB][itA] = dist; } } in.close(); if (namefile != "") { delete nameMap; } } string outputFileName = heatmap->getPic(matrix, names); outputNames.push_back(outputFileName); //vector>, vector outputTypes["svg"].push_back(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, "HeatMapSimCommand", "runCommandDist"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/heatmapsimcommand.h000077500000000000000000000024751424121717000220750ustar00rootroot00000000000000#ifndef HEATMAPSIMCOMMAND_H #define HEATMAPSIMCOMMAND_H /* * heatmapsimcommand.h * Mothur * * Created by Sarah Westcott on 6/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" #include "validcalculator.h" #include "heatmapsim.h" #include "nameassignment.hpp" class HeatMapSimCommand : public Command { public: HeatMapSimCommand(string); ~HeatMapSimCommand(){} vector setParameters(); string getCommandName() { return "heatmap.sim"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Heatmap.sim"; } string getDescription() { return "generate a heatmap indicating the pairwise distance between multiple samples using a variety of calculators"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector heatCalculators; HeatMapSim* heatmap; bool abort, allLines; set labels; //holds labels to be used string format, groups, label, calc, sharedfile, phylipfile, columnfile, countfile, namefile, inputfile; vector Estimators, Groups, outputNames; int fontsize; int runCommandShared(); int runCommandDist(); }; #endif mothur-1.48.0/source/commands/helpcommand.cpp000066400000000000000000001044221424121717000212200ustar00rootroot00000000000000/* * helpcommand.cpp * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "helpcommand.h" #include "command.hpp" #include "clustercommand.h" #include "collectcommand.h" #include "collectsharedcommand.h" #include "getgroupcommand.h" #include "getlabelcommand.h" #include "rarefactcommand.h" #include "summarycommand.h" #include "summarysharedcommand.h" #include "rarefactsharedcommand.h" #include "quitcommand.h" #include "helpcommand.h" #include "commandfactory.hpp" #include "uniqueseqscommand.h" #include "parsimonycommand.h" #include "unifracunweightedcommand.h" #include "unifracweightedcommand.h" #include "libshuffcommand.h" #include "heatmapcommand.h" #include "heatmapsimcommand.h" #include "filterseqscommand.h" #include "venncommand.h" #include "nocommands.h" #include "binsequencecommand.h" #include "getoturepcommand.h" #include "treesharedcommand.h" #include "distancecommand.h" #include "aligncommand.h" #include "distsharedcommand.h" #include "getsabundcommand.h" #include "getrabundcommand.h" #include "seqsummarycommand.h" #include "screenseqscommand.h" #include "reversecommand.h" #include "trimseqscommand.h" #include "mergefilecommand.h" #include "listseqscommand.h" #include "getseqscommand.h" #include "removeseqscommand.h" #include "systemcommand.h" #include "aligncheckcommand.h" #include "getsharedotucommand.h" #include "getlistcountcommand.h" #include "classifyseqscommand.h" #include "phylotypecommand.h" #include "mgclustercommand.h" #include "preclustercommand.h" #include "pcoacommand.h" #include "otuhierarchycommand.h" #include "setdircommand.h" #include "chimeraccodecommand.h" #include "chimeracheckcommand.h" #include "chimeraslayercommand.h" #include "chimerapintailcommand.h" #include "chimerabellerophoncommand.h" #include "chimerauchimecommand.h" #include "setlogfilecommand.h" #include "phylodiversitycommand.h" #include "makegroupcommand.h" #include "chopseqscommand.h" #include "clearcutcommand.h" #include "splitabundcommand.h" #include "clustersplitcommand.h" #include "classifyotucommand.h" #include "degapseqscommand.h" #include "getrelabundcommand.h" #include "sensspeccommand.h" #include "sffinfocommand.h" #include "seqerrorcommand.h" #include "normalizesharedcommand.h" #include "metastatscommand.h" #include "splitgroupscommand.h" #include "clusterfragmentscommand.h" #include "getlineagecommand.h" #include "removelineagecommand.h" #include "fastaqinfocommand.h" #include "deuniqueseqscommand.h" #include "pairwiseseqscommand.h" #include "clusterdoturcommand.h" #include "subsamplecommand.h" #include "removegroupscommand.h" #include "getgroupscommand.h" #include "indicatorcommand.h" #include "consensusseqscommand.h" #include "trimflowscommand.h" #include "corraxescommand.h" #include "shhhercommand.h" #include "pcacommand.h" #include "nmdscommand.h" #include "removerarecommand.h" #include "mergegroupscommand.h" #include "amovacommand.h" #include "homovacommand.h" #include "mantelcommand.h" #include "makefastqcommand.h" #include "anosimcommand.h" #include "getcurrentcommand.h" #include "setcurrentcommand.h" #include "makesharedcommand.h" #include "deuniquetreecommand.h" #include "countseqscommand.h" #include "countgroupscommand.h" #include "summarytaxcommand.h" #include "chimeraperseuscommand.h" #include "shhhseqscommand.h" #include "summaryqualcommand.h" #include "otuassociationcommand.h" #include "sortseqscommand.h" #include "classifytreecommand.h" #include "cooccurrencecommand.h" #include "pcrseqscommand.h" #include "createdatabasecommand.h" #include "makebiomcommand.h" #include "getcoremicrobiomecommand.h" #include "listotuscommand.h" 
#include "getotuscommand.h" #include "removeotuscommand.h" #include "makecontigscommand.h" #include "sffmultiplecommand.h" #include "classifysvmsharedcommand.h" #include "filtersharedcommand.h" #include "primerdesigncommand.h" #include "getdistscommand.h" #include "removedistscommand.h" #include "mergetaxsummarycommand.h" #include "getmetacommunitycommand.h" #include "sparcccommand.h" #include "makelookupcommand.h" #include "renameseqscommand.h" #include "makelefsecommand.h" #include "lefsecommand.h" #include "kruskalwalliscommand.h" #include "sracommand.h" #include "mergesfffilecommand.h" #include "getmimarkspackagecommand.h" #include "mimarksattributescommand.h" #include "setseedcommand.h" #include "makefilecommand.h" #include "biominfocommand.h" #include "renamefilecommand.h" #include "chimeravsearchcommand.h" #include "mergecountcommand.hpp" //********************************************************************************************************************** HelpCommand::HelpCommand(string option) : Command() { validCommands = CommandFactory::getInstance(); abort = false; calledHelp = false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } commandName = option; } //********************************************************************************************************************** string HelpCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string question = "How do I cite mothur?"; questions.push_back(question); string qanswer = "\tSchloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.\n"; qanswers.push_back(qanswer); question = "Do you have an example analysis?"; questions.push_back(question); qanswer = "\tYes, https://mothur.org/wiki/454_SOP and https://mothur.org/wiki/MiSeq_SOP highlight some of the things you can do with mothur.\n"; qanswers.push_back(qanswer); question = "Do you offer workshops?"; questions.push_back(question); qanswer = "\tYes! Please see our https://mothur.org/wiki/Workshops page for more information.\n"; qanswers.push_back(qanswer); question = "What are mothur's file types?"; questions.push_back(question); qanswer = "\tMothur uses and creates many file types. Including fasta, name, group, design, count, list, rabund, sabund, shared, relabund, oligos, taxonomy, constaxonomy, phylip, column, flow, qfile, file, biom and tree. You can find out more about these formats here: https://www.mothur.org/wiki/File_Types.\n"; qanswers.push_back(qanswer); question = "Is there a list of all of mothur's commands?"; questions.push_back(question); qanswer = "\tYes! You can find it here, http://www.mothur.org/wiki/Category:Commands.\n"; qanswers.push_back(qanswer); question = "Why does the cutoff change when I cluster with average neighbor?"; questions.push_back(question); qanswer = "\tThis is a product of using the average neighbor algorithm with a sparse distance matrix. When you run cluster, the algorithm looks for pairs of sequences to merge in the rows and columns that are getting merged together. Let's say you set the cutoff to 0.05. 
If one cell has a distance of 0.03 and the cell it is getting merged with has a distance above 0.05 then the cutoff is reset to 0.03, because it's not possible to merge at a higher level and keep all the data. All of the sequences are still there from multiple phyla. Incidentally, although we always see this, it is a bigger problem for people that include sequences that do not fully overlap.\n"; qanswers.push_back(qanswer); string issue = "Mothur can't find my input files. What wrong?"; issues.push_back(issue); string ianswer = "\tBy default, mothur will then look for the input files in the directory where mothur's executable is located. Mothur will also search the input, output and temporary default locations. You can set these locations using the set.dir command: set.dir(input=/users/myuser/desktop/mothurdata). Alternatively you can provide complete file names, or move the input files to mothur's executable location.\n"; ianswers.push_back(ianswer); issue = "I installed the latest version, but I am still running an older version. Why?"; issues.push_back(issue); ianswer = "\tWe often see this issue when you have an older version of mothur installed in your path. You can find out where by opening a terminal window and running: \n\n\tyourusername$ which mothur\n\tpath_to_old_version\n\tfor example: yourusername$ which mothur\n\t/usr/local/bin\n\n\tWhen you find the location of the older version, you can delete it or move it out of your path with the following:\n\n\tyourusername$ mv path_to_old_version/mothur new_location\n\tfor example: yourusername$ mv /usr/local/bin/mothur /Users/yourusername/desktop/old_version_mothur\n"; ianswers.push_back(ianswer); issue = "File Mismatches - 'yourSequence is in fileA but not in fileB, please correct.'"; issues.push_back(issue); ianswer = "\tThe most common reason this occurs is because you forgot to include a name or count file on a command, or accidentally included the wrong one due to a typo. Mothur has a 'current' option, which allows you to set file parameters to 'current'. For example, if fasta=current mothur will use the last fasta file given or created. The current option was designed to help avoid typo mistakes due to mothur's long filenames. Another reason this might occur is a process failing when you are using multiple processors. If a process dies, a file can be incomplete which would cause a mismatch error downstream.\n"; ianswers.push_back(ianswer); issue = "I don't have enough RAM or processing power. What are my options?"; issues.push_back(issue); ianswer = "\tIf you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required.\n\tAlternatively, you can use AWS to run your analysis. Here are instructions: https://mothur.org/wiki/Mothur_AMI.\n"; ianswers.push_back(ianswer); issue = "Mothur crashes when I read my distance file. What's wrong?"; issues.push_back(issue); ianswer = "\tThere are two common causes for this, file size and format.\n\n\tFileSize:\tThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split. 
Cluster.split divides the dataset by taxonomic assignment and generates matrices for each grouping, and then clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outline in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP\n\n\tWrong Format:\tThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt.\n"; ianswers.push_back(ianswer); issue = "Why do I have such a large distance matrix?"; issues.push_back(issue); ianswer = "\tThis is most often caused by poor overlap of your reads. When reads have poor overlap, it greatly increases your error rate. Also, sequences that should cluster together don't because the errors appear to be genetic differences when in fact they are not. The quality of the data you are processing can not be overstressed. Error filled reads produce error filled results!\n\n\tCheck out Pat's blog: http://blog.mothur.org/2014/09/11/Why-such-a-large-distance-matrix/\n\n\tNOTE: To take a step back, if you look through our MiSeq SOP, you’ll see that we go to great pains to only work with the unique sequences to limit the number of sequences we have to align, screen for chimeras, classify, etc. We all know that 20 million reads will never make it through the pipeline without setting your computer on fire. Returning to the question at hand, you can imagine that if the reads do not fully overlap then any error in the 5’ end of the first read will be uncorrected by the 3’ end of the second read. If we assume for now that the errors are random, then every error will generate a new unique sequence. Granted, this happens less than 1% of the time, but multiply that by 20 million reads at whatever length you choose and you’ve got a big number. Viola, a bunch of unique reads and a ginormous distance matrix.\n"; ianswers.push_back(ianswer); issue = "Mothur reports a 'bad_alloc' error in the shhh.flows command. What's wrong?"; issues.push_back(issue); ianswer = "\tThis error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter.\n"; ianswers.push_back(ianswer); string howto = "How do I make a tree?"; howtos.push_back(howto); string hanswer = "\tMothur has two commands that create trees: clearcut and tree.shared.\n\n\tThe clearcut commands creates a phylogenetic tree that represents how sequences relate. The clearcut program written by Initiative for Bioinformatics and Evolutionary Studies (IBEST) at the University of Idaho. 
For more information about clearcut please refer to http://bioinformatics.hungry.com/clearcut/\n\n\tThe tree.shared command will generate a newick-formatted tree file that describes the dissimilarity (1-similarity) among multiple groups. Groups are clustered using the UPGMA algorithm using the distance between communities as calculated using any of the calculators describing the similarity in community membership or structure.\n"; hanswers.push_back(hanswer); howto = "How do I know 'who' is in an OTU in a shared file?"; howtos.push_back(howto); hanswer = "\tYou can run the get.otulist command on the list file you used to generate the shared file. You want to be sure you are comparing the same distances. ie final.opti_mcc.0.03.otulist would relate to the 0.03 distance in your shared file. Also, if you subsample your data set and want to compare things, be sure to subsample the list and group file and then create the shared file to make sure you are working with the same sequences.\n\n\tsub.sample(list=yourListFile, count=yourCountFile, persample=t)\n\tmake.shared(list=yourSubsampledListFile, group=yourSubsampledCountFile, label=0.03)\n\tget.otulist(list=yourSubsampledListFile, label=0.03)\n"; hanswers.push_back(hanswer); howto = "How do I know 'who' is in the OTUs represented in the venn picture?"; howtos.push_back(howto); hanswer = "\tYou can use the get.sharedseqs command. Be sure to pay close attention to the 'unique' and 'shared' parameters.\n"; hanswers.push_back(hanswer); howto = "How do I select certain sequences or groups of sequences?"; howtos.push_back(howto); hanswer = "\tMothur has several 'get' and 'remove' commands: get.seqs, get.lineage, get.groups, get.dists, get.otus, remove.seqs, remove.lineage, remove.dists, remove.otus and remove.groups.\n"; hanswers.push_back(hanswer); howto = "How do I visualize my results from mothur?"; howtos.push_back(howto); hanswer = "\tTo visual your data with R follow this tutorial http://www.riffomonas.org/minimalR/06_line_plots.html.\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "HelpCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** int HelpCommand::execute(){ try { if (commandName != "") { if (validCommands->isValidCommand(commandName)) { Command* command; string optionString = "help"; if(commandName == "cluster") { command = new ClusterCommand(optionString); } else if(commandName == "unique.seqs") { command = new UniqueSeqsCommand(optionString); } else if(commandName == "parsimony") { command = new ParsimonyCommand(optionString); } else if(commandName == "help") { command = new HelpCommand(optionString); } else if(commandName == "quit") { command = new QuitCommand(optionString); } else if(commandName == "collect.single") { command = new CollectCommand(optionString); } else if(commandName == "collect.shared") { command = new CollectSharedCommand(optionString); } else if(commandName == "rarefaction.single") { command = new RareFactCommand(optionString); } else if(commandName == "rarefaction.shared") { command = new RareFactSharedCommand(optionString); } else if(commandName == "summary.single") { command = new SummaryCommand(optionString); } else if(commandName == "summary.shared") { command = new SummarySharedCommand(optionString); } else if(commandName == "unifrac.weighted") 
{ command = new UnifracWeightedCommand(optionString); } else if(commandName == "unifrac.unweighted") { command = new UnifracUnweightedCommand(optionString); } else if(commandName == "get.group") { command = new GetgroupCommand(optionString); } else if(commandName == "get.label") { command = new GetlabelCommand(optionString); } else if(commandName == "get.sabund") { command = new GetSAbundCommand(optionString); } else if(commandName == "get.rabund") { command = new GetRAbundCommand(optionString); } else if(commandName == "libshuff") { command = new LibShuffCommand(optionString); } else if(commandName == "heatmap.bin") { command = new HeatMapCommand(optionString); } else if(commandName == "heatmap.sim") { command = new HeatMapSimCommand(optionString); } else if(commandName == "filter.seqs") { command = new FilterSeqsCommand(optionString); } else if(commandName == "venn") { command = new VennCommand(optionString); } else if(commandName == "bin.seqs") { command = new BinSeqCommand(optionString); } else if(commandName == "get.oturep") { command = new GetOTURepCommand(optionString); } else if(commandName == "tree.shared") { command = new TreeSharedCommand(optionString); } else if(commandName == "dist.shared") { command = new DistSharedCommand(optionString); } else if(commandName == "dist.seqs") { command = new DistanceCommand(optionString); } else if(commandName == "align.seqs") { command = new AlignCommand(optionString); } else if(commandName == "summary.seqs") { command = new SeqSummaryCommand(optionString); } else if(commandName == "screen.seqs") { command = new ScreenSeqsCommand(optionString); } else if(commandName == "reverse.seqs") { command = new ReverseSeqsCommand(optionString); } else if(commandName == "trim.seqs") { command = new TrimSeqsCommand(optionString); } else if(commandName == "trim.flows") { command = new TrimFlowsCommand(optionString); } else if(commandName == "shhh.flows") { command = new ShhherCommand(optionString); } else if(commandName == "list.seqs") { command = new ListSeqsCommand(optionString); } else if(commandName == "get.seqs") { command = new GetSeqsCommand(optionString); } else if(commandName == "remove.seqs") { command = new RemoveSeqsCommand(optionString); } else if(commandName == "merge.files") { command = new MergeFileCommand(optionString); } else if(commandName == "system") { command = new SystemCommand(optionString); } else if(commandName == "align.check") { command = new AlignCheckCommand(optionString); } else if(commandName == "get.sharedseqs") { command = new GetSharedOTUCommand(optionString); } else if(commandName == "get.otulist") { command = new GetListCountCommand(optionString); } else if(commandName == "classify.seqs") { command = new ClassifySeqsCommand(optionString); } else if(commandName == "chimera.ccode") { command = new ChimeraCcodeCommand(optionString); } else if(commandName == "chimera.check") { command = new ChimeraCheckCommand(optionString); } else if(commandName == "chimera.slayer") { command = new ChimeraSlayerCommand(optionString); } else if(commandName == "chimera.uchime") { command = new ChimeraUchimeCommand(optionString); } else if(commandName == "chimera.pintail") { command = new ChimeraPintailCommand(optionString); } else if(commandName == "chimera.bellerophon") { command = new ChimeraBellerophonCommand(optionString); } else if(commandName == "chimera.vsearch") { command = new ChimeraVsearchCommand(optionString); } else if(commandName == "phylotype") { command = new PhylotypeCommand(optionString); } else if(commandName == 
"mgcluster") { command = new MGClusterCommand(optionString); } else if(commandName == "pre.cluster") { command = new PreClusterCommand(optionString); } else if(commandName == "pcoa") { command = new PCOACommand(optionString); } else if(commandName == "pca") { command = new PCACommand(optionString); } else if(commandName == "nmds") { command = new NMDSCommand(optionString); } else if(commandName == "otu.hierarchy") { command = new OtuHierarchyCommand(optionString); } else if(commandName == "set.dir") { command = new SetDirectoryCommand(optionString); } else if(commandName == "set.logfile") { command = new SetLogFileCommand(optionString); } else if(commandName == "phylo.diversity") { command = new PhyloDiversityCommand(optionString); } else if((commandName == "make.group") || (commandName == "make.count")) { command = new MakeGroupCommand(optionString); } else if(commandName == "chop.seqs") { command = new ChopSeqsCommand(optionString); } else if(commandName == "clearcut") { command = new ClearcutCommand(optionString); } else if(commandName == "split.abund") { command = new SplitAbundCommand(optionString); } else if(commandName == "cluster.split") { command = new ClusterSplitCommand(optionString); } else if(commandName == "classify.otu") { command = new ClassifyOtuCommand(optionString); } else if(commandName == "degap.seqs") { command = new DegapSeqsCommand(optionString); } else if(commandName == "get.relabund") { command = new GetRelAbundCommand(optionString); } else if(commandName == "sens.spec") { command = new SensSpecCommand(optionString); } else if(commandName == "seq.error") { command = new SeqErrorCommand(optionString); } else if(commandName == "sffinfo") { command = new SffInfoCommand(optionString); } else if(commandName == "normalize.shared") { command = new NormalizeSharedCommand(optionString); } else if(commandName == "metastats") { command = new MetaStatsCommand(optionString); } else if(commandName == "split.groups") { command = new SplitGroupCommand(optionString); } else if(commandName == "cluster.fragments") { command = new ClusterFragmentsCommand(optionString); } else if(commandName == "get.lineage") { command = new GetLineageCommand(optionString); } else if(commandName == "remove.lineage") { command = new RemoveLineageCommand(optionString); } else if(commandName == "get.groups") { command = new GetGroupsCommand(optionString); } else if(commandName == "remove.groups") { command = new RemoveGroupsCommand(optionString); } else if((commandName == "get.otus") || (commandName == "get.otulabels")) { command = new GetOtusCommand(optionString); } else if((commandName == "remove.otus") || (commandName == "remove.otulabels")) { command = new RemoveOtusCommand(optionString); } else if((commandName == "list.otus") ||(commandName == "list.otulabels")) { command = new ListOtusCommand(optionString); } else if(commandName == "fastq.info") { command = new ParseFastaQCommand(optionString); } else if(commandName == "deunique.seqs") { command = new DeUniqueSeqsCommand(optionString); } else if(commandName == "pairwise.seqs") { command = new PairwiseSeqsCommand(optionString); } else if(commandName == "cluster.classic") { command = new ClusterDoturCommand(optionString); } else if(commandName == "sub.sample") { command = new SubSampleCommand(optionString); } else if(commandName == "indicator") { command = new IndicatorCommand(optionString); } else if(commandName == "consensus.seqs") { command = new ConsensusSeqsCommand(optionString); } else if(commandName == "corr.axes") { command = new 
CorrAxesCommand(optionString); } else if(commandName == "remove.rare") { command = new RemoveRareCommand(optionString); } else if(commandName == "merge.groups") { command = new MergeGroupsCommand(optionString); } else if(commandName == "merge.count") { command = new MergeCountCommand(optionString); } else if(commandName == "amova") { command = new AmovaCommand(optionString); } else if(commandName == "homova") { command = new HomovaCommand(optionString); } else if(commandName == "mantel") { command = new MantelCommand(optionString); } else if(commandName == "make.fastq") { command = new MakeFastQCommand(optionString); } else if(commandName == "get.current") { command = new GetCurrentCommand(optionString); } else if(commandName == "set.current") { command = new SetCurrentCommand(optionString); } else if(commandName == "anosim") { command = new AnosimCommand(optionString); } else if(commandName == "make.shared") { command = new SharedCommand(optionString); } else if(commandName == "deunique.tree") { command = new DeuniqueTreeCommand(optionString); } else if((commandName == "count.seqs") || (commandName == "make.table")) { command = new CountSeqsCommand(optionString); } else if(commandName == "count.groups") { command = new CountGroupsCommand(optionString); } else if(commandName == "summary.tax") { command = new SummaryTaxCommand(optionString); } else if(commandName == "summary.qual") { command = new SummaryQualCommand(optionString); } else if(commandName == "chimera.perseus") { command = new ChimeraPerseusCommand(optionString); } else if(commandName == "shhh.seqs") { command = new ShhhSeqsCommand(optionString); } else if(commandName == "otu.association") { command = new OTUAssociationCommand(optionString); } else if(commandName == "sort.seqs") { command = new SortSeqsCommand(optionString); } else if(commandName == "classify.tree") { command = new ClassifyTreeCommand(optionString); } else if(commandName == "cooccurrence") { command = new CooccurrenceCommand(optionString); } else if(commandName == "pcr.seqs") { command = new PcrSeqsCommand(optionString); } else if(commandName == "create.database") { command = new CreateDatabaseCommand(optionString); } else if(commandName == "make.biom") { command = new MakeBiomCommand(optionString); } else if(commandName == "get.coremicrobiome") { command = new GetCoreMicroBiomeCommand(optionString); } else if(commandName == "make.contigs") { command = new MakeContigsCommand(optionString); } else if(commandName == "sff.multiple") { command = new SffMultipleCommand(optionString); } else if(commandName == "classify.svm") { command = new ClassifySvmSharedCommand(optionString); } else if(commandName == "filter.shared") { command = new FilterSharedCommand(optionString); } else if(commandName == "primer.design") { command = new PrimerDesignCommand(optionString); } else if(commandName == "get.dists") { command = new GetDistsCommand(optionString); } else if(commandName == "remove.dists") { command = new RemoveDistsCommand(optionString); } else if(commandName == "merge.taxsummary") { command = new MergeTaxSummaryCommand(optionString); } else if(commandName == "get.communitytype") { command = new GetMetaCommunityCommand(optionString); } else if(commandName == "sparcc") { command = new SparccCommand(optionString); } else if(commandName == "make.lookup") { command = new MakeLookupCommand(optionString); } else if(commandName == "rename.seqs") { command = new RenameSeqsCommand(optionString); } else if(commandName == "make.lefse") { command = new 
MakeLefseCommand(optionString); } else if(commandName == "lefse") { command = new LefseCommand(optionString); } else if(commandName == "kruskal.wallis") { command = new KruskalWallisCommand(optionString); } else if(commandName == "make.sra") { command = new SRACommand(optionString); } else if(commandName == "merge.sfffiles") { command = new MergeSfffilesCommand(optionString); } else if(commandName == "get.mimarkspackage") { command = new GetMIMarksPackageCommand(optionString); } else if(commandName == "mimarks.attributes") { command = new MimarksAttributesCommand(optionString); } else if(commandName == "set.seed") { command = new SetSeedCommand(optionString); } else if(commandName == "make.file") { command = new MakeFileCommand(optionString); } else if(commandName == "biom.info") { command = new BiomInfoCommand(optionString); } else if(commandName == "rename.file") { command = new RenameFileCommand(optionString); } else { command = new NoCommand(optionString); } command->execute(); delete command; }else { m->mothurOut("[ERROR]: " + commandName + " is not a valid command.\n"); validCommands->printCommands(cout); } }else { //validCommands->printCommands(cout); validCommands->printCommandsCategories(cout); #if defined NON_WINDOWS cout << BOLDMAGENTA << "\nFor more information about a specific command type 'commandName(help)' i.e. 'cluster(help)'\n"; cout << RESET << endl; m->mothurOutJustToLog("\nFor more information about a specific command type 'commandName(help)' i.e. 'cluster(help)'\n"); #else m->mothurOut("\nFor more information about a specific command type 'commandName(help)' i.e. 'cluster(help)'\n"); #endif getCommonQuestions(); } #if defined NON_WINDOWS cout << BOLDMAGENTA << "\nFor further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at mothur.bugs@gmail.com.\n"; cout << RESET << endl; m->mothurOutJustToLog("\nFor further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at mothur.bugs@gmail.com.\n"); #else m->mothurOut("\nFor further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at mothur.bugs@gmail.com.\n"); #endif return 0; } catch(exception& e) { m->errorOut(e, "HelpCommand", "execute"); exit(1); } } //**********************************************************************************************************************/ mothur-1.48.0/source/commands/helpcommand.h000077500000000000000000000021571424121717000206720ustar00rootroot00000000000000#ifndef HELPCOMMAND_H #define HELPCOMMAND_H /* * helpcommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class is designed to aid the user in running mothur. */ #include "command.hpp" #include "commandfactory.hpp" class HelpCommand : public Command { public: HelpCommand(string); ~HelpCommand(){} vector setParameters() { return outputNames; } //dummy, doesn't really do anything string getCommandName() { return "help"; } string getCommandCategory() { return "Hidden"; } string getHelpString() { return "For more information about a specific command type 'commandName(help)' i.e. 
'cluster(help)'"; } string getCommonQuestions(); string getOutputPattern(string) { return ""; } string getCitation() { return "no citation"; } string getDescription() { return "help"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: CommandFactory* validCommands; vector outputNames; bool abort, calledHelp; string commandName; }; #endif mothur-1.48.0/source/commands/homovacommand.cpp000077500000000000000000000363101424121717000215640ustar00rootroot00000000000000/* * homovacommand.cpp * mothur * * Created by westcott on 2/8/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "homovacommand.h" #include "groupmap.h" #include "readphylipvector.h" #include "designmap.h" //********************************************************************************************************************** vector HomovaCommand::setParameters(){ try { CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","homova",false,true,true); parameters.push_back(pdesign); CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","homova",false,true,true); parameters.push_back(pphylip); CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["homova"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "HomovaCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string HomovaCommand::getHelpString(){ try { string helpString = ""; helpString += "Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n"; helpString += "The homova command outputs a .homova file. \n"; helpString += "The homova command parameters are phylip, iters, sets and alpha. The phylip and design parameters are required, unless valid current files exist.\n"; helpString += "The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n"; helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n"; helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n"; helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000. 
\n"; helpString += "The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "HomovaCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string HomovaCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "homova") { pattern = "[filename],homova"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "HomovaCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** HomovaCommand::HomovaCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; phylipFileName = validParameter.validFile(parameters, "phylip"); if (phylipFileName == "not open") { phylipFileName = ""; abort = true; } else if (phylipFileName == "not found") { //if there is a current phylip file, use it phylipFileName = current->getPhylipFile(); if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter.\n"); } else { m->mothurOut("You have no current phylip file and the phylip parameter is required.\n"); abort = true; } }else { current->setPhylipFile(phylipFileName); } //check for required parameters designFileName = validParameter.validFile(parameters, "design"); if (designFileName == "not open") { abort = true; } else if (designFileName == "not found") { //if there is a current design file, use it designFileName = current->getDesignFile(); if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter.\n"); } else { m->mothurOut("You have no current design file and the design parameter is required.\n"); abort = true; } }else { current->setDesignFile(designFileName); } string temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "alpha"); if (temp == "not found") { temp = "0.05"; } util.mothurConvert(temp, experimentwiseAlpha); string sets = validParameter.valid(parameters, "sets"); if (sets == "not found") { sets = ""; } else { util.splitAtDash(sets, Sets); } } if (outputdir == "") { outputdir = util.hasPath(phylipFileName); } } catch(exception& e) { m->errorOut(e, "HomovaCommand", "HomovaCommand"); exit(1); } } //********************************************************************************************************************** int HomovaCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //read design file DesignMap* designMap = new DesignMap(designFileName); if (m->getControl_pressed()) { delete designMap; return 0; } //read in distance matrix and square it ReadPhylipVector readMatrix(phylipFileName); vector sampleNames = readMatrix.read(distanceMatrix); if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets vector dGroups = 
designMap->getCategory(); for(int i=0;igetControl_pressed()) { delete designMap; return 0; } string group = designMap->get(sampleNames[i]); if (group == "not found") { m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct.\n"); m->setControl_pressed(true); }else if (!util.inUsersGroups(group, Sets)){ //not in set we want remove it //remove from all other rows for(int j=0;j > origGroupSampleMap; for(int i=0;iget(sampleNames[i]); if (group == "not found") { m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct.\n"); m->setControl_pressed(true); }else { origGroupSampleMap[group].push_back(i); } } int numGroups = origGroupSampleMap.size(); if (m->getControl_pressed()) { delete designMap; return 0; } //create a new filename ofstream HOMOVAFile; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(phylipFileName)); string HOMOVAFileName = getOutputFileName("homova", variables); util.openOutputFile(HOMOVAFileName, HOMOVAFile); outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName); HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl; m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n"); double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha); if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){ int numCombos = numGroups * (numGroups-1) / 2; double pairwiseAlpha = experimentwiseAlpha / (double) numCombos; map >::iterator itA; map >::iterator itB; for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){ itB = itA;itB++; for(;itB!=origGroupSampleMap.end();itB++){ map > pairwiseGroupSampleMap; pairwiseGroupSampleMap[itA->first] = itA->second; pairwiseGroupSampleMap[itB->first] = itB->second; runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha); } } HOMOVAFile << endl; m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n'); m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n'); } else{ m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n'); } m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n"); delete designMap; m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "HomovaCommand", "execute"); exit(1); } } //********************************************************************************************************************** double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map > groupSampleMap, double alpha){ try { map >::iterator it; int numGroups = groupSampleMap.size(); vector ssWithinOrigVector; double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector); double counter = 0; for(int i=0;i ssWithinRandVector; map > randomizedGroup = getRandomizedGroups(groupSampleMap); double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector); if(bValueRand >= bValueOrig){ counter++; } } double pValue = (double) counter / (double) iters; string pString = ""; if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); } else { pString = toString(pValue); } //print homova table it = groupSampleMap.begin(); HOMOVAFile << it->first; m->mothurOut(it->first); it++; for(;it!=groupSampleMap.end();it++){ HOMOVAFile << '-' << it->first; m->mothurOut('-' + it->first); } 
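//The permutation p-value computed above is the fraction of the 'iters' randomized groupings whose B statistic
//was greater than or equal to the observed B; when no randomization reaches it, the value is reported as "<1/iters".
//The columns written below are the observed B value, that p-value (flagged with '*' when below the alpha passed in:
//experiment-wise for the overall test, Bonferroni-corrected for the pairwise comparisons), and then the
//SSwithin/(Ni-1) value for each group in the comparison.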
HOMOVAFile << '\t' << bValueOrig << '\t' << pString; m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString); if(pValue < alpha){ HOMOVAFile << "*"; m->mothurOut("*"); } for(int i=0;imothurOut('\t' + toString(ssWithinOrigVector[i])); } HOMOVAFile << endl; m->mothurOutEndLine(); return pValue; } catch(exception& e) { m->errorOut(e, "HomovaCommand", "runHOMOVA"); exit(1); } } //********************************************************************************************************************** double HomovaCommand::calcSigleSSWithin(vector sampleIndices) { try { double ssWithin = 0.0; int numSamplesInGroup = sampleIndices.size(); for(int i=0;ierrorOut(e, "HomovaCommand", "calcSigleSSWithin"); exit(1); } } //********************************************************************************************************************** double HomovaCommand::calcBValue(map > groupSampleMap, vector& ssWithinVector) { try { double numGroups = (double)groupSampleMap.size(); ssWithinVector.resize(numGroups, 0); double totalNumSamples = 0; double ssWithinFull = 0; double secondTermSum = 0; double inverseOneMinusSum = 0; int index = 0; for(map >::iterator it = groupSampleMap.begin();it!=groupSampleMap.end();it++){ int numSamplesInGroup = it->second.size(); totalNumSamples += numSamplesInGroup; ssWithinVector[index] = calcSigleSSWithin(it->second); ssWithinFull += ssWithinVector[index]; secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1)); inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1); ssWithinVector[index] /= (double)(numSamplesInGroup - 1); //this line is only for output purposes to scale SSw by the number of samples in the group index++; } double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum; double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups)); B /= denomintor; return B; } catch(exception& e) { m->errorOut(e, "HomovaCommand", "calcBValue"); exit(1); } } //********************************************************************************************************************** map > HomovaCommand::getRandomizedGroups(map > origMapping){ try{ vector sampleIndices; vector samplesPerGroup; for(map >::iterator it=origMapping.begin();it!=origMapping.end();it++){ vector indices = it->second; samplesPerGroup.push_back(indices.size()); sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end()); } util.mothurRandomShuffle(sampleIndices); int index = 0; map > randomizedGroups = origMapping; for(map >::iterator it=randomizedGroups.begin();it!=randomizedGroups.end();it++){ for(int i=0;isecond.size();i++){ it->second[i] = sampleIndices[index++]; } } return randomizedGroups; } catch (exception& e) { m->errorOut(e, "AmovaCommand", "randomizeGroups"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/homovacommand.h000077500000000000000000000025131424121717000212270ustar00rootroot00000000000000#ifndef HOMOVACOMMAND_H #define HOMOVACOMMAND_H /* * homovacommand.h * mothur * * Created by westcott on 2/8/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" class DesignMap; class HomovaCommand : public Command { public: HomovaCommand(string); ~HomovaCommand(){} vector setParameters(); string getCommandName() { return "homova"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71. \nhttp://www.mothur.org/wiki/Homova"; } string getDescription() { return "homova"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: double runHOMOVA(ofstream& , map >, double); double calcSigleSSWithin(vector); double calcBValue(map >, vector&); map > getRandomizedGroups(map >); bool abort; vector outputNames, Sets; string inputDir, designFileName, phylipFileName; vector< vector > distanceMatrix; int iters; double experimentwiseAlpha; }; #endif mothur-1.48.0/source/commands/indicatorcommand.cpp000077500000000000000000001600501424121717000222460ustar00rootroot00000000000000/* * indicatorcommand.cpp * Mothur * * Created by westcott on 11/12/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "indicatorcommand.h" //********************************************************************************************************************** vector IndicatorCommand::setParameters(){ try { CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pdesign("design", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none","summary",false,false,true); parameters.push_back(pdesign); CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","summary",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","summary",false,false); parameters.push_back(prelabund); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter ptree("tree", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none","tree-summary",false,false,true); parameters.push_back(ptree); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pprocessors); abort = false; calledHelp = false; vector tempOutNames; outputTypes["tree"] = tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string IndicatorCommand::getHelpString(){ try { string helpString = ""; helpString += "The indicator command can be run in 3 ways: with a shared or relabund file and a design file, or with a shared or relabund file 
and a tree file, or with a shared or relabund file, tree file and design file. \n"; helpString += "The indicator command outputs a .indicator.summary file and a .indicator.tre if a tree is given. \n"; helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"; helpString += "The summary file lists the indicator value for each OTU for each node.\n"; helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. \n"; helpString += "The design parameter allows you to relate the tree to the shared or relabund file, if your tree contains the grouping names, or if no tree is provided to group your groups into groupings.\n"; helpString += "The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed, or if you provide a design file the groups in your design file. The groups may be entered separated by dashes.\n"; helpString += "The label parameter indicates at what distance your tree relates to the shared or relabund.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000."; helpString += "The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string IndicatorCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "tree") { pattern = "[filename],indicator.tre"; } else if (type == "summary") { pattern = "[filename],indicator.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** IndicatorCommand::IndicatorCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { treefile = ""; abort = true; } else if (treefile == "not found") { treefile = ""; } else { current->setTreeFile(treefile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { inputFileName = sharedfile; current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { inputFileName = relabundfile; current->setRelAbundFile(relabundfile); } designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { 
designfile = ""; abort = true; } else if (designfile == "not found") { designfile = ""; } else { current->setDesignFile(designfile); } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile.\n"); label=""; } string temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); if ((relabundfile == "") && (sharedfile == "")) { //is there are current file available for either of these? //give priority to shared, then relabund sharedfile = current->getSharedFile(); if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a shared or relabund.\n"); abort = true; } } } if ((designfile == "") && (treefile == "")) { treefile = current->getTreeFile(); if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter.\n"); } else { designfile = current->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter.\n"); } else { m->mothurOut("[ERROR]: You must provide either a tree or design file.\n"); abort = true; } } } if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "IndicatorCommand"); exit(1); } } //********************************************************************************************************************** int IndicatorCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); long start = time(nullptr); //read designfile if given and set up groups for read of sharedfiles vector allGroups; if (designfile != "") { designMap = new DesignMap(designfile); if (m->getControl_pressed()) { delete designMap; return 0; } if (Groups.size() == 0) { Groups = designMap->getCategory(); } allGroups = designMap->getCategory(); namesSeqs = designMap->getNamesGroups(Groups); }else { InputData* input = nullptr; if (sharedfile != "") { input = new InputData(sharedfile, "sharedfile", Groups); } else { input = new InputData(sharedfile, "relabundfile", Groups); } SharedRAbundVectors* lookup = input->getSharedRAbundVectors(); Groups = lookup->getNamesGroups(); namesSeqs = Groups; delete lookup; delete input; } if (treefile != "") { string groupfile = ""; current->setTreeFile(treefile); vector Treenames = util.parseTreeFile(treefile); CountTable ct; bool mismatch = false; set nameMap; map groupMap; set gps; for (int i = 0; i < Treenames.size(); i++) { nameMap.insert(Treenames[i]); //sanity check - is this a group that is not in the sharedfile? 
if (i == 0) { gps.insert("Group1"); } if (designfile == "") { if (!(util.inUsersGroups(Treenames[i], namesSeqs))) { m->mothurOut("[ERROR]: " + Treenames[i] + " is not a group in your shared or relabund file.\n"); mismatch = true; } groupMap[Treenames[i]] = "Group1"; }else{ vector myGroups; myGroups.push_back(Treenames[i]); vector myNames = designMap->getNamesGroups(myGroups); for(int k = 0; k < myNames.size(); k++) { if (!(util.inUsersGroups(myNames[k], allGroups))) { m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file.\n"); mismatch = true; } } groupMap[Treenames[i]] = designMap->get(Treenames[i]); } } ct.createTable(nameMap, groupMap, gps); if ((designfile != "") && (Treenames.size() != namesSeqs.size())) { m->mothurOut("[ERROR]: You design file does not match your tree, aborting.\n"); mismatch = true; } if (mismatch) { //cleanup and exit if (designfile != "") { delete designMap; } return 0; } ReadTree* read = new ReadNewickTree(treefile, Treenames); int readOk = read->read(&ct); if (readOk != 0) { m->mothurOut("Read Terminated.\n"); delete read; return 0; } vector T = read->getTrees(); delete read; if (m->getControl_pressed()) { if (designfile != "") { delete designMap; } for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } T[0]->assembleTree(); Tree* outputTree = new Tree(namesSeqs.size(), &ct, Treenames); //create ouptut tree - respecting pickedGroups outputTree->getSubTree(T[0], namesSeqs); outputTree->assembleTree(); //no longer need original tree, we have output tree to use and label for (int i = 0; i < T.size(); i++) { delete T[i]; } if (m->getControl_pressed()) { if (designfile != "") { delete designMap; } delete outputTree; return 0; } GetIndicatorSpecies(outputTree); //get indicator species values delete outputTree; }else { GetIndicatorSpecies(); } //run with design file only if (designfile != "") { delete designMap; } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set tree file as new current treefile if (treefile != "") { string currentName = ""; itTypes = outputTypes.find("tree"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); } } } m->mothurOut("\n\nIt took " + toString(time(nullptr) - start) + " secs to find the indicator species.\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "execute"); exit(1); } } //********************************************************************************************************************** //indicatorValues = getValues(groupings, groupingNames, indicatorGroups, randomGroupingsMap, m); vector getValues(vector< vector >& groupings, vector< vector >& groupingNames, vector& indicatorGroupings, map > groupingsMap, MothurOut* m){ try { vector values; map >::iterator it; indicatorGroupings.clear(); //create grouping strings vector groupingsGroups; for (int j = 0; j < groupings.size(); j++) { string tempGrouping = ""; for (int k = 0; k < groupings[j].size()-1; k++) { tempGrouping += groupingNames[j][k] + "-"; } tempGrouping += groupingNames[j][groupingNames[j].size()-1]; groupingsGroups.push_back(tempGrouping); } Utils util; //for each otu for (int i = 0; i < groupings[0][0]->getNumBins(); i++) { if (m->getControl_pressed()) { return values; } vector 
terms; float AijDenominator = 0.0; vector Bij; //get overall abundance of each grouping for (int j = 0; j < groupings.size(); j++) { float totalAbund = 0; int numNotZero = 0; for (int k = 0; k < groupings[j].size(); k++) { sharedIndexes temp(j,k); it = groupingsMap.find(temp); if (it == groupingsMap.end()) { //this one didnt get moved, or initial calc totalAbund += groupings[j][k]->get(i); if (!util.isEqual(groupings[j][k]->get(i), 0)) { numNotZero++; } }else { float thisAbund = groupings[(it->second)[0]][(it->second)[1]]->get(i); totalAbund += thisAbund; if (!util.isEqual(thisAbund, 0)) { numNotZero++; } } } //mean abundance float Aij = (totalAbund / (float) groupings[j].size()); terms.push_back(Aij); //percentage of sites represented Bij.push_back(numNotZero / (float) groupings[j].size()); AijDenominator += Aij; } float maxIndVal = 0.0; string maxGrouping = ""; for (int j = 0; j < terms.size(); j++) { float thisAij = (terms[j] / AijDenominator); //relative abundance float thisValue = thisAij * Bij[j] * 100.0; //save largest if (thisValue > maxIndVal) { maxIndVal = thisValue; maxGrouping = groupingsGroups[j]; } } values.push_back(maxIndVal); indicatorGroupings.push_back(maxGrouping); } return values; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getValues"); exit(1); } } //********************************************************************************************************************** //same as above, just data type difference //indicatorValues = getValues(groupings, groupingNames, indicatorGroups, randomGroupingsMap, m); vector getValues(vector< vector >& groupings, vector< vector >& groupingNames, vector& indicatorGroupings, map > groupingsMap, MothurOut* m){ try { vector values; map >::iterator it; indicatorGroupings.clear(); //create grouping strings vector groupingsGroups; for (int j = 0; j < groupings.size(); j++) { string tempGrouping = ""; for (int k = 0; k < groupings[j].size()-1; k++) { tempGrouping += groupingNames[j][k] + "-"; } tempGrouping += groupingNames[j][groupingNames[j].size()-1]; groupingsGroups.push_back(tempGrouping); } //for each otu for (int i = 0; i < groupings[0][0]->getNumBins(); i++) { vector terms; float AijDenominator = 0.0; vector Bij; //get overall abundance of each grouping for (int j = 0; j < groupings.size(); j++) { int totalAbund = 0.0; int numNotZero = 0; for (int k = 0; k < groupings[j].size(); k++) { sharedIndexes temp(j,k); it = groupingsMap.find(temp); if (it == groupingsMap.end()) { //this one didnt get moved totalAbund += groupings[j][k]->get(i); if (groupings[j][k]->get(i) != 0) { numNotZero++; } }else { //cout << j << "," << k << '\t' << (it->second).treatmentIndex << "," << (it->second).sampleIndex << endl; int thisAbund = groupings[(it->second)[0]][(it->second)[1]]->get(i); totalAbund += thisAbund; if (thisAbund != 0) { numNotZero++; } } } //mean abundance float Aij = (totalAbund / (float) groupings[j].size()); terms.push_back(Aij); //percentage of sites represented Bij.push_back(numNotZero / (float) groupings[j].size()); AijDenominator += Aij; } float maxIndVal = 0.0; string maxGrouping = ""; for (int j = 0; j < terms.size(); j++) { float thisAij = (terms[j] / AijDenominator); //relative abundance float thisValue = thisAij * Bij[j] * 100.0; //save largest if (thisValue > maxIndVal) { maxIndVal = thisValue; maxGrouping = groupingsGroups[j]; } } values.push_back(maxIndVal); indicatorGroupings.push_back(maxGrouping); } return values; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getValues"); exit(1); } } 
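//Both getValues overloads above implement the same indicator-value calculation, differing only in whether the
//abundances are integer counts (shared file) or relative abundances (relabund file). For each OTU i and grouping j:
//    Aij = (mean abundance of OTU i in grouping j) / (sum of those means over all groupings)  -> relative abundance
//    Bij = (samples in grouping j where OTU i is nonzero) / (total samples in grouping j)      -> relative frequency
//    IndVal(i,j) = Aij * Bij * 100
//Each OTU is reported with its largest IndVal and the grouping that produced it. For example, if an OTU has a mean
//abundance of 8 in grouping Y and 2 in grouping N, and occurs in all 3 of the Y samples, then
//Aij = 8/(8+2) = 0.8, Bij = 3/3 = 1.0, and IndVal = 80 for grouping Y.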
//********************************************************************************************************************** //divide shared or relabund file by groupings in the design file //report all otu values to file int IndicatorCommand::GetIndicatorSpecies(){ try { SharedRAbundVectors* lookup = nullptr; SharedRAbundFloatVectors* lookupFloat = nullptr; if (sharedfile != "") { lookup = getShared(); if (m->getControl_pressed()) { if (designfile != "") { delete designMap; } delete lookup; return 0; } if (lookup == nullptr) { m->mothurOut("[ERROR] reading shared file.\n"); return 0; } }else { lookupFloat = getSharedFloat(); if (m->getControl_pressed()) { if (designfile != "") { delete designMap; } delete lookupFloat; return 0; } if (lookupFloat == nullptr) { m->mothurOut("[ERROR] reading relabund file.\n"); return 0; } } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(inputFileName); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(inputFileName)); string outputFileName = getOutputFileName("summary", variables); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); m->mothurOut("\nSpecies\tIndicator_Groups\tIndicatorValue\tpValue\n"); int numBins = 0; vector currentLabels; if (sharedfile != "") { numBins = lookup->getNumBins(); currentLabels = lookup->getOTUNames(); } else { numBins = lookupFloat->getNumBins(); currentLabels = lookupFloat->getOTUNames(); } if (m->getControl_pressed()) { out.close(); if (sharedfile != "") { delete lookup; } else { delete lookupFloat; } return 0; } /*****************************************************/ //create vectors containing rabund info // /*****************************************************/ vector indicatorValues; //size of numBins vector pValues; vector indicatorGroups; map > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors. if (sharedfile != "") { vector< vector > groupings; vector< vector > groupingNames; /* Consider design file: C10 Y C11 Y C12 Y C13 O C14 O C15 O C19 N C20 N C21 N groupings[0] = sharedRabundvectors for treatment Y -> C10,C11,C12 groupings[1] = sharedRabundvectors for treatment O -> C13,C15,C15 groupings[2] = sharedRabundvectors for treatment N -> C13,C15,C15 groupingNames[0] = vector of sample names in treatment Y {C10,C11,C12} groupingNames[1] = vector of sample names in treatment 0 {C13,C15,C15} groupingNames[2] = vector of sample names in treatment N {C13,C15,C15} */ set groupsAlreadyAdded; vector subset; vector subsetNames; vector data = lookup->getSharedRAbundVectors(); vector dataGroupNames = lookup->getNamesGroups(); //for each grouping, clustering together for (int i = 0; i < (designMap->getCategory()).size(); i++) { for (int k = 0; k < data.size(); k++) { //are you from this grouping? 
if (designMap->get(dataGroupNames[k]) == (designMap->getCategory())[i]) { subset.push_back(data[k]); subsetNames.push_back(dataGroupNames[k]); groupsAlreadyAdded.insert(dataGroupNames[k]); } } if (subset.size() != 0) { groupings.push_back(subset); groupingNames.push_back(subsetNames); } subset.clear(); } if (groupsAlreadyAdded.size() != data.size()) { m->mothurOut("[ERROR]: could not make proper groupings.\n"); } indicatorValues = getValues(groupings, groupingNames, indicatorGroups, randomGroupingsMap, m); pValues = getPValues(groupings, groupingNames, lookup->size(), indicatorValues); }else { vector< vector > groupings; vector< vector > groupingNames; set groupsAlreadyAdded; vector subset; vector subsetNames; vector data = lookupFloat->getSharedRAbundFloatVectors(); vector dataGroupNames = lookupFloat->getNamesGroups(); //for each grouping for (int i = 0; i < (designMap->getCategory()).size(); i++) { for (int k = 0; k < data.size(); k++) { //are you from this grouping? if (designMap->get(dataGroupNames[k]) == (designMap->getCategory())[i]) { subset.push_back(data[k]); subsetNames.push_back(dataGroupNames[k]); groupsAlreadyAdded.insert(dataGroupNames[k]); } } if (subset.size() != 0) { groupings.push_back(subset); groupingNames.push_back(subsetNames); } subset.clear(); } if (groupsAlreadyAdded.size() != data.size()) { m->mothurOut("[ERROR]: could not make proper groupings.\n"); } indicatorValues = getValues(groupings, groupingNames, indicatorGroups, randomGroupingsMap, m); pValues = getPValues(groupings, groupingNames, lookupFloat->size(), indicatorValues); } if (m->getControl_pressed()) { out.close(); return 0; } /******************************************************/ //output indicator values to table form // /*****************************************************/ out << "OTU\tIndicator_Groups\tIndicator_Value\tpValue" << endl; for (int j = 0; j < indicatorValues.size(); j++) { if (m->getControl_pressed()) { out.close(); return 0; } out << currentLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; } else { out << "<" << (1/(float)iters) << endl; } if (pValues[j] <= 0.05) { string pValueString = "<" + toString((1/(float)iters)); if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); } m->mothurOut(currentLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString + "\n"); } } out.close(); if (sharedfile != "") { delete lookup; } else { delete lookupFloat; } return 0; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies"); exit(1); } } //********************************************************************************************************************** //traverse tree finding indicator species values for each otu at each node //label node with otu number that has highest indicator value //report all otu values to file int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ try { SharedRAbundVectors* lookup = nullptr; SharedRAbundFloatVectors* lookupFloat = nullptr; if (sharedfile != "") { lookup = getShared(); if (m->getControl_pressed()) { if (designfile != "") { delete designMap; } delete lookup; return 0; } if (lookup == nullptr) { m->mothurOut("[ERROR] reading shared file.\n"); return 0; } }else { lookupFloat = getSharedFloat(); if (m->getControl_pressed()) { if (designfile != "") { delete designMap; } delete lookupFloat; return 0; } if (lookupFloat == nullptr) { m->mothurOut("[ERROR] reading relabund file.\n"); 
return 0; } } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(inputFileName); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(inputFileName)); string outputFileName = getOutputFileName("summary",variables); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); int numBins = 0; vector currentLabels; if (sharedfile != "") { numBins = lookup->getNumBins(); currentLabels = lookup->getOTUNames(); } else { numBins = lookupFloat->getNumBins(); currentLabels = lookupFloat->getOTUNames(); } //print headings out << "TreeNode\t"; for (int i = 0; i < numBins; i++) { out << currentLabels[i] << "_IndGroups" << '\t' << currentLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; } out << endl; m->mothurOut("\nNode\tSpecies\tIndicator_Groups\tIndicatorValue\tpValue\n"); string treeOutputDir = outputdir; if (outputdir == "") { treeOutputDir += util.hasPath(treefile); } variables["[filename]"] = treeOutputDir + util.getRootName(util.getSimpleName(treefile)); string outputTreeFileName = getOutputFileName("tree", variables); //create a map from tree node index to names of descendants, save time later to know which sharedRabund you need map > nodeToDescendants; map > descendantNodes; for (int i = 0; i < T->getNumNodes(); i++) { if (m->getControl_pressed()) { return 0; } nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants, descendantNodes); } //you need the distances to leaf to decide grouping below //this will also set branch lengths if the tree does not include them map distToRoot = getDistToRoot(T); //for each node for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) { if (m->getControl_pressed()) { out.close(); return 0; } /*****************************************************/ //create vectors containing rabund info // /*****************************************************/ vector indicatorValues; //size of numBins vector pValues; vector indicatorGroups; vector< map > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors. if (sharedfile != "") { vector< vector > groupings; vector< vector > groupingNames; //get nodes that will be a valid grouping //you are valid if you are not one of my descendants //AND your distToRoot is >= mine //AND you were not added as part of a larger grouping. Largest nodes are added first. 
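//In practice, for internal node i the first grouping holds the samples that are leaves below i; the loop below then
//walks the remaining nodes (largest first) and turns each qualifying node's not-yet-used descendants into another
//grouping, so that when the groupings are built correctly every sample appears in exactly one of them (the sanity
//check on groupsAlreadyAdded reports an error otherwise).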
set groupsAlreadyAdded; //create a grouping with my grouping vector subset; vector subsetNames; int count = 0; int doneCount = nodeToDescendants[i].size(); vector data = lookup->getSharedRAbundVectors(); vector dataGroupNames = lookup->getNamesGroups(); for (int k = 0; k < data.size(); k++) { //is this descendant of i if ((nodeToDescendants[i].count(dataGroupNames[k]) != 0)) { subset.push_back(data[k]); subsetNames.push_back(dataGroupNames[k]); groupsAlreadyAdded.insert(dataGroupNames[k]); count++; } if (count == doneCount) { break; } //quit once you get the rabunds for this grouping } if (subset.size() != 0) { groupings.push_back(subset); groupingNames.push_back(subsetNames); } for (int j = (T->getNumNodes()-1); j >= 0; j--) { if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) { vector subset; vector subsetNames; int count = 0; int doneCount = nodeToDescendants[j].size(); for (int k = 0; k < data.size(); k++) { //is this descendant of j, and we didn't already add this as part of a larger grouping if ((nodeToDescendants[j].count(dataGroupNames[k]) != 0) && (groupsAlreadyAdded.count(dataGroupNames[k]) == 0)) { subset.push_back(data[k]); subsetNames.push_back(dataGroupNames[k]); groupsAlreadyAdded.insert(dataGroupNames[k]); count++; } if (count == doneCount) { break; } //quit once you get the rabunds for this grouping } //if subset.size == 0 then the node was added as part of a larger grouping if (subset.size() != 0) { groupings.push_back(subset); groupingNames.push_back(subsetNames); } } } if (groupsAlreadyAdded.size() != data.size()) { m->mothurOut("[ERROR]: could not make proper groupings.\n"); } map > placeHolder; //don't need randomization for initial calc indicatorValues = getValues(groupings, groupingNames, indicatorGroups, placeHolder, m); pValues = getPValues(groupings, groupingNames, lookup->getNumGroups(), indicatorValues); }else { vector< vector > groupings; vector< vector > groupingNames; //get nodes that will be a valid grouping //you are valid if you are not one of my descendants //AND your distToRoot is >= mine //AND you were not added as part of a larger grouping. Largest nodes are added first. 
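//This branch mirrors the grouping logic of the shared-file branch above, applied to the relative-abundance
//(SharedRAbundFloatVector) data instead of integer counts.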
set groupsAlreadyAdded; //create a grouping with my grouping vector subset; vector subsetNames; int count = 0; int doneCount = nodeToDescendants[i].size(); vector data = lookupFloat->getSharedRAbundFloatVectors(); vector dataGroupNames = lookupFloat->getNamesGroups(); for (int k = 0; k < data.size(); k++) { //is this descendant of i if ((nodeToDescendants[i].count(dataGroupNames[k]) != 0)) { subset.push_back(data[k]); subsetNames.push_back(dataGroupNames[k]); groupsAlreadyAdded.insert(dataGroupNames[k]); count++; } if (count == doneCount) { break; } //quit once you get the rabunds for this grouping } if (subset.size() != 0) { groupings.push_back(subset); groupingNames.push_back(subsetNames); } for (int j = (T->getNumNodes()-1); j >= 0; j--) { if ((descendantNodes[i].count(j) == 0) && (distToRoot[j] >= distToRoot[i])) { vector subset; vector subsetNames; int count = 0; int doneCount = nodeToDescendants[j].size(); for (int k = 0; k < data.size(); k++) { //is this descendant of j, and we didn't already add this as part of a larger grouping if ((nodeToDescendants[j].count(dataGroupNames[k]) != 0) && (groupsAlreadyAdded.count(dataGroupNames[k]) == 0)) { subset.push_back(data[k]); subsetNames.push_back(dataGroupNames[k]); groupsAlreadyAdded.insert(dataGroupNames[k]); count++; } if (count == doneCount) { break; } //quit once you get the rabunds for this grouping } //if subset.size == 0 then the node was added as part of a larger grouping if (subset.size() != 0) { groupings.push_back(subset); groupingNames.push_back(subsetNames); } } } if (groupsAlreadyAdded.size() != data.size()) { m->mothurOut("[ERROR]: could not make proper groupings.\n"); } map > placeHolder; //don't need randomization for initial calc indicatorValues = getValues(groupings, groupingNames, indicatorGroups, placeHolder, m); pValues = getPValues(groupings, groupingNames, lookupFloat->getNumGroups(), indicatorValues); } if (m->getControl_pressed()) { out.close(); return 0; } /******************************************************/ //output indicator values to table form + label tree // /*****************************************************/ out << (i+1); for (int j = 0; j < indicatorValues.size(); j++) { if (m->getControl_pressed()) { out.close(); if (sharedfile != "") { delete lookup; } else { delete lookupFloat; } return 0; } if (pValues[j] < (1/(float)iters)) { out << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t' << '<' << (1/(float)iters); }else { out << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t' << pValues[j]; } if (pValues[j] <= 0.05) { string pValueString = "\t<" + toString((1/(float)iters)); if (pValues[j] > (1/(float)iters)) { pValueString = toString('\t' + pValues[j]); } m->mothurOut(toString(i+1) + "\t" + currentLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString + "\n"); } } out << endl; T->tree[i].setLabel(toString(i+1)); } out.close(); ofstream outTree; util.openOutputFile(outputTreeFileName, outTree); outputNames.push_back(outputTreeFileName); outputTypes["tree"].push_back(outputTreeFileName); T->print(outTree, "both"); outTree.close(); if (sharedfile != "") { delete lookup; } else { delete lookupFloat; } return 0; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies"); exit(1); } } /**************************************************************************************************/ struct indicatorFloatData { vector< vector > groupings; vector< vector > groupingNames; vector< map > > randomGroupings; 
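//randomGroupings holds this worker's share of the pre-generated permutations: one map per iteration, remapping a
//(grouping, sample) position to the position it is swapped with, so the abundance vectors themselves are never
//physically moved during the randomization.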
MothurOut* m; int iters, numGroups; vector indicatorValues, pvalues; indicatorFloatData(){} indicatorFloatData(int it, vector< map > > ran, vector< vector > ng, vector< vector > gn, int n, vector iv) { m = MothurOut::getInstance(); iters = it; groupings = ng; groupingNames = gn; randomGroupings = ran; indicatorValues = iv; pvalues.resize(indicatorValues.size(), 0); numGroups = n; } }; struct indicatorData { vector< vector > groupings; vector< vector > groupingNames; vector< map > > randomGroupings; MothurOut* m; int iters, numGroups; vector indicatorValues, pvalues; indicatorData(){} indicatorData(int it, vector< map > > ran, vector< vector > ng, vector< vector > gn, int n, vector iv) { m = MothurOut::getInstance(); iters = it; groupings = ng; groupingNames = gn; randomGroupings = ran; indicatorValues = iv; pvalues.resize(indicatorValues.size(), 0); numGroups = n; } }; //********************************************************************************************************************** void driverValues(indicatorData* params){ try { vector notUsedGroupings; //we dont care about the grouping for the pvalues since they are randomized, but we need to pass the function something for(int i=0;iiters;i++){ if (params->m->getControl_pressed()) { break; } map > groupingsMap = params->randomGroupings[i]; vector randomIndicatorValues = getValues(params->groupings, params->groupingNames, notUsedGroupings, groupingsMap, params->m); for (int j = 0; j < params->indicatorValues.size(); j++) { if (randomIndicatorValues[j] >= params->indicatorValues[j]) { params->pvalues[j]++; } } } }catch(exception& e) { params->m->errorOut(e, "IndicatorCommand", "driver"); exit(1); } } //********************************************************************************************************************** void driverValuesFloat(indicatorFloatData* params){ try { vector notUsedGroupings; //we dont care about the grouping for the pvalues since they are randomized, but we need to pass the function something for(int i=0;iiters;i++){ if (params->m->getControl_pressed()) { break; } map > groupingsMap = params->randomGroupings[i]; vector randomIndicatorValues = getValues(params->groupings, params->groupingNames, notUsedGroupings, groupingsMap, params->m); for (int j = 0; j < params->indicatorValues.size(); j++) { if (randomIndicatorValues[j] >= params->indicatorValues[j]) { params->pvalues[j]++; } } } }catch(exception& e) { params->m->errorOut(e, "IndicatorCommand", "driver"); exit(1); } } //********************************************************************************************************************** vector IndicatorCommand::getPValues(vector< vector >& groupings, vector< vector >& groupingNames, int num, vector indicatorValues){ try { vector pvalues; vector groupingsSize; for (int i = 0; i < groupings.size(); i++) { groupingsSize.push_back(groupings[i].size()); } vector< map > > randomize = randomizeGroupings(groupingsSize, groupings.size()); //create array of worker threads vector workerThreads; vector data; //divide iters between processors vector procIters; int numItersPerProcessor = iters / processors; //divide iters between processes for (int h = 0; h < processors; h++) { if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; } procIters.push_back(numItersPerProcessor); } //Lauch worker threads int start = 0; for (int i = 0; i < processors-1; i++) { //make copy of lookup so we don't get access violations vector< vector > newGroupings; for (int k = 0; k < groupings.size(); k++) { vector 
newLookup; for (int l = 0; l < groupings[k].size(); l++) { SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(*groupings[k][l]); newLookup.push_back(temp); } newGroupings.push_back(newLookup); } vector< map > > thisProcessorsRandom; thisProcessorsRandom.insert(thisProcessorsRandom.begin(), randomize.begin()+start, randomize.begin()+start+procIters[i]); start += procIters[i]; indicatorFloatData* dataBundle = new indicatorFloatData(procIters[i], thisProcessorsRandom, newGroupings, groupingNames, num, indicatorValues); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverValuesFloat, dataBundle)); } //make copy of lookup so we don't get access violations vector< vector > newGroupings; for (int k = 0; k < groupings.size(); k++) { vector newLookup; for (int l = 0; l < groupings[k].size(); l++) { SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(*groupings[k][l]); newLookup.push_back(temp); } newGroupings.push_back(newLookup); } vector< map > > thisProcessorsRandom; thisProcessorsRandom.insert(thisProcessorsRandom.begin(), randomize.begin()+start, randomize.begin()+start+procIters[processors-1]); indicatorFloatData* dataBundle = new indicatorFloatData(procIters[processors-1], thisProcessorsRandom, newGroupings, groupingNames, num, indicatorValues); driverValuesFloat(dataBundle); pvalues = dataBundle->pvalues; for (int l = 0; l < newGroupings.size(); l++) { for (int j = 0; j < newGroupings[l].size(); j++) { delete newGroupings[l][j]; } } for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); for (int j = 0; j < data[i]->pvalues.size(); j++) { pvalues[j] += data[i]->pvalues[j]; } for (int l = 0; l < data[i]->groupings.size(); l++) { for (int j = 0; j < data[i]->groupings[l].size(); j++) { delete data[i]->groupings[l][j]; } } delete data[i]; delete workerThreads[i]; } delete dataBundle; for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } return pvalues; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getPValues"); exit(1); } } //********************************************************************************************************************** //same as above, just data type difference vector IndicatorCommand::getPValues(vector< vector >& groupings, vector< vector >& groupingNames, int num, vector indicatorValues){ try { vector pvalues; vector groupingsSize; for (int i = 0; i < groupings.size(); i++) { groupingsSize.push_back(groupings[i].size()); } vector< map > > randomize = randomizeGroupings(groupingsSize, groupings.size()); //create array of worker threads vector workerThreads; vector data; //divide iters between processors vector procIters; int numItersPerProcessor = iters / processors; //divide iters between processes for (int h = 0; h < processors; h++) { if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; } procIters.push_back(numItersPerProcessor); } //Lauch worker threads int start = 0; for (int i = 0; i < processors-1; i++) { //make copy of lookup so we don't get access violations vector< vector > newGroupings; for (int k = 0; k < groupings.size(); k++) { vector newLookup; for (int l = 0; l < groupings[k].size(); l++) { SharedRAbundVector* temp = new SharedRAbundVector(*groupings[k][l]); newLookup.push_back(temp); } newGroupings.push_back(newLookup); } vector< map > > thisProcessorsRandom; thisProcessorsRandom.insert(thisProcessorsRandom.begin(), randomize.begin()+start, randomize.begin()+start+procIters[i]); start += procIters[i]; indicatorData* dataBundle = new 
indicatorData(procIters[i], thisProcessorsRandom, newGroupings, groupingNames, num, indicatorValues); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverValues, dataBundle)); } //make copy of lookup so we don't get access violations vector< vector > newGroupings; for (int k = 0; k < groupings.size(); k++) { vector newLookup; for (int l = 0; l < groupings[k].size(); l++) { SharedRAbundVector* temp = new SharedRAbundVector(*groupings[k][l]); newLookup.push_back(temp); } newGroupings.push_back(newLookup); } vector< map > > thisProcessorsRandom; thisProcessorsRandom.insert(thisProcessorsRandom.begin(), randomize.begin()+start, randomize.begin()+start+procIters[processors-1]); indicatorData* dataBundle = new indicatorData(procIters[processors-1], thisProcessorsRandom, newGroupings, groupingNames, num, indicatorValues); driverValues(dataBundle); pvalues = dataBundle->pvalues; for (int l = 0; l < newGroupings.size(); l++) { for (int j = 0; j < newGroupings[l].size(); j++) { delete newGroupings[l][j]; } } for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); for (int j = 0; j < data[i]->pvalues.size(); j++) { pvalues[j] += data[i]->pvalues[j]; } for (int l = 0; l < data[i]->groupings.size(); l++) { for (int j = 0; j < data[i]->groupings[l].size(); j++) { delete data[i]->groupings[l][j]; } } delete data[i]; delete workerThreads[i]; } delete dataBundle; for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } return pvalues; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getPValues"); exit(1); } } //********************************************************************************************************************** //swap samples between treatments, in essence randomizing the second column of the design file vector< map > > IndicatorCommand::randomizeGroupings(vector sizesOfEachTreatment, int numTreatments){ try { int numSamplesToSwap = 0; for (int i = 0; i < sizesOfEachTreatment.size(); i++) { numSamplesToSwap += sizesOfEachTreatment[i]; } vector< map > > randomGroupings; //map for each iter randomGroupings[0] -> for (int k = 0; k < iters; k++) { if (m->getControl_pressed()) {break;} map > thisRandomization; for (int i = 0; i < numSamplesToSwap; i++) { //select random treatment and random sample to swap int z = util.getRandomIndex(numTreatments-1); int a = util.getRandomIndex(sizesOfEachTreatment[z]-1); int x = util.getRandomIndex(numTreatments-1); int b = util.getRandomIndex(sizesOfEachTreatment[x]-1); sharedIndexes from(z, a); vector to; to.push_back(x); to.push_back(b); thisRandomization[from] = to; //cout << k << " : " << z << "," << a << '\t' << x << "," << b << endl; //cout << from.treatmentIndex << "," << from.sampleIndex << '\t' << to[0] << "," << to[1] << endl; } randomGroupings.push_back(thisRandomization); } return randomGroupings; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "randomizeGroupings"); exit(1); } } //********************************************************************************************************************** SharedRAbundVectors* IndicatorCommand::getShared(){ try { InputData input(sharedfile, "sharedfile", namesSeqs); set processedLabels; set userLabels; string lastLabel = ""; if (label != "") { userLabels.insert(label); } SharedRAbundVectors* lookup = util.getNextShared(input, true, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); if (label == "") { label = lastLabel; } return lookup; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getShared"); 
exit(1); } } //********************************************************************************************************************** SharedRAbundFloatVectors* IndicatorCommand::getSharedFloat(){ try { InputData input(relabundfile, "relabund", namesSeqs); set processedLabels; set userLabels; string lastLabel = ""; if (label != "") { userLabels.insert(label); } SharedRAbundFloatVectors* lookup = util.getNextRelabund(input, true, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); if (label == "") { label = lastLabel; } return lookup; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getSharedFloat"); exit(1); } } //********************************************************************************************************************** //you need the distances to root to decide groupings //this will also set branch lengths if the tree does not include them map IndicatorCommand::getDistToRoot(Tree*& T){ try { map dists; bool hasBranchLengths = false; for (int i = 0; i < T->getNumNodes(); i++) { if (T->tree[i].getBranchLength() > 0.0) { hasBranchLengths = true; break; } } //set branchlengths if needed if (!hasBranchLengths) { for (int i = 0; i < T->getNumNodes(); i++) { int lc = T->tree[i].getLChild(); int rc = T->tree[i].getRChild(); if (lc == -1) { // you are a leaf //if you are a leaf set you priliminary length to 1.0, this may adjust later T->tree[i].setBranchLength(1.0); dists[i] = 1.0; }else{ // you are an internal node //look at your children's length to leaf float ldist = dists[lc]; float rdist = dists[rc]; float greater = ldist; if (rdist > greater) { greater = rdist; dists[i] = ldist + 1.0;} else { dists[i] = rdist + 1.0; } //branch length = difference + 1 T->tree[lc].setBranchLength((abs(ldist-greater) + 1.0)); T->tree[rc].setBranchLength((abs(rdist-greater) + 1.0)); } } } dists.clear(); for (int i = 0; i < T->getNumNodes(); i++) { double sum = 0.0; int index = i; while(T->tree[index].getParent() != -1){ if (!util.isEqual(T->tree[index].getBranchLength(), -1)) { sum += abs(T->tree[index].getBranchLength()); } index = T->tree[index].getParent(); } dists[i] = sum; } return dists; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getLengthToLeaf"); exit(1); } } //********************************************************************************************************************** set IndicatorCommand::getDescendantList(Tree*& T, int i, map > descendants, map >& nodes){ try { set names; set::iterator it; int lc = T->tree[i].getLChild(); int rc = T->tree[i].getRChild(); if (lc == -1) { //you are a leaf your only descendant is yourself set temp; temp.insert(i); nodes[i] = temp; if (designfile == "") { names.insert(T->tree[i].getName()); } else { //string myRep = designMap->get(T->tree[i].getName()); names.insert(T->tree[i].getName()); } }else{ //your descedants are the combination of your childrens descendants names = descendants[lc]; nodes[i] = nodes[lc]; for (it = descendants[rc].begin(); it != descendants[rc].end(); it++) { names.insert(*it); } for (set::iterator itNum = nodes[rc].begin(); itNum != nodes[rc].end(); itNum++) { nodes[i].insert(*itNum); } nodes[i].insert(i); //you are your own descendant } return names; } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "getDescendantList"); exit(1); } } /*****************************************************************/ mothur-1.48.0/source/commands/indicatorcommand.h000077500000000000000000000061361424121717000217170ustar00rootroot00000000000000#ifndef INDICATORCOMMAND_H #define 
INDICATORCOMMAND_H /* * indicatorcommand.h * Mothur * * Created by westcott on 11/12/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "readtree.h" #include "counttable.h" #include "inputdata.h" #include "designmap.h" //********************************************************************************************************************** struct sharedIndexes { int treatmentIndex; int sampleIndex; sharedIndexes() : treatmentIndex(0), sampleIndex(0) {} sharedIndexes(int g, int o) : treatmentIndex(g), sampleIndex(o) {} bool operator<(const sharedIndexes& rhs) const { return rhs.treatmentIndex < this->treatmentIndex || (rhs.treatmentIndex == this->treatmentIndex && rhs.sampleIndex < this->sampleIndex); } bool operator>(const sharedIndexes& rhs) const { return rhs.treatmentIndex > this->treatmentIndex || (rhs.treatmentIndex == this->treatmentIndex && rhs.sampleIndex > this->sampleIndex); } bool operator=(const sharedIndexes& rhs) const { return ((rhs.treatmentIndex == this->treatmentIndex) && (rhs.sampleIndex == this->sampleIndex)); } }; //********************************************************************************************************************** class IndicatorCommand : public Command { public: IndicatorCommand(string); ~IndicatorCommand(){} vector setParameters(); string getCommandName() { return "indicator"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Dufrene M, Legendre P (1997). Species assemblages and indicator species: The need for a flexible asymmetrical approach. Ecol Monogr 67: 345-66.\n McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. 
\nhttp://www.mothur.org/wiki/Indicator"; } string getDescription() { return "calculate the indicator value for each OTU"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: DesignMap* designMap; string treefile, sharedfile, relabundfile, groups, label, inputFileName, designfile; bool abort; int iters, processors; vector outputNames, Groups, namesSeqs; set getDescendantList(Tree*&, int, map >, map >&); map getDistToRoot(Tree*&); vector< map > > randomizeGroupings(vector, int); SharedRAbundVectors* getShared(); SharedRAbundFloatVectors* getSharedFloat(); int GetIndicatorSpecies(Tree*&); int GetIndicatorSpecies(); vector getPValues(vector< vector >&, vector< vector >&, int, vector); vector getPValues(vector< vector >&, vector< vector >&, int, vector); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/kruskalwalliscommand.cpp000077500000000000000000000231001424121717000231540ustar00rootroot00000000000000/* * File: kruskalwalliscommand.cpp * Author: kiverson * * Created on June 26, 2012, 11:06 AM */ #include "kruskalwalliscommand.h" #include "linearalgebra.h" //********************************************************************************************************************** vector KruskalWallisCommand::setParameters(){ try { CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared); CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files. CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["kruskall-wallis"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string KruskalWallisCommand::getHelpString(){ try { string helpString = ""; helpString += "The kruskal.wallis command allows you to ....\n"; helpString += "The kruskal.wallis command parameters are: shared, design, class, label and classes.\n"; helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n"; helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. 
labels are separated by dashes.\n"; helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string KruskalWallisCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** KruskalWallisCommand::KruskalWallisCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } //get shared file, it is required designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { designfile = ""; abort = true; } else if (designfile == "not found") { //if there is a current shared file, use it designfile = current->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter.\n"); } else { m->mothurOut("You have no current design file and the design parameter is required.\n"); abort = true; } }else { current->setDesignFile(designfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } mclass = validParameter.valid(parameters, "class"); if (mclass == "not found") { mclass = ""; } } } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand"); exit(1); } } //********************************************************************************************************************** int KruskalWallisCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } DesignMap designMap(designfile); if (m->getControl_pressed()) { return 0; } //if user did not select class use first column if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); } InputData input(sharedfile, "sharedfile", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = 
util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); vector currentLabels = lookup->getOTUNames(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } vector data = lookup->getSharedRAbundVectors(); process(data, designMap, currentLabels); for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "execute"); exit(1); } } //********************************************************************************************************************** int KruskalWallisCommand::process(vector& lookup, DesignMap& designMap, vector currentLabels) { try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[distance]"] = lookup[0]->getLabel(); string outputFileName = getOutputFileName("kruskall-wallis",variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName); out << "OTULabel\tKW\tPvalue\n"; int numBins = lookup[0]->getNumBins(); //sanity check to make sure each treatment has a group in the shared file set treatments; for (int j = 0; j < lookup.size(); j++) { string group = lookup[j]->getGroup(); string treatment = designMap.get(group, mclass); //get value for this group in this category treatments.insert(treatment); } if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things for classes to compare, quitting.\n"); m->setControl_pressed(true); } LinearAlgebra linear; for (int i = 0; i < numBins; i++) { if (m->getControl_pressed()) { break; } vector values; for (int j = 0; j < lookup.size(); j++) { string group = lookup[j]->getGroup(); string treatment = designMap.get(group, mclass); //get value for this group in this category spearmanRank temp(treatment, lookup[j]->get(i)); values.push_back(temp); } double pValue = 0.0; double H = linear.calcKruskalWallis(values, pValue); //output H and signifigance out << currentLabels[i] << '\t' << H << '\t' << pValue << endl; } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "process"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/kruskalwalliscommand.h000077500000000000000000000024441424121717000226310ustar00rootroot00000000000000/* * File: kruskalwalliscommand.h * Author: kiverson * * Created on June 26, 2012, 11:07 AM */ #ifndef KRUSKALWALLISCOMMAND_H #define KRUSKALWALLISCOMMAND_H #include "command.hpp" #include "inputdata.h" #include "designmap.h" class KruskalWallisCommand : public Command { public: KruskalWallisCommand(string); ~KruskalWallisCommand(){} vector setParameters(); string getCommandName() { return "kruskal.wallis"; } string getCommandCategory() { return "Hypothesis Testing"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "http://www.mothur.org/wiki/Kruskal.wallis"; } string getDescription() { return "Non-parametric method for testing whether samples originate from the same distribution."; } struct groupRank { string group; double value; double rank; }; 
int execute(); void help() { m->mothurOut(getHelpString()); } void assignRank(vector&); void assignValue(vector&); private: bool abort, allLines; string sharedfile, designfile, mclass; vector outputNames; set labels; int process(vector&, DesignMap&, vector); }; #endif /* KRUSKALWALLISCOMMAND_H */ mothur-1.48.0/source/commands/lefsecommand.cpp000077500000000000000000001670321424121717000213770ustar00rootroot00000000000000// // lefsecommand.cpp // Mothur // // Created by SarahsWork on 6/12/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "lefsecommand.h" #include "linearalgebra.h" //********************************************************************************************************************** vector LefseCommand::setParameters(){ try { CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); CommandParameter pshared("shared", "InputTypes", "", "", "shared-clr", "none", "none","metastats",false,false,true); parameters.push_back(pshared); //CommandParameter pclr("clr", "InputTypes", "", "", "shared-clr", "none", "none","metastats",false,false,true); parameters.push_back(pclr); CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass); CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); //CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); CommandParameter palpha("aalpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha); CommandParameter pwalpha("walpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha); CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); CommandParameter plda("lda", "Number", "", "2.0", "", "", "","",false,false); parameters.push_back(plda); CommandParameter pwilc("wilc", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pwilc); CommandParameter pnormmillion("norm", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pnormmillion); CommandParameter piters("iters", "Number", "", "30", "", "", "","",false,false); parameters.push_back(piters); //CommandParameter pwilcsamename("wilcsamename", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pwilcsamename); CommandParameter pcurv("curv", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcurv); CommandParameter pfiters("fboots", "Number", "", "0.67", "", "", "","",false,false); parameters.push_back(pfiters); CommandParameter pstrict("strict", "Multiple", "0-1-2", "0", "", "", "","",false,false); parameters.push_back(pstrict); CommandParameter pminc("minc", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminc); CommandParameter pmulticlass_strat("multiclass", "Multiple", "onevone-onevall", "onevall", "", "", "","",false,false); parameters.push_back(pmulticlass_strat); CommandParameter ppairwise("pairwise", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppairwise); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter 
poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; runAll = true; vector tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "LefseCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string LefseCommand::getHelpString(){ try { string helpString = ""; helpString += "The lefse command allows you to ....\n"; helpString += "The lefse command parameters are: shared, design, class, subclass, label, pairwise, walpha, aalpha, lda, wilc, iters, curv, fboots, strict, minc, multiclass and norm.\n"; helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n"; helpString += "The subclass parameter is used to indicate the .....If none is provided, second category is used, or if only one category subclass is ignored. \n"; helpString += "The aalpha parameter is used to set the alpha value for the Krukal Wallis Anova test Default=0.05. \n"; helpString += "The walpha parameter is used to set the alpha value for the Wilcoxon test. Default=0.05. \n"; helpString += "The lda parameter is used to set the threshold on the absolute value of the logarithmic LDA score. Default=2.0. \n"; helpString += "The wilc parameter is used to indicate whether to perform the Wilcoxon test. Default=T. \n"; helpString += "The iters parameter is used to set the number of bootstrap iteration for LDA. Default=30. \n"; //helpString += "The wilcsamename parameter is used to indicate whether perform the wilcoxon test only among the subclasses with the same name. Default=F. \n"; helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n"; helpString += "The pairwise parameter allows you to run all pairwise comparisons of the sets in your design file. Default=f.\n"; helpString += "The curv parameter is used to set whether perform the wilcoxon testing the Curtis's approach [BETA VERSION] Default=F. \n"; helpString += "The norm parameter is used to multiply relative abundances by 1000000. Recommended when very low values are present. Default=T. \n"; helpString += "The fboots parameter is used to set the subsampling fraction value for each bootstrap iteration. Default=0.67. \n"; helpString += "The strict parameter is used to set the multiple testing correction options. 0 no correction (more strict, default), 1 correction for independent comparisons, 2 correction for independent comparison. Options = 0,1,2. Default=0. \n"; helpString += "The minc parameter is used to minimum number of samples per subclass for performing wilcoxon test. Default=10. \n"; helpString += "The multiclass parameter is used to (for multiclass tasks) set whether the test is performed in a one-against-one ( onevone - more strict!) or in a one-against-all setting ( onevall - less strict). Default=onevall. \n"; helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. 
labels are separated by dashes.\n"; helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "LefseCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string LefseCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],[distance],lefse_summary-[filename],[distance],[combo],lefse_summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "LefseCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** LefseCommand::LefseCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); inputfile = sharedfile; format = "sharedfile"; } if ((sharedfile == "")) { //is there are current file available for any of these? //give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a shared file.\n"); abort = true; } } //get shared file, it is required designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { designfile = ""; abort = true; } else if (designfile == "not found") { //if there is a current shared file, use it designfile = current->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter.\n"); } else { m->mothurOut("You have no current design file and the design parameter is required.\n"); abort = true; } }else { current->setDesignFile(designfile); } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } mclass = validParameter.valid(parameters, "class"); if (mclass == "not found") { mclass = ""; } subclass = validParameter.valid(parameters, "subclass"); if (subclass == "not found") { subclass = mclass; } string temp = validParameter.valid(parameters, "aalpha"); if (temp == "not found") { temp = "0.05"; } util.mothurConvert(temp, anovaAlpha); temp = validParameter.valid(parameters, "walpha"); if (temp == "not found") { temp = "0.05"; } util.mothurConvert(temp, wilcoxonAlpha); temp = validParameter.valid(parameters, "wilc"); if (temp == "not found") { temp = "T"; } wilc = util.isTrue(temp); temp = validParameter.valid(parameters, "norm"); if (temp == "not found") { temp = "T"; } normMillion = util.isTrue(temp); temp = validParameter.valid(parameters, "lda"); if (temp == "not found") { temp = "2.0"; } util.mothurConvert(temp, ldaThreshold); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "30"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "fboots"); if (temp == "not found") { temp = "0.67"; } util.mothurConvert(temp, fBoots); temp = validParameter.valid(parameters, "curv"); if (temp == "not found") { temp = "F"; } curv = util.isTrue(temp); temp = validParameter.valid(parameters, "strict"); if (temp == "not found"){ temp = "0"; } if ((temp != "0") && (temp != "1") && (temp != "2")) { m->mothurOut("[ERROR]: Invalid strict option: choices are 0, 1 or 2.\n"); abort=true; } else { util.mothurConvert(temp, strict); } temp = validParameter.valid(parameters, "minc"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, minC); sets = validParameter.valid(parameters, "sets"); if (sets == "not found") { sets = ""; } else { util.splitAtDash(sets, Sets); } temp = validParameter.valid(parameters, "pairwise"); if(temp == "not found"){ temp = "F"; } pairwise = util.isTrue(temp); multiClassStrat = validParameter.valid(parameters, "multiclass"); if (multiClassStrat == "not found"){ multiClassStrat = "onevall"; } if ((multiClassStrat != "onevall") && (multiClassStrat != "onevone")) { m->mothurOut("Invalid multiclass option: choices are onevone or onevall.\n"); abort=true; } } } catch(exception& e) { m->errorOut(e, "LefseCommand", "LefseCommand"); exit(1); } } //********************************************************************************************************************** int LefseCommand::execute(){ try { unsigned int holdRandom = m->getRandomSeed(); m->setRandomSeed(1982); if (abort) { if (calledHelp) { return 0; } return 2; } DesignMap designMap(designfile); if (m->getControl_pressed()) { return 0; } //if user did not select class use first column if (mclass == "") { 
mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); if (subclass == "") { subclass = mclass; } } vector Groups; int numSets = Sets.size(); if (Sets.size() != 0) { //user has picked sets find groups to include from lookup designMap.setDefaultClass(mclass); Groups = designMap.getNamesGroups(Sets); }else { Sets = designMap.getCategory(); numSets = (int)Sets.size(); } if (numSets != 2) { //for 2 sets just run pairwise InputData input(inputfile, format, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundFloatVectors* lookup = nullptr; SharedCLRVectors* clr = nullptr; if (format == "sharedfile") { lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); } while ((lookup != nullptr) || (clr != nullptr)){ if (m->getControl_pressed()) { if (lookup != nullptr) { delete lookup; } if (clr != nullptr) { delete clr; }break; } process(lookup, clr, designMap, ""); if (format == "sharedfile") { delete lookup; lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } } }else { runPairwiseAnalysis(designMap); } if (pairwise) { runPairwiseAnalysis(designMap); } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); m->setRandomSeed(holdRandom); return 0; } catch(exception& e) { m->errorOut(e, "LefseCommand", "execute"); exit(1); } } //********************************************************************************************************************** void LefseCommand::runPairwiseAnalysis(DesignMap& designMap) { try { int numSets = (int)Sets.size(); runAll = false; if (numSets < 2) { m->mothurOut("[ERROR]: Not enough sets, I need at least 2 valid sets. Unable to complete pairwise analysis.\n"); m->setControl_pressed(true); return; } for (int a=0; amothurOut("\nComparing " + combo + ":\n"); vector thisSetPair; thisSetPair.push_back(Sets[a]); thisSetPair.push_back(Sets[l]); vector Groups = designMap.getNamesGroups(thisSetPair); InputData input(inputfile, format, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundFloatVectors* lookup = nullptr; SharedCLRVectors* clr = nullptr; if (format == "sharedfile") { lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); } while ((lookup != nullptr) || (clr != nullptr)){ if (m->getControl_pressed()) { if (lookup != nullptr) { delete lookup; } if (clr != nullptr) { delete clr; }break; } process(lookup, clr, designMap, combo); if (format == "sharedfile") { delete lookup; lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } } } } } catch(exception& e) { m->errorOut(e, "LefseCommand", "runPairwiseAnalysis"); exit(1); } } //********************************************************************************************************************** int LefseCommand::process(SharedRAbundFloatVectors*& lookup, SharedCLRVectors*& clr, DesignMap& designMap, string combo) { try { vector classes; vector subclasses; map subclass2Class; map > class2SubClasses; //maps class name to vector of its subclasses map > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. 
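//process() overview: (optionally) normalize abundances to per-million, run Kruskal-Wallis across the classes,
//confirm the flagged OTUs with pairwise Wilcoxon tests on the subclasses, then estimate an LDA effect size for
//the survivors and report everything whose absolute LDA score exceeds the lda threshold.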
map > class2GroupIndex; //maps subclass name to vector of indexes in lookup from that class. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. if (normMillion) { normalize(lookup, clr); } vector namesOfGroups; if (lookup != nullptr) { namesOfGroups = lookup->getNamesGroups(); } else { namesOfGroups = clr->getNamesGroups(); } for (int j = 0; j < namesOfGroups.size(); j++) { string group = namesOfGroups[j]; string treatment = designMap.get(group, mclass); //get value for this group in this category string thisSub = designMap.get(group, subclass); map::iterator it = subclass2Class.find(thisSub); if (it == subclass2Class.end()) { subclass2Class[thisSub] = treatment; vector temp; temp.push_back(j); subClass2GroupIndex[thisSub] = temp; }else { if (it->second != treatment) { //m->mothurOut("[WARNING]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class for Wilcoxon. Changing " + thisSub + " to " + treatment + "_" + thisSub + ".\n"); thisSub = treatment + "_" + thisSub; subclass2Class[thisSub] = treatment; vector temp; temp.push_back(j); subClass2GroupIndex[thisSub] = temp; }else { subClass2GroupIndex[thisSub].push_back(j); } } map >::iterator itClass = class2SubClasses.find(treatment); if (itClass == class2SubClasses.end()) { set temp; temp.insert(thisSub); class2SubClasses[treatment] = temp; vector temp2; temp2.push_back(j); class2GroupIndex[treatment] = temp2; classes.push_back(treatment); }else{ class2SubClasses[treatment].insert(thisSub); class2GroupIndex[treatment].push_back(j); } } //sort classes so order is right sort(classes.begin(), classes.end()); vector< vector > means = getMeans(lookup, clr, class2GroupIndex); //[numOTUs][classes] - classes in same order as class2GroupIndex //run kruskal wallis on each otu map significantOtuLabels = runKruskalWallis(lookup, clr, designMap); int numSigBeforeWilcox = significantOtuLabels.size(); if (m->getDebug()) { m->mothurOut("[DEBUG]: completed Kruskal Wallis\n"); } //check for subclass string wilcoxString = ""; if ((subclass != "") && wilc) { significantOtuLabels = runWilcoxon(lookup, clr, significantOtuLabels, class2SubClasses, subClass2GroupIndex, subclass2Class); wilcoxString += " ( " + toString(numSigBeforeWilcox) + " ) before internal wilcoxon"; } int numSigAfterWilcox = significantOtuLabels.size(); if (m->getDebug()) { m->mothurOut("[DEBUG]: completed Wilcoxon\n"); } m->mothurOut("\nNumber of significantly discriminative features: " + toString(numSigAfterWilcox) + wilcoxString + ".\n"); map sigOTUSLDA; if (numSigAfterWilcox > 0) { sigOTUSLDA = testLDA(lookup, clr, significantOtuLabels, class2GroupIndex, subClass2GroupIndex); m->mothurOut("Number of discriminative features with abs LDA score > " + toString(ldaThreshold) + " : " + toString(sigOTUSLDA.size()) + ".\n"); } else { m->mothurOut("No features with significant differences between the classes.\n"); } if (m->getDebug()) { m->mothurOut("[DEBUG]: completed lda\n"); } string label; vector otuNames; if (lookup != nullptr) { label = lookup->getLabel(); otuNames = lookup->getOTUNames(); } else { label = clr->getLabel(); otuNames = clr->getOTUNames(); } if (runAll) { printResultsAll(means, significantOtuLabels, sigOTUSLDA, label, classes, otuNames, combo); } else { printResults(means, significantOtuLabels, sigOTUSLDA, label, classes, otuNames, combo); } return 0; } catch(exception& e) { m->errorOut(e, "LefseCommand", "process"); exit(1); } } 
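// normalize() below performs the per-million rescaling used when norm=T: each sample g is multiplied by
// 1000000 / (total abundance of g), so every sample sums to roughly one million before any testing is done.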
//********************************************************************************************************************** int LefseCommand::normalize(SharedRAbundFloatVectors*& lookup, SharedCLRVectors*& clr) { try { vector mul; vector namesOfGroups; int numBins = 0; int numSamples = 0; if (lookup != nullptr) { namesOfGroups = lookup->getNamesGroups(); numBins = lookup->getNumBins(); numSamples = lookup->size(); }else { namesOfGroups = clr->getNamesGroups(); numBins = clr->getNumBins(); numSamples = clr->size(); } for (int i = 0; i < numSamples; i++) { double sum = 0; if (lookup != nullptr) { sum = lookup->getNumSeqs(namesOfGroups[i]); } else { sum = clr->getNumSeqs(namesOfGroups[i]); } mul.push_back(1000000.0/sum); } for (int i = 0; i < numSamples; i++) { for (int j = 0; j < numBins; j++) { if (lookup != nullptr) { lookup->set(j, lookup->get(j, namesOfGroups[i])*mul[i], namesOfGroups[i]); } else { clr->set(j, clr->get(j, namesOfGroups[i])*mul[i], namesOfGroups[i]); } } } return 0; } catch(exception& e) { m->errorOut(e, "LefseCommand", "normalize"); exit(1); } } //********************************************************************************************************************** map LefseCommand::runKruskalWallis(SharedRAbundFloatVectors*& lookup, SharedCLRVectors*& clr, DesignMap& designMap) { try { vector namesOfGroups; int numBins = 0; if (lookup != nullptr) { namesOfGroups = lookup->getNamesGroups(); numBins = lookup->getNumBins(); }else { namesOfGroups = clr->getNamesGroups(); numBins = clr->getNumBins(); } map significantOtuLabels; //sanity check to make sure each treatment has a group in the shared file set treatments; for (int j = 0; j < namesOfGroups.size(); j++) { string group = namesOfGroups[j]; string treatment = designMap.get(group, mclass); //get value for this group in this category treatments.insert(treatment); } if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things for classes to compare, quitting.\n"); m->setControl_pressed(true); } LinearAlgebra linear; for (int i = 0; i < numBins; i++) { if (m->getControl_pressed()) { break; } vector values; vector abunds; if (lookup != nullptr) { abunds = lookup->getOTU(i); } else { abunds = clr->getOTU(i); } for (int j = 0; j < namesOfGroups.size(); j++) { string group = namesOfGroups[j]; string treatment = designMap.get(group, mclass); //get value for this group in this category spearmanRank temp(treatment, abunds[j]); values.push_back(temp); } double pValue = 0.0; linear.calcKruskalWallis(values, pValue); if (pValue < anovaAlpha) { significantOtuLabels[i] = pValue; } } return significantOtuLabels; } catch(exception& e) { m->errorOut(e, "LefseCommand", "runKruskalWallis"); exit(1); } } //********************************************************************************************************************** //assumes not neccessarily paired map LefseCommand::runWilcoxon(SharedRAbundFloatVectors*& lookup, SharedCLRVectors*& clr, map bins, map >& class2SubClasses, map >& subClass2GroupIndex, map subclass2Class) { try { map significantOtuLabels; map::iterator it; //if it exists and meets the following requirements run Wilcoxon /* 1. 
Subclass members all belong to same main class anything else */ int numBins = 0; if (lookup != nullptr) { numBins = lookup->getNumBins(); } else { numBins = clr->getNumBins(); } for (int i = 0; i < numBins; i++) { if (m->getControl_pressed()) { break; } it = bins.find(i); if (it != bins.end()) { //flagged in Kruskal Wallis vector abunds; if (lookup != nullptr) { abunds = lookup->getOTU(i); } else { abunds = clr->getOTU(i); } bool sig = testOTUWilcoxon(class2SubClasses, abunds, subClass2GroupIndex, subclass2Class); if (sig) { significantOtuLabels[i] = it->second; } }//bins flagged from kw }//for bins return significantOtuLabels; } catch(exception& e) { m->errorOut(e, "LefseCommand", "runWilcoxon"); exit(1); } } //********************************************************************************************************************** //lefse.py - test_rep_wilcoxon_r function bool LefseCommand::testOTUWilcoxon(map >& class2SubClasses, vector abunds, map >& subClass2GroupIndex, map subclass2Class) { try { int totalOk = 0; double alphaMtc = wilcoxonAlpha; vector< set > allDiffs; LinearAlgebra linear; //for each subclass comparision map >::iterator itB; for(map >::iterator it=class2SubClasses.begin();it!=class2SubClasses.end();it++){ itB = it;itB++; for(;itB!=class2SubClasses.end();itB++){ if (m->getControl_pressed()) { return false; } bool first = true; int dirCmp = 0; // not set?? dir_cmp = "not_set" # 0=notset or none, 1=true, 2=false. int curv_sign = 0; int ok = 0; int count = 0; for (set::iterator itClass1 = (it->second).begin(); itClass1 != (it->second).end(); itClass1++) { bool br = false; for (set::iterator itClass2 = (itB->second).begin(); itClass2 != (itB->second).end(); itClass2++) { string subclass1 = *itClass1; string subclass2 = *itClass2; count++; if (m->getDebug()) { m->mothurOut( "[DEBUG comparing " + it->first + "-" + *itClass1 + " to " + itB->first + "-" + *itClass2 + "\n"); } string treatment1 = subclass2Class[subclass1]; string treatment2 = subclass2Class[subclass2]; int numSubs1 = class2SubClasses[treatment1].size(); int numSubs2 = class2SubClasses[treatment2].size(); //if mul_cor != 0: alpha_mtc = th*l_subcl1*l_subcl2 if mul_cor == 2 else 1.0-math.pow(1.0-th,l_subcl1*l_subcl2) if (strict != 0) { alphaMtc = wilcoxonAlpha * numSubs1 * numSubs2 ; } if (strict == 2) {}else{ alphaMtc = 1.0-pow((1.0-wilcoxonAlpha),(double)(numSubs1 * numSubs2)); } //fill x and y with this comparisons data vector x; vector y; //fill x and y vector xIndexes = subClass2GroupIndex[subclass1]; //indexes in lookup for this subclass vector yIndexes = subClass2GroupIndex[subclass2]; //indexes in lookup for this subclass for (int k = 0; k < yIndexes.size(); k++) { y.push_back(abunds[yIndexes[k]]); } for (int k = 0; k < xIndexes.size(); k++) { x.push_back(abunds[xIndexes[k]]); } // med_comp = False //if len(cl1) < min_c or len(cl2) < min_c: //med_comp = True bool medComp = false; // are there enough samples per subclass if ((xIndexes.size() < minC) || (yIndexes.size() < minC)) { medComp = true; } double sx = util.median(x); double sy = util.median(y); //if cl1[0] == cl2[0] and len(set(cl1)) == 1 and len(set(cl2)) == 1: //tres, first = False, False double pValue = 0.0; double H = 0.0; bool tres = true; //don't think this is set in the python source. Not sure how that is handled, but setting it here. 
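//tres records whether the Wilcoxon test on this subclass pair rejects at the corrected alpha (alphaMtc);
//the block ported from lefse.py below then checks that every subclass comparison agrees in direction
//(or, with curv=T, uses the Curtis-style curve variant) before the OTU is kept as significant.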
if (util.isEqual(x[0],y[0]) && (x.size() == 1) && (y.size() == 1)) { tres = false; first = false; } else if (!medComp) { H = linear.calcWilcoxon(x, y, pValue); if (pValue < (alphaMtc*2.0)) { tres = true; } else { tres = false; } } /*if first: first = False if not curv and ( med_comp or tres ): dir_cmp = sx < sy if sx == sy: br = True elif curv: dir_cmp = None if med_comp or tres: curv_sign += 1 dir_cmp = sx < sy else: br = True elif not curv and med_comp: if ((sx < sy) != dir_cmp or sx == sy): br = True elif curv: if tres and dir_cmp == None: curv_sign += 1 dir_cmp = sx < sy if tres and dir_cmp != (sx < sy): br = True curv_sign = -1 elif not tres or (sx < sy) != dir_cmp or sx == sy: br = True */ int sxSy = 2; //false if (sx 0) { diff = true; } } //if curv: diff = curv_sign > 0 else { //else: diff = (ok == len(cl_hie[pair[1]])*len(cl_hie[pair[0]])) diff = false; if (ok == count) { diff = true; } } if (diff) { totalOk++; } if (!diff && (multiClassStrat == "onevone")) { return false; } if (diff && (multiClassStrat == "onevall")) { //all_diff.append(pair) set pair; pair.insert(it->first); pair.insert(itB->first); allDiffs.push_back(pair); } }//classes }//classes if (multiClassStrat == "onevall") { int tot_k = class2SubClasses.size(); for(map >::iterator it=class2SubClasses.begin();it!=class2SubClasses.end();it++){ if (m->getControl_pressed()) { return false; } int nk = 0; //is this class okay in all comparisons for (int h = 0; h < allDiffs.size(); h++) { if (allDiffs[h].count(it->first) != 0) { nk++; } } if (nk == (tot_k-1)) { return true; }//if nk == tot_k-1: return True } return false; } return true; } catch(exception& e) { m->errorOut(e, "LefseCommand", "testOTUWilcoxon"); exit(1); } } //********************************************************************************************************************** //modelled after lefse.py test_lda_r function map LefseCommand::testLDA(SharedRAbundFloatVectors*& lookup, SharedCLRVectors*& clr, map bins, map >& class2GroupIndex, map >& subClass2GroupIndex) { try { map sigOTUS; map::iterator it; LinearAlgebra linear; Utils util; int numBins = 0; int numGroups = 0; if (lookup != nullptr) { numBins = lookup->getNumBins(); numGroups = lookup->size(); //lfk }else { numBins = clr->getNumBins(); numGroups = clr->size(); //lfk } vector< vector > adjustedLookup; for (int i = 0; i < numBins; i++) { if (m->getControl_pressed()) { break; } if (m->getDebug()) { m->mothurOut("[DEBUG]: bin = " + toString(i) + "\n."); } it = bins.find(i); if (it != bins.end()) { //flagged in Kruskal Wallis and Wilcoxon(if we ran it) if (m->getDebug()) { m->mothurOut("[DEBUG]:flagged bin = " + toString(i) + "\n."); } //fill x with this OTUs abundances vector tempx; if (lookup != nullptr) { tempx = lookup->getOTU(i); } else { tempx = clr->getOTU(i); } vector x; for (int h = 0; h < tempx.size(); h++) { x.push_back((double)tempx[h]); } //go through classes for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: class = " + it->first + "\n."); } //max(float(feats['class'].count(c))*0.5,4) //max(numGroups in this class*0.5, 4.0) double necessaryNum = ((double)((it->second).size())*0.5); if (4.0 > necessaryNum) { necessaryNum = 4.0; } set uniques; for (int j = 0; j < (it->second).size(); j++) { uniques.insert(x[(it->second)[j]]); } //if len(set([float(v[1]) for v in ff if v[0] == c])) > max(float(feats['class'].count(c))*0.5,4): continue if ((double)(uniques.size()) > necessaryNum) { } else { //feats[k][i] = 
math.fabs(feats[k][i] + lrand.normalvariate(0.0,max(feats[k][i]*0.05,0.01))) for (int j = 0; j < (it->second).size(); j++) { //(it->second) contains indexes of abundance for this class double sigma = max((x[(it->second)[j]]*0.05), 0.01); x[(it->second)[j]] = abs(x[(it->second)[j]] + linear.normalvariate(0.0, sigma)); } } } adjustedLookup.push_back(x); } } //go through classes int minCl = MOTHURMAX; map indexToClass; vector classes; for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { //class with minimum number of groups if ((it->second).size() < minCl) { minCl = (it->second).size(); } for (int i = 0; i < (it->second).size(); i++) { indexToClass[(it->second)[i]] = it->first; } classes.push_back(it->first); } int fractionNumGroups = numGroups * fBoots; //rfk minCl = (int)((float)(minCl*fBoots*fBoots*0.05)); minCl = max(minCl, 1); if (m->getDebug()) { m->mothurOut("[DEBUG]: about to start iters. FractionGroups = " + toString(fractionNumGroups) + "\n."); } vector< vector< vector > > results;//[iters][numComparison][numOTUs] for (int j = 0; j < iters; j++) { if (m->getControl_pressed()) { return sigOTUS; } if (m->getDebug()) { m->mothurOut("[DEBUG]: iter = " + toString(j) + "\n."); } //find "good" random vector vector rand_s; int save = 0; for (int h = 0; h < 1000; h++) { //generate a vector of length fractionNumGroups with range 0 to numGroups-1 save = h; rand_s.clear(); for (int k = 0; k < fractionNumGroups; k++) { int index = util.getRandomIndex(numGroups-1); rand_s.push_back(index); } if (!contastWithinClassesOrFewPerClass(adjustedLookup, rand_s, minCl, class2GroupIndex, indexToClass)) { h+=1000; save += 1000; } //break out of loop } if (m->getControl_pressed()) { return sigOTUS; } if (m->getDebug()) { m->mothurOut("[DEBUG]: after 1000. \n."); } if (save < 1000) { m->mothurOut("[WARNING]: Skipping iter " + toString(j+1) + " in LDA test. This can be caused by too few groups per class or not enough contrast within the classes. \n"); } else { //for each pair of classes vector< vector > temp = lda(adjustedLookup, rand_s, indexToClass, classes); //[numComparison][numOTUs] if (temp.size() != 0) { results.push_back(temp); } if (m->getDebug()) { m->mothurOut("[DEBUG]: after lda. \n."); } } } if (results.size() == 0) { return sigOTUS; } if (m->getControl_pressed()) { return sigOTUS; } //m = max([numpy.mean([means[k][kk][p] for kk in range(boots)]) for p in range(len(pairs))]) int k = 0; for (it = bins.begin(); it != bins.end(); it++) { //[numOTUs] - need to go through bins so we can tie adjustedLookup back to the binNumber. adjustedLookup[0] ->bins entry[0]. 
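//for each flagged OTU: average the per-iteration effect sizes over the bootstrap iters for every class pair,
//take the largest average (maxM), and score it as sign(maxM)*log10(1 + |maxM|); only OTUs whose score
//exceeds ldaThreshold are returned in sigOTUS.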
vector averageForEachComparison; averageForEachComparison.resize(results[0].size(), 0.0); double maxM = 0.0; //max of averages for each comparison for (int j = 0; j < results[0].size(); j++) { //numComparisons for (int i = 0; i < results.size(); i++) { //iters averageForEachComparison[j]+= results[i][j][k]; } averageForEachComparison[j] /= (double) results.size(); if (averageForEachComparison[j] > maxM) { maxM = averageForEachComparison[j]; } } //res[k] = math.copysign(1.0,m)*math.log(1.0+math.fabs(m),10) double multiple = 1.0; if (maxM < 0.0) { multiple = -1.0; } double resK = multiple * log10(1.0+abs(maxM)); if (resK > ldaThreshold) { sigOTUS[it->first] = resK; } k++; } return sigOTUS; } catch(exception& e) { m->errorOut(e, "LefseCommand", "testLDA"); exit(1); } } //********************************************************************************************************************** vector< vector > LefseCommand::getMeans(SharedRAbundFloatVectors*& lookup, SharedCLRVectors*& clr, map >& class2GroupIndex) { try { int numBins = 0; if (lookup != nullptr) { numBins = lookup->getNumBins(); } else { numBins = clr->getNumBins(); } int numClasses = class2GroupIndex.size(); vector< vector > means; //[numOTUS][classes] means.resize(numBins); for (int i = 0; i < means.size(); i++) { means[i].resize(numClasses, 0.0); } map indexToClass; int count = 0; //shortcut for vectors below map quickIndex; vector classCounts; for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { indexToClass[(it->second)[i]] = it->first; } quickIndex[it->first] = count; count++; classCounts.push_back((it->second).size()); } for (int i = 0; i < numBins; i++) { vector abunds; if (lookup != nullptr) { abunds = lookup->getOTU(i); } else { abunds = clr->getOTU(i); } for (int j = 0; j < abunds.size(); j++) { if (m->getControl_pressed()) { return means; } means[i][quickIndex[indexToClass[j]]] += abunds[j]; } } for (int i = 0; i < numBins; i++) { for (int j = 0; j < numClasses; j++) { means[i][j] /= (double) classCounts[j]; } } return means; } catch(exception& e) { m->errorOut(e, "LefseCommand", "getMeans"); exit(1); } } //********************************************************************************************************************** vector< vector > LefseCommand::lda(vector< vector >& adjustedLookup, vector rand_s, map& indexToClass, vector classes) { try { //shortcut for vectors below map quickIndex; for (int i = 0; i < classes.size(); i++) { quickIndex[classes[i]] = i; } vector randClass; //classes for rand sample vector counts; counts.resize(classes.size(), 0); for (int i = 0; i < rand_s.size(); i++) { string thisClass = indexToClass[rand_s[i]]; randClass.push_back(thisClass); counts[quickIndex[thisClass]]++; } vector< vector > a; //[numOTUs][numSampled] for (int i = 0; i < adjustedLookup.size(); i++) { vector temp; for (int j = 0; j < rand_s.size(); j++) { temp.push_back(adjustedLookup[i][rand_s[j]]); } a.push_back(temp); } LinearAlgebra linear; vector< vector > means; bool ignore; vector< vector > scaling = linear.lda(a, randClass, means, ignore); //means are returned sorted, quickIndex sorts as well since it uses a map. 
means[class][otu] = if (ignore) { scaling.clear(); return scaling; } if (m->getControl_pressed()) { return scaling; } vector< vector > w; w.resize(a.size()); //w.unit <- w/sqrt(sum(w^2)) double denom = 0.0; for (int i = 0; i < scaling.size(); i++) { w[i].push_back(scaling[i][0]); denom += (w[i][0]*w[i][0]); } denom = sqrt(denom); for (int i = 0; i < w.size(); i++) { w[i][0] /= denom; } //[numOTUs][1] - w.unit //robjects.r('LD <- xy.matrix%*%w.unit') [numSampled][numOtus] * [numOTUs][1] vector< vector > LD = linear.matrix_mult(linear.transpose(a), w); //find means for each groups LDs vector LDMeans; LDMeans.resize(classes.size(), 0.0); //means[0] -> average for [group0]. for (int i = 0; i < LD.size(); i++) { LDMeans[quickIndex[randClass[i]]] += LD[i][0]; } for (int i = 0; i < LDMeans.size(); i++) { LDMeans[i] /= (double) counts[i]; } //calculate for each comparisons i.e. with groups A,B,C = AB, AC, BC = 3; vector< vector > results;// [numComparison][numOTUs] for (int i = 0; i < LDMeans.size(); i++) { for (int l = 0; l < i; l++) { if (m->getControl_pressed()) { return scaling; } //robjects.r('effect.size <- abs(mean(LD[sub_d[,"class"]=="'+p[0]+'"]) - mean(LD[sub_d[,"class"]=="'+p[1]+'"]))') double effectSize = abs(LDMeans[i] - LDMeans[l]); //scal = robjects.r('wfinal <- w.unit * effect.size') vector compResults; for (int j = 0; j < w.size(); j++) { //[numOTUs][1] //coeff = [abs(float(v)) if not math.isnan(float(v)) else 0.0 for v in scal] double coeff = abs(w[j][0]*effectSize); if (isnan(coeff) || isinf(coeff)) { coeff = 0.0; } //gm = abs(res[p[0]][j] - res[p[1]][j]) - res is the means for each group for each otu double gm = abs(means[i][j] - means[l][j]); //means[k][i].append((gm+coeff[j])*0.5) compResults.push_back((gm+coeff)*0.5); } results.push_back(compResults); } } return results; } catch(exception& e) { m->errorOut(e, "LefseCommand", "lda"); exit(1); } } //********************************************************************************************************************** //modelled after lefse.py contast_within_classes_or_few_per_class function bool LefseCommand::contastWithinClassesOrFewPerClass(vector< vector >& lookup, vector rands, int minCl, map > class2GroupIndex, map indexToClass) { try { map cls; int countFound = 0; for (int i = 0; i < rands.size(); i++) { //fill cls with the classes represented in the random selection for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { if (util.inUsersGroups(rands[i], (it->second))) { map::iterator itClass = cls.find(it->first); if (itClass != cls.end()) { itClass->second++; } else { cls[it->first] = 1; } countFound++; } } } //sanity check if (rands.size() != countFound) { m->mothurOut("oops, should never get here, missing something.\n"); } if (cls.size() < class2GroupIndex.size()) { return true; } //some classes are not present in sampling for (map::iterator itClass = cls.begin(); itClass != cls.end(); itClass++) { if (itClass->second < minCl) { return true; } //this sampling has class count below minimum } //for this otu int numBins = lookup.size(); for (int i = 0; i < numBins; i++) { if (m->getControl_pressed()) { break; } //break up random sampling by class map > class2Values; //maps class name -> set of abunds present in random sampling. F003Early -> 0.001, 0.003... for (int j = 0; j < rands.size(); j++) { class2Values[indexToClass[rands[j]]].insert(lookup[i][rands[j]]); //rands[j] = index of randomly selected group in lookup, randIndex2Class[rands[j]] = class this group belongs to. 
lookup[rands[j]]->getAbundance(i) = abundance of this group for this OTU. } //are the unique values less than we want //if (len(set(col)) <= min_cl and min_cl > 1) or (min_cl == 1 and len(set(col)) <= 1): for (map >::iterator it = class2Values.begin(); it != class2Values.end(); it++) { if (((it->second).size() <= minCl && minCl > 1) || (minCl == 1 && (it->second).size() <= 1)) { return true; } } } return false; } catch(exception& e) { m->errorOut(e, "LefseCommand", "contastWithinClassesOrFewPerClass"); exit(1); } } //********************************************************************************************************************** void LefseCommand::printResults(vector< vector > means, map sigKW, map sigLDA, string label, vector classes, vector currentLabels, string comboName) { try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[distance]"] = label; if (pairwise) { variables["[combo]"] = comboName; } string outputFileName = getOutputFileName("summary",variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); //output headers out << "OTU\tlogMaxMean\tClass\tLDA\tpValue\n"; string temp = ""; for (int i = 0; i < means.size(); i++) { //[numOTUs] //find max mean of classes double maxMean = -1.0; int maxClassIndex = 0; for (int j = 0; j < means[i].size(); j++) { if (means[i][j] > maxMean) { maxMean = means[i][j]; maxClassIndex = j; } } //str(math.log(max(max(v),1.0),10.0)) double logMaxMean = 1.0; if (maxMean > logMaxMean) { logMaxMean = maxMean; } logMaxMean = log10(logMaxMean); //print maximum first out << currentLabels[i] << '\t' << logMaxMean << '\t' << classes[maxClassIndex] << '\t'; if (m->getDebug()) { temp = currentLabels[i] + '\t' + toString(logMaxMean) + '\t' + classes[maxClassIndex] + '\t'; } map::iterator it = sigLDA.find(i); if (it != sigLDA.end()) { out << it->second << '\t' << sigKW[i] << endl; //sigLDA is a subset of sigKW so no need to look if (m->getDebug()) { temp += toString(it->second) + '\t' + toString(sigKW[i]) + '\n'; m->mothurOut(temp); temp = ""; } }else { out << "NA\tNA" << endl; } } out.close(); return; } catch(exception& e) { m->errorOut(e, "LefseCommand", "printResults"); exit(1); } } //********************************************************************************************************************** void LefseCommand::printResultsAll(vector< vector > means, map sigKW, map sigLDA, string label, vector classes, vector currentLabels, string comboName) { try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[distance]"] = label; if (pairwise) { variables["[combo]"] = comboName; } string outputFileName = getOutputFileName("summary",variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); //output headers out << "OTU\tlogMaxMean\tLDA\tpValue\n"; string temp = ""; for (int i = 0; i < means.size(); i++) { //[numOTUs] //find max mean of classes double maxMean = -1.0; for (int j = 0; j < means[i].size(); j++) { if (means[i][j] > maxMean) { maxMean = means[i][j]; } } //str(math.log(max(max(v),1.0),10.0)) double logMaxMean = 1.0; if (maxMean > logMaxMean) { logMaxMean = maxMean; } logMaxMean = log10(logMaxMean); //print maximum first out << currentLabels[i] << '\t' << logMaxMean << '\t'; if (m->getDebug()) { temp = currentLabels[i] + '\t' + 
toString(logMaxMean) + '\t'; } map::iterator it = sigLDA.find(i); if (it != sigLDA.end()) { out << it->second << '\t' << sigKW[i] << endl; //sigLDA is a subset of sigKW so no need to look if (m->getDebug()) { temp += toString(it->second) + '\t' + toString(sigKW[i]) + '\n'; m->mothurOut(temp); temp = ""; } }else { out << "NA\tNA" << endl; } } out.close(); return; } catch(exception& e) { m->errorOut(e, "LefseCommand", "printResultsAll"); exit(1); } } //********************************************************************************************************************** //printToCoutForRTesting(adjustedLookup, rand_s, class2GroupIndex, numBins); bool LefseCommand::printToCoutForRTesting(vector< vector >& adjustedLookup, vector rand_s, map >& class2GroupIndex, map bins, map >& subClass2GroupIndex, vector groups, vector currentLabels) { try { cout << "rand_s = "; for (int h = 0; h < rand_s.size(); h++) { cout << rand_s[h] << '\t'; } cout << endl; //print otu data int count = 0; for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->getControl_pressed()) { break; } cout << currentLabels[it->first] << " <- c("; for (int h = 0; h < rand_s.size()-1; h++) { cout << (adjustedLookup[count][rand_s[h]]) << ", "; } cout << (adjustedLookup[count][rand_s[rand_s.size()-1]]) << ")\n"; count++; } string tempOutput = "treatments <- c("; for (int h = 0; h < rand_s.size(); h++) { //find class this index is in for (map >::iterator it = class2GroupIndex.begin(); it!= class2GroupIndex.end(); it++) { if (util.inUsersGroups(rand_s[h], (it->second)) ) { tempOutput += "\"" +it->first + "\"" + ","; } //"\"" +it->first + "\"" } } tempOutput = tempOutput.substr(0, tempOutput.length()-1); tempOutput += ")\n"; cout << tempOutput; //print data frame tempOutput = "dat <- data.frame("; for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->getControl_pressed()) { break; } tempOutput += "\"" + currentLabels[it->first] + "\"=" + currentLabels[it->first] + ","; } tempOutput += " class=treatments"; tempOutput += ")\n"; cout << tempOutput; tempOutput = "z <- suppressWarnings(mylda(as.formula(class ~ "; for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->getControl_pressed()) { break; } tempOutput += currentLabels[it->first] + "+"; } tempOutput = tempOutput.substr(0, tempOutput.length()-1); //rip off extra plus sign tempOutput += "), data = dat, tol = 1e-10))"; cout << tempOutput + "\nz\n"; cout << "w <- z$scaling[,1]\n"; //robjects.r('w <- z$scaling[,1]') cout << "w.unit <- w/sqrt(sum(w^2))\n"; //robjects.r('w.unit <- w/sqrt(sum(w^2))') cout << "ss <- dat[,-match(\"class\",colnames(dat))]\n"; //robjects.r('ss <- sub_d[,-match("class",colnames(sub_d))]') cout << "xy.matrix <- as.matrix(ss)\n"; //robjects.r('xy.matrix <- as.matrix(ss)') cout << "LD <- xy.matrix%*%w.unit\n"; //robjects.r('LD <- xy.matrix%*%w.unit') cout << "effect.size <- abs(mean(LD[dat[,\"class\"]==\"'+p[0]+'\"]) - mean(LD[dat[,\"class\"]==\"'+p[1]+'\"]))\n"; //robjects.r('effect.size <- abs(mean(LD[sub_d[,"class"]=="'+p[0]+'"]) - mean(LD[sub_d[,"class"]=="'+p[1]+'"]))') cout << "wfinal <- w.unit * effect.size\n"; //scal = robjects.r('wfinal <- w.unit * effect.size') cout << "mm <- z$means\n"; //rres = robjects.r('mm <- z$means') return true; } catch(exception& e) { m->errorOut(e, "LefseCommand", "printToCoutForRTesting"); exit(1); } } /******************************************/ mothur-1.48.0/source/commands/lefsecommand.h000077500000000000000000000113671424121717000210430ustar00rootroot00000000000000// // 
lefsecommand.h // Mothur // // Created by SarahsWork on 6/12/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef __Mothur__lefsecommand__ #define __Mothur__lefsecommand__ #include "command.hpp" /* Columns = groups, rows are OTUs, class = design From http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload Input data consist of a collection of m samples (columns) each made up of n numerical features (rows, typically normalized per-sample, red representing high values and green low). These samples are labeled with a class (taking two or more possible values) that represents the main biological hypothesis under investigation; they may also have one or more subclass labels reflecting within-class groupings. Step 1: the Kruskall-Wallis test analyzes all features, testing whether the values in different classes are differentially distributed. Features violating the null hypothesis are further analyzed in Step 2. Step 2: the pairwise Wilcoxon test checks whether all pairwise comparisons between subclasses within different classes significantly agree with the class level trend. Step 3: the resulting subset of vectors is used to build a Linear Discriminant Analysis model from which the relative difference among classes is used to rank the features. The final output thus consists of a list of features that are discriminative with respect to the classes, consistent with the subclass grouping within classes, and ranked according to the effect size with which they differentiate classes. */ #include "command.hpp" #include "inputdata.h" #include "designmap.h" #include "sharedclrvectors.hpp" /**************************************************************************************************/ class LefseCommand : public Command { public: LefseCommand(string); ~LefseCommand(){} vector setParameters(); string getCommandName() { return "lefse"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "Segata, N., J. Izard, L. Waldron, D. Gevers, L. Miropolsky, W. S. Garrett, and C. Huttenhower. 2011. Metagenomic biomarker discovery and explanation. 
Genome Biol 12:R60, http://www.mothur.org/wiki/Lefse"; } string getDescription() { return "brief description"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines, wilc, wilcsamename, curv, subject, normMillion, pairwise, runAll; string sharedfile, designfile, mclass, subclass, rankTec, multiClassStrat, sets, inputfile, clrfile, format; vector outputNames, Sets; set labels; double anovaAlpha, wilcoxonAlpha, fBoots, ldaThreshold; int nlogs, iters, strict, minC; int process(SharedRAbundFloatVectors*&, SharedCLRVectors*&, DesignMap&, string combo); int normalize(SharedRAbundFloatVectors*&, SharedCLRVectors*&); map runKruskalWallis(SharedRAbundFloatVectors*&, SharedCLRVectors*&, DesignMap&); map runWilcoxon(SharedRAbundFloatVectors*&, SharedCLRVectors*&, map, map >& class2SubClasses, map >& subClass2GroupIndex, map); map testLDA(SharedRAbundFloatVectors*&, SharedCLRVectors*&, map, map >& class2GroupIndex, map >&); vector< vector > getMeans(SharedRAbundFloatVectors*& lookup, SharedCLRVectors*& clr, map >& class2GroupIndex); bool contastWithinClassesOrFewPerClass(vector< vector >&, vector rands, int minCl, map > class2GroupIndex, map indexToClass); vector< vector > lda(vector< vector >& adjustedLookup, vector rand_s, map& indexToClass, vector); bool testOTUWilcoxon(map >& class2SubClasses, vector abunds, map >& subClass2GroupIndex, map); void printResults(vector< vector >, map, map, string, vector, vector, string); void printResultsAll(vector< vector >, map, map, string, vector, vector, string); //for testing bool printToCoutForRTesting(vector< vector >& adjustedLookup, vector rand_s, map >& class2GroupIndex, map bins, map >&, vector, vector); int makeShared(int); void runPairwiseAnalysis(DesignMap&); }; /**************************************************************************************************/ #endif /* defined(__Mothur__lefsecommand__) */ mothur-1.48.0/source/commands/libshuffcommand.cpp000077500000000000000000000521361424121717000221010ustar00rootroot00000000000000/* * libshuffcommand.cpp * Mothur * * Created by Sarah Westcott on 3/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class is designed to implement an integral form of the Cramer-von Mises statistic. you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0 DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. 
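A sketch of the statistic, as I read it from Singleton et al. (2001) and the integral form
introduced in the paper cited above (the symbols below are descriptive only and are not the
variable names used in this class): for two sequence libraries X and Y, the homologous
coverage C_X(D) is the fraction of sequences in X that have at least one neighbor within X
at a distance <= D, and the heterologous coverage C_XY(D) is the fraction of sequences in X
that have at least one neighbor in Y at a distance <= D. The test statistic is

    Delta C_XY = sum over D of [ C_X(D) - C_XY(D) ]^2

evaluated either at discrete distance steps of size 'step' (DLibshuff) or in integral form up
to 'cutoff' (SLibshuff). Significance is estimated by shuffling the group labels 'iters'
times and counting how often the statistic of a shuffled matrix is at least as large as the
observed value; those counts become the p-values reported by printSummaryFile().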
*/ #include "libshuffcommand.h" #include "libshuff.h" #include "slibshuff.h" #include "dlibshuff.h" //********************************************************************************************************************** vector LibShuffCommand::setParameters(){ try { CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","coverage-libshuffsummary",false,true,true); parameters.push_back(pphylip); CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pgroup); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter piters("iters", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pstep("step", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(pstep); CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pcutoff); CommandParameter pform("form", "Multiple", "discrete-integral", "integral", "", "", "","",false,false); parameters.push_back(pform); CommandParameter psim("sim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psim); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["coverage"] = tempOutNames; outputTypes["libshuffsummary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "LibShuffCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string LibShuffCommand::getHelpString(){ try { string helpString = ""; helpString += "The libshuff command parameters are phylip, group, sim, groups, iters, step, form and cutoff. phylip and group parameters are required, unless you have valid current files.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n"; helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random matrices you would like compared to your matrix.\n"; helpString += "The step parameter allows you to specify change in distance you would like between each output if you are using the discrete form.\n"; helpString += "The form parameter allows you to specify if you would like to analyze your matrix using the discrete or integral form. 
Your options are integral or discrete.\n"; helpString += "The libshuff command should be in the following format: libshuff(groups=yourGroups, iters=yourIters, cutOff=yourCutOff, form=yourForm, step=yourStep).\n"; helpString += "Example libshuff(groups=A-B-C, iters=500, form=discrete, step=0.01, cutOff=2.0).\n"; helpString += "The default value for groups is all the groups in your groupfile, iters is 10000, cutoff is 1.0, form is integral and step is 0.01.\n"; helpString += "The libshuff command output two files: .coverage and .slsummary their descriptions are in the manual.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "LibShuffCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string LibShuffCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "coverage") { pattern = "[filename],libshuff.coverage"; } else if (type == "libshuffsummary") { pattern = "[filename],libshuff.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "LibShuffCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** LibShuffCommand::LibShuffCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); map::iterator it; ValidParameters validParameter; phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { phylipfile = current->getPhylipFile(); if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("You must provide a phylip file.\n"); abort = true; } }else { current->setPhylipFile(phylipfile); } //check for required parameters groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { m->mothurOut("You must provide a group file.\n"); abort = true; } }else { current->setGroupFile(groupfile); } if (outputdir == ""){ outputdir += util.hasPath(phylipfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; savegroups = groups; } else { savegroups = groups; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string temp; temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "10000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "1.0"; } util.mothurConvert(temp, cutOff); temp = validParameter.valid(parameters, "step"); if (temp == "not found") { temp = "0.01"; } util.mothurConvert(temp, step); temp = validParameter.valid(parameters, "sim"); if (temp == "not found") { temp = "F"; } sim = util.isTrue(temp); userform = validParameter.valid(parameters, "form"); if (userform == "not found") { userform = "integral"; } } } catch(exception& e) { m->errorOut(e, "LibShuffCommand", "LibShuffCommand"); exit(1); } } //********************************************************************************************************************** int LibShuffCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //read files groupMap = new GroupMap(groupfile); int error = groupMap->readMap(); if (error == 1) { delete groupMap; return 0; } ifstream in; util.openInputFile(phylipfile, in); matrix = new FullMatrix(in, groupMap, sim); //reads the matrix file in.close(); if (m->getControl_pressed()) { delete groupMap; delete matrix; return 0; } //if files don't match... if (matrix->getNumSeqs() < groupMap->getNumSeqs()) { m->mothurOut("Your distance file contains " + toString(matrix->getNumSeqs()) + " sequences, and your group file contains " + toString(groupMap->getNumSeqs()) + " sequences."); m->mothurOutEndLine(); //create new group file if(outputdir == "") { outputdir += util.hasPath(groupfile); } string newGroupFile = outputdir + util.getRootName(util.getSimpleName(groupfile)) + "editted.groups"; outputNames.push_back(newGroupFile); ofstream outGroups; util.openOutputFile(newGroupFile, outGroups); for (int i = 0; i < matrix->getNumSeqs(); i++) { if (m->getControl_pressed()) { delete groupMap; delete matrix; outGroups.close(); util.mothurRemove(newGroupFile); return 0; } Names temp = matrix->getRowInfo(i); outGroups << temp.seqName << '\t' << temp.groupName << endl; } outGroups.close(); m->mothurOut(newGroupFile + " is a new group file containing only the sequence that are in your distance file. 
I will read this file instead.\n"); //read new groupfile delete groupMap; groupfile = newGroupFile; groupMap = new GroupMap(groupfile); groupMap->readMap(); if (m->getControl_pressed()) { delete groupMap; delete matrix; util.mothurRemove(newGroupFile); return 0; } } setGroups(); //set the groups to be analyzed and sorts them if (numGroups < 2) { m->mothurOut("[ERROR]: libshuff requires at least 2 groups, you only have " + toString(numGroups) + ", aborting.\n"); m->setControl_pressed(true); } if (m->getControl_pressed()) { delete groupMap; delete matrix; return 0; } /********************************************************************************************/ //this is needed because when we read the matrix we sort it into groups in alphabetical order //the rest of the command and the classes used in this command assume specific order /********************************************************************************************/ matrix->setGroups(groupMap->getNamesOfGroups()); vector sizes; for (int i = 0; i < (groupMap->getNamesOfGroups()).size(); i++) { sizes.push_back(groupMap->getNumSeqs((groupMap->getNamesOfGroups())[i])); } matrix->setSizes(sizes); if(userform == "discrete"){ form = new DLibshuff(matrix, iters, step, cutOff); } else{ form = new SLibshuff(matrix, iters, cutOff); } savedDXYValues = form->evaluateAll(); savedMinValues = form->getSavedMins(); if (m->getControl_pressed()) { delete form; delete matrix; delete groupMap; return 0; } pValueCounts.resize(numGroups); for(int i=0;igetControl_pressed()) { outputTypes.clear(); delete form; delete matrix; delete groupMap; return 0; } for(int i=0;igetControl_pressed()) { outputTypes.clear(); delete form; delete matrix; delete groupMap; return 0; } int spoti = groupMap->groupIndex[Groups[i]]; //neccessary in case user selects groups so you know where they are in the matrix int spotj = groupMap->groupIndex[Groups[j]]; for(int p=0;pgetControl_pressed()) { outputTypes.clear(); delete form; delete matrix; delete groupMap; return 0; } form->randomizeGroups(spoti,spotj); if(form->evaluatePair(spoti,spotj) >= savedDXYValues[spoti][spotj]) { pValueCounts[i][j]++; } if(form->evaluatePair(spotj,spoti) >= savedDXYValues[spotj][spoti]) { pValueCounts[j][i]++; } if (m->getControl_pressed()) { outputTypes.clear(); delete form; delete matrix; delete groupMap; return 0; } } form->resetGroup(spoti); form->resetGroup(spotj); } } if (m->getControl_pressed()) { outputTypes.clear(); delete form; delete matrix; delete groupMap; return 0; } m->mothurOutEndLine(); printSummaryFile(); printCoverageFile(); delete form; delete matrix; delete groupMap; if (m->getControl_pressed()) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "LibShuffCommand", "execute"); exit(1); } } //********************************************************************************************************************** int LibShuffCommand::printCoverageFile() { try { ofstream outCov; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(phylipfile)); summaryFile = getOutputFileName("coverage", variables); util.openOutputFile(summaryFile, outCov); outputNames.push_back(summaryFile); outputTypes["coverage"].push_back(summaryFile); outCov.setf(ios::fixed, ios::floatfield); 
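// Layout of the coverage file written below: one row per observed distance. The first
// column is the distance, followed by one homologous coverage column per group, then,
// for every pair of groups, two heterologous coverage columns (X-Y and Y-X). Coverage
// values are cumulative counts at or below that distance, normalized by the totals in
// the final row.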
outCov.setf(ios::showpoint); map > allDistances; map >::iterator it; vector > indices(numGroups); int numIndices = numGroups * numGroups; int index = 0; for(int i=0;igroupIndex[Groups[i]]; //neccessary in case user selects groups so you know where they are in the matrix int spotj = groupMap->groupIndex[Groups[j]]; for(int k=0;kgetControl_pressed()) { outCov.close(); return 0; } if(allDistances[savedMinValues[spoti][spotj][k]].size() != 0){ allDistances[savedMinValues[spoti][spotj][k]][indices[i][j]]++; } else{ allDistances[savedMinValues[spoti][spotj][k]].assign(numIndices, 0); allDistances[savedMinValues[spoti][spotj][k]][indices[i][j]] = 1; } } } } it=allDistances.begin(); vector prevRow = it->second; it++; for(;it!=allDistances.end();it++){ for(int i=0;isecond.size();i++){ it->second[i] += prevRow[i]; } prevRow = it->second; } vector lastRow = allDistances.rbegin()->second; outCov << setprecision(8); outCov << "dist"; for (int i = 0; i < numGroups; i++){ outCov << '\t' << Groups[i]; } for (int i=0;igetControl_pressed()) { outCov.close(); return 0; } outCov << '\t' << Groups[i] << '-' << Groups[j] << '\t'; outCov << Groups[j] << '-' << Groups[i]; } } outCov << endl; for(it=allDistances.begin();it!=allDistances.end();it++){ outCov << it->first << '\t'; for(int i=0;isecond[indices[i][i]]/(float)lastRow[indices[i][i]] << '\t'; } for(int i=0;igetControl_pressed()) { outCov.close(); return 0; } outCov << it->second[indices[i][j]]/(float)lastRow[indices[i][j]] << '\t'; outCov << it->second[indices[j][i]]/(float)lastRow[indices[j][i]] << '\t'; } } outCov << endl; } outCov.close(); return 0; } catch(exception& e) { m->errorOut(e, "LibShuffCommand", "printCoverageFile"); exit(1); } } //********************************************************************************************************************** int LibShuffCommand::printSummaryFile() { try { ofstream outSum; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(phylipfile)); summaryFile = getOutputFileName("libshuffsummary",variables); util.openOutputFile(summaryFile, outSum); outputNames.push_back(summaryFile); outputTypes["libshuffsummary"].push_back(summaryFile); outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); cout << setw(20) << left << "Comparison" << '\t' << setprecision(8) << "dCXYScore" << '\t' << "Significance" << endl; m->mothurOutJustToLog("Comparison\tdCXYScore\tSignificance\n"); outSum << setw(20) << left << "Comparison" << '\t' << setprecision(8) << "dCXYScore" << '\t' << "Significance" << endl; int precision = (int)log10(iters); for(int i=0;igetControl_pressed()) { outSum.close(); return 0; } int spoti = groupMap->groupIndex[Groups[i]]; //neccessary in case user selects groups so you know where they are in the matrix int spotj = groupMap->groupIndex[Groups[j]]; if(pValueCounts[i][j]){ cout << setw(20) << left << Groups[i]+'-'+Groups[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl; m->mothurOutJustToLog(Groups[i]+"-"+Groups[j] + "\t" + toString(savedDXYValues[spoti][spotj]) + "\t" + toString((pValueCounts[i][j]/(float)iters))); m->mothurOutEndLine(); outSum << setw(20) << left << Groups[i]+'-'+Groups[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl; } else{ cout << setw(20) << left << Groups[i]+'-'+Groups[j] << '\t' << 
setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << '<' <mothurOutJustToLog(Groups[i]+"-"+Groups[j] + "\t" + toString(savedDXYValues[spoti][spotj]) + "\t" + toString((1/(float)iters))); m->mothurOutEndLine(); outSum << setw(20) << left << Groups[i]+'-'+Groups[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << '<' <mothurOutJustToLog(Groups[j]+"-"+Groups[i] + "\t" + toString(savedDXYValues[spotj][spoti]) + "\t" + toString((pValueCounts[j][i]/(float)iters))); m->mothurOutEndLine(); outSum << setw(20) << left << Groups[j]+'-'+Groups[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << setprecision (precision) << pValueCounts[j][i]/(float)iters << endl; } else{ cout << setw(20) << left << Groups[j]+'-'+Groups[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << '<' <mothurOutJustToLog(Groups[j]+"-"+Groups[i] + "\t" + toString(savedDXYValues[spotj][spoti]) + "\t" + toString((1/(float)iters))); m->mothurOutEndLine(); outSum << setw(20) << left << Groups[j]+'-'+Groups[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << '<' <errorOut(e, "LibShuffCommand", "printSummaryFile"); exit(1); } } //********************************************************************************************************************** void LibShuffCommand::setGroups() { try { vector myGroups = Groups; //if the user has not entered specific groups to analyze then do them all if (Groups.size() == 0) { numGroups = groupMap->getNumGroups(); for (int i=0; i < numGroups; i++) { myGroups.push_back((groupMap->getNamesOfGroups())[i]); } } else { if (savegroups != "all") { //check that groups are valid for (int i = 0; i < myGroups.size(); i++) { if (groupMap->isValidGroup(myGroups[i]) != true) { m->mothurOut(myGroups[i] + " is not a valid group, and will be disregarded.\n"); // erase the invalid group from globaldata->Groups myGroups.erase(myGroups.begin()+i); } } //if the user only entered invalid groups if ((myGroups.size() == 0) || (myGroups.size() == 1)) { numGroups = groupMap->getNumGroups(); for (int i=0; i < numGroups; i++) { myGroups.push_back((groupMap->getNamesOfGroups())[i]); } m->mothurOut("When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile.\n"); } else { numGroups = myGroups.size(); } } else { //users wants all groups numGroups = groupMap->getNumGroups(); myGroups.clear(); for (int i=0; i < numGroups; i++) { myGroups.push_back((groupMap->getNamesOfGroups())[i]); } } } //sort so labels match sort(myGroups.begin(), myGroups.end()); for (int i = 0; i < (groupMap->getNamesOfGroups()).size(); i++) { groupMap->groupIndex[(groupMap->getNamesOfGroups())[i]] = i; } Groups = myGroups; numGroups = Groups.size(); } catch(exception& e) { m->errorOut(e, "LibShuffCommand", "setGroups"); exit(1); } } /***********************************************************/ mothur-1.48.0/source/commands/libshuffcommand.h000077500000000000000000000034041424121717000215400ustar00rootroot00000000000000#ifndef LIBSHUFFCOMMAND_H #define LIBSHUFFCOMMAND_H /* * libshuffcommand.h * Mothur * * Created by Sarah Westcott on 3/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "command.hpp" #include "fullmatrix.h" #include "libshuff.h" #include "groupmap.h" class LibShuffCommand : public Command { public: LibShuffCommand(string); ~LibShuffCommand(){}; vector setParameters(); string getCommandName() { return "libshuff"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Singleton DR, Furlong MA, Rathbun SL, Whitman WB (2001). Quantitative comparisons of 16S rRNA gene sequence libraries from environmental samples. Appl Environ Microbiol 67: 4374-6. \nSchloss PD, Larget BR, Handelsman J (2004). Integration of microbial ecology and statistics: a test to compare gene libraries. Appl Environ Microbiol 70: 5485-92. \nhttp://www.mothur.org/wiki/Libshuff"; } string getDescription() { return "a generic test that describes whether two or more communities have the same structure using the Cramer-von Mises test statistic"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: void setGroups(); int printCoverageFile(); int printSummaryFile(); GroupMap* groupMap; FullMatrix* matrix; Libshuff* form; float cutOff, step; int numGroups, numComp, iters; string coverageFile, summaryFile, phylipfile, groupfile; vector > pValueCounts; vector > savedDXYValues; vector > > savedMinValues; bool abort, sim; string outputFile, groups, userform, savegroups; vector Groups, outputNames; //holds groups to be used }; #endif mothur-1.48.0/source/commands/listotuscommand.cpp000077500000000000000000000307461424121717000221700ustar00rootroot00000000000000// // listotucommand.cpp // Mothur // // Created by Sarah Westcott on 5/15/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "listotuscommand.h" #include "inputdata.h" //********************************************************************************************************************** vector ListOtusCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","accnos",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","accnos",false,false); parameters.push_back(prelabund); CommandParameter plist("list", "InputTypes", "", "", "SharedRel", "SharedRel", "none","accnos",false,false); parameters.push_back(plist); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "SharedRel", "SharedRel", "none","accnos",false,false); parameters.push_back(pconstaxonomy); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["accnos"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ListOtusCommand", "setParameters"); exit(1); } } 
//********************************************************************************************************************** string ListOtusCommand::getHelpString(){ try { string helpString = ""; helpString += "The list.otus lists otu labels from shared, relabund, list or constaxonomy file. The results can be used by the get.otus to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n"; helpString += "The list.otulabels parameters are: shared, relabund, label and groups.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n"; helpString += "The list.otulabels commmand should be in the following format: \n"; helpString += "list.otulabels(shared=yourSharedFile, groups=yourGroup1-yourGroup2)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "ListOtusCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ListOtusCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "accnos") { pattern = "[filename],[distance],accnos-[filename],accnos"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ListOtusCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ListOtusCommand::ListOtusCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { inputFileName = sharedfile; format = "sharedfile"; current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { inputFileName = relabundfile; format = "relabund"; current->setRelAbundFile(relabundfile); } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else { inputFileName = listfile; format = "list"; current->setListFile(listfile); } constaxonomy = validParameter.validFile(parameters, "constaxonomy"); if (constaxonomy == "not open") { abort = true; } else if (constaxonomy == "not found") { constaxonomy = ""; } else { inputFileName = constaxonomy; format = "constaxonomy"; current->setConsTaxonomyFile(constaxonomy); } if ((relabundfile == "") && (sharedfile == "") && (constaxonomy == "") && (listfile== "")) { //is there are current file available for either of these? 
//give priority to shared, then relabund sharedfile = current->getSharedFile(); if (sharedfile != "") { inputFileName = sharedfile; format="sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { inputFileName = relabundfile; format="relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { inputFileName = listfile; format="list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { constaxonomy = current->getConsTaxonomyFile(); if (constaxonomy != "") { inputFileName = constaxonomy; format="constaxonomy"; m->mothurOut("Using " + constaxonomy + " as input file for the constaxonomy parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a shared, list, relabund or constaxonomy file.\n"); abort = true; } } } } } if (outputdir == ""){ outputdir = util.hasPath(inputFileName); } string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } } } catch(exception& e) { m->errorOut(e, "ListOtusCommand", "ListOtusCommand"); exit(1); } } //********************************************************************************************************************** int ListOtusCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(inputFileName, format, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; if (format == "relabund") { SharedRAbundFloatVectors* lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } printList(lookup->getOTUNames(), lookup->getLabel()); delete lookup; lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } }else if (format == "sharedfile") { SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } printList(lookup->getOTUNames(), lookup->getLabel()); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } }else if (format == "list") { ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } printList(list->getLabels(), list->getLabel()); delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } }else if (format == "constaxonomy") { createList(constaxonomy); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set relabund file as new current relabundfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; 
current->setAccnosFile(currentName); } } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ListOtusCommand", "execute"); exit(1); } } //********************************************************************************************************************** int ListOtusCommand::printList(vector currentLabels, string distance){ try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileName)); variables["[distance]"] = distance; string outputFileName = getOutputFileName("accnos",variables); outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); for (int i = 0; i < currentLabels.size(); i++) { out << currentLabels[i] << endl; } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "ListOtusCommand", "printList"); exit(1); } } //********************************************************************************************************************** int ListOtusCommand::createList(string constaxFile){ try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileName)); string outputFileName = getOutputFileName("accnos",variables); outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(constaxFile, in); string otuLabel; //read headers string headers = util.getline(in); while(!in.eof()){ if (m->getControl_pressed()) { break; } in >> otuLabel; string junk = util.getline(in); gobble(in); out << otuLabel << endl; } in.close(); out.close(); return 0; } catch(exception& e) { m->errorOut(e, "ListOtusCommand", "createList"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/listotuscommand.h000077500000000000000000000032651424121717000216310ustar00rootroot00000000000000#ifndef Mothur_listotucommand_h #define Mothur_listotucommand_h // // listotucommand.h // Mothur // // Created by Sarah Westcott on 5/15/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "command.hpp" #include "listvector.hpp" #include "sharedrabundvectors.hpp" #include "sharedrabundfloatvectors.hpp" /**************************************************************************************************/ class ListOtusCommand : public Command { public: ListOtusCommand(string); ~ListOtusCommand(){} vector setParameters(); string getCommandName() { return "list.otus"; } string getCommandCategory() { return "OTU-Based Approaches"; } //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/List.otulabels"; } string getDescription() { return "lists otu labels from shared or relabund file. 
Can be used by get.otus with output from classify.otu, otu.association, or corr.axes to select specific otus."; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines; string sharedfile, relabundfile, label, inputFileName, format, listfile, constaxonomy; vector outputNames; vector Groups; set labels; int printList(vector currentLabels, string distance); int createList(string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/listseqscommand.cpp000077500000000000000000000451771424121717000221550ustar00rootroot00000000000000/* * listseqscommand.cpp * Mothur * * Created by Sarah Westcott on 7/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "listseqscommand.h" #include "sequence.hpp" #include "listvector.hpp" #include "counttable.h" #include "fastqread.h" //********************************************************************************************************************** vector ListSeqsCommand::setParameters(){ try { CommandParameter pfastq("fastq", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfastq); CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfasta); CommandParameter pqfile("qfile", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pqfile); CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pgroup); CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(plist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(ptaxonomy); CommandParameter palignreport("alignreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false); parameters.push_back(palignreport); CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false); parameters.push_back(pcontigsreport); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["accnos"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ListSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy, fastq, qfile, alignreport or contigsreport file and outputs a .accnos file containing sequence names.\n"; 
helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy, fastq, contigsreport and alignreport. You must provide one of these parameters.\n"; helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n"; helpString += "Example list.seqs(fasta=amazon.fasta).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ListSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "accnos") { pattern = "[filename],accnos"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ListSeqsCommand::ListSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafiles = validParameter.validFiles(parameters, "fasta"); if (fastafiles.size() != 0) { if (fastafiles[0] == "not open") { abort = true; } else { current->setFastaFile(fastafiles[0]); } } namefiles = validParameter.validFiles(parameters, "name"); if (namefiles.size() != 0) { if (namefiles[0] == "not open") { abort = true; } else { current->setNameFile(namefiles[0]); } } groupfiles = validParameter.validFiles(parameters, "group"); if (groupfiles.size() != 0) { if (groupfiles[0] == "not open") { abort = true; } else { current->setGroupFile(groupfiles[0]); } } alignfiles = validParameter.validFiles(parameters, "alignreport"); if (alignfiles.size() != 0) { if (alignfiles[0] == "not open") { abort = true; } } contigsreportfiles = validParameter.validFiles(parameters, "contigsreport"); if (contigsreportfiles.size() != 0) { if (contigsreportfiles[0] == "not open") { abort = true; } else { current->setContigsReportFile(contigsreportfiles[0]); } } listfiles = validParameter.validFiles(parameters, "list"); if (listfiles.size() != 0) { if (listfiles[0] == "not open") { abort = true; } else { current->setListFile(listfiles[0]); } } taxfiles = validParameter.validFiles(parameters, "taxonomy"); if (taxfiles.size() != 0) { if (taxfiles[0] == "not open") { abort = true; } else { current->setTaxonomyFile(taxfiles[0]); } } countfiles = validParameter.validFiles(parameters, "count"); if (countfiles.size() != 0) { if (countfiles[0] == "not open") { abort = true; } else { current->setCountFile(countfiles[0]); } } fastqfiles = validParameter.validFiles(parameters, "fastq"); if (fastqfiles.size() != 0) { if (fastqfiles[0] == "not open") { abort = true; } } qualityfiles = validParameter.validFiles(parameters, "qfile"); if (qualityfiles.size() != 0) { if (qualityfiles[0] == "not open") { abort = true; } else { current->setQualFile(qualityfiles[0]); } } if ((qualityfiles.size() == 0) && (fastqfiles.size() == 0) && (countfiles.size() == 0) && (fastafiles.size() == 0) && (namefiles.size() == 0) && (listfiles.size() == 0) && (groupfiles.size() == 0) && (alignfiles.size() == 0) && 
(taxfiles.size() == 0) && (contigsreportfiles.size() == 0)) { m->mothurOut("You must provide a file.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "ListSeqsCommand"); exit(1); } } //********************************************************************************************************************** void addName(bool empty, string name, unordered_set& names, unordered_set& newNames) { if (empty) { newNames.insert(name); } //for first file or single file else { if (names.count(name) != 0) { newNames.insert(name); } //present in files so far so add to newNames } } #ifdef USE_BOOST //********************************************************************************************************************** void readFastq(unordered_set& names, boost::iostreams::filtering_istream& inBoost, MothurOut*& m){ try { unordered_set newNames; bool empty = true; if (names.size() != 0) { empty=false; } Utils util; while(!inBoost.eof()){ if (m->getControl_pressed()) { break; } bool ignore; FastqRead fread(inBoost, ignore, "illumina1.8+"); gobble(inBoost); if (!ignore) { addName(empty, fread.getName(), names, newNames); } } names = newNames; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "readFastq"); exit(1); } } #endif //********************************************************************************************************************** void readFastq(unordered_set& names, ifstream& in, MothurOut*& m){ try { unordered_set newNames; bool empty = true; if (names.size() != 0) { empty=false; } Utils util; while(!in.eof()){ if (m->getControl_pressed()) { break; } bool ignore; FastqRead fread(in, ignore, "illumina1.8+"); gobble(in); if (!ignore) { addName(empty, fread.getName(), names, newNames); } } names = newNames; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "readFastq"); exit(1); } } //********************************************************************************************************************** void readQual(unordered_set& names, ifstream& in, MothurOut*& m){ try { unordered_set newNames; bool empty = true; if (names.size() != 0) { empty=false; } Utils util; while(!in.eof()){ if (m->getControl_pressed()) { break; } QualityScores currSeq(in); gobble(in); if (currSeq.getName() != "") { addName(empty, currSeq.getName(), names, newNames); } } names = newNames; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "readQual"); exit(1); } } //********************************************************************************************************************** void readFasta(unordered_set& names, ifstream& in, MothurOut*& m){ try { unordered_set newNames; bool empty = true; if (names.size() != 0) { empty=false; } Utils util; while(!in.eof()){ if (m->getControl_pressed()) { break; } Sequence currSeq(in); gobble(in); if (currSeq.getName() != "") { addName(empty, currSeq.getName(), names, newNames); } } names = newNames; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** void readList(unordered_set& names, ifstream& in, MothurOut*& m){ try { unordered_set newNames; bool empty = true; if (names.size() != 0) { empty=false; } Utils util; string tag = "Otu"; string readHeaders = ""; //Tells mothur to try and read headers from the file if(!in.eof()){ ListVector list(in, readHeaders, tag); //read in list vector //for each bin for (int i = 0; i < list.getNumBins(); i++) { if (m->getControl_pressed()) { 
break; } string bin = list.get(i); vector binnames; util.splitAtComma(bin, binnames); for (int j = 0; j < binnames.size(); j++) { addName(empty, binnames[j], names, newNames); } } } names = newNames; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "readList"); exit(1); } } //********************************************************************************************************************** void readNameTaxGroup(unordered_set& names, ifstream& in, MothurOut*& m){ try { unordered_set newNames; bool empty = true; if (names.size() != 0) { empty=false; } Utils util; string name; while(!in.eof()){ if (m->getControl_pressed()) { break; } in >> name; util.getline(in); gobble(in); addName(empty, name, names, newNames); } names = newNames; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "readNameTaxGroup"); exit(1); } } //********************************************************************************************************************** void readCount(unordered_set& names, ifstream& in, MothurOut*& m){ try { unordered_set newNames; bool empty = true; if (names.size() != 0) { empty=false; } CountTable ct; ct.readTable(in, false, false); if (m->getControl_pressed()) { return; } vector cnames = ct.getNamesOfSeqs(); for (int j = 0; j < cnames.size(); j++) { addName(empty, cnames[j], names, newNames); } names = newNames; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "readCount"); exit(1); } } //********************************************************************************************************************** void readAlignContigs(unordered_set& names, ifstream& in, MothurOut*& m){ try { unordered_set newNames; bool empty = true; if (names.size() != 0) { empty=false; } string name; Utils util; util.getline(in); gobble(in); while(!in.eof()){ if (m->getControl_pressed()) { break; } in >> name; util.getline(in); gobble(in); addName(empty, name, names, newNames); } names = newNames; } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "readAlignContigs"); exit(1); } } //********************************************************************************************************************** int ListSeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } unordered_set names; //read functions fill names vector if (fastafiles.size() != 0) { process(fastafiles, names, &readFasta); } if (qualityfiles.size() != 0) { process(qualityfiles, names, &readQual); } if (fastqfiles.size() != 0) { process(fastqfiles, names); } if (namefiles.size() != 0) { process(namefiles, names, &readNameTaxGroup); } if (groupfiles.size() != 0) { process(groupfiles, names, &readNameTaxGroup); } if (taxfiles.size() != 0) { process(taxfiles, names, &readNameTaxGroup); } if (alignfiles.size() != 0) { process(alignfiles, names, &readAlignContigs); } if (contigsreportfiles.size() != 0) { process(contigsreportfiles, names, &readAlignContigs); } if (listfiles.size() != 0) { process(listfiles, names, &readList); } if (countfiles.size() != 0) { process(countfiles, names, &readCount); } if (m->getControl_pressed()) { outputTypes.clear(); return 0; } if (outputdir == "") { outputdir += util.hasPath(inputFileName); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileName)); string outputFileName = getOutputFileName("accnos", variables); util.printAccnos(outputFileName, names); outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); if (m->getControl_pressed()) { outputTypes.clear(); 
util.mothurRemove(outputFileName); return 0; } current->setAccnosFile(outputFileName); m->mothurOut("\nOutput File Names: \n" + outputFileName + "\n\n"); //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } return 0; }catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "execute"); exit(1); } } //********************************************************************************************************************** void ListSeqsCommand::process(vector files, unordered_set& names){ try { Utils util; for (int i = 0; i < files.size(); i++) { if (m->getControl_pressed()) { break; } inputFileName = files[i]; bool gz = util.isGZ(inputFileName)[1]; if (!gz) { ifstream in; util.openInputFile(inputFileName, in); readFastq(names, in, m); in.close(); }else { #ifdef USE_BOOST ifstream in; boost::iostreams::filtering_istream inBoost; util.openInputFileBinary(inputFileName, in, inBoost); readFastq(names, inBoost, m); in.close(); inBoost.pop(); inBoost.reset(); #endif } } } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "process"); exit(1); } } //********************************************************************************************************************** void ListSeqsCommand::process(vector files, unordered_set& names, void f(unordered_set&, ifstream&, MothurOut*&)){ try { Utils util; for (int i = 0; i < files.size(); i++) { if (m->getControl_pressed()) { break; } inputFileName = files[i]; ifstream in; util.openInputFile(inputFileName, in); f(names, in, m); in.close(); } } catch(exception& e) { m->errorOut(e, "ListSeqsCommand", "process"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/listseqscommand.h000077500000000000000000000024511424121717000216060ustar00rootroot00000000000000#ifndef LISTSEQSCOMMAND_H #define LISTSEQSCOMMAND_H /* * listseqscommand.h * Mothur * * Created by Sarah Westcott on 7/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" class ListSeqsCommand : public Command { public: ListSeqsCommand(string); ~ListSeqsCommand(){} vector setParameters(); string getCommandName() { return "list.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/List.seqs"; } string getDescription() { return "lists sequences from a list, fasta, name, group, count, fastq, taxonomy, alignreport or contigsreport file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames; vector fastafiles, namefiles, groupfiles, countfiles, alignfiles, listfiles, taxfiles, fastqfiles, contigsreportfiles, qualityfiles; string format, inputFileName; bool abort; void process(vector files, unordered_set&); void process(vector files, unordered_set&, void f(unordered_set&, ifstream&, MothurOut*&)); }; #endif mothur-1.48.0/source/commands/makebiomcommand.cpp000077500000000000000000000630141424121717000220600ustar00rootroot00000000000000// // makebiomcommand.cpp // Mothur // // Created by Sarah Westcott on 4/16/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
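//
// Reading the examples below: in the sparse form each "data" entry is a
// [row, column, value] triplet, so [0,2,1] places an abundance of 1 at row
// GG_OTU_1 / column Sample3; expanding every triplet reproduces the dense
// matrix shown in the second example. A minimal sketch of that expansion
// (variable names are illustrative only, not from this file):
//
//   std::vector<std::vector<int>> dense(5, std::vector<int>(6, 0));
//   for (const auto& t : sparseData) { dense[t[0]][t[1]] = t[2]; }
//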
// #include "makebiomcommand.h" #include "inputdata.h" #include "phylotree.h" //taken from http://biom-format.org/documentation/biom_format.html /* Minimal Sparse { "id":null, "format": "Biological Observation Matrix 0.9.1", "format_url": "http://biom-format.org", "type": "OTU table", "generated_by": "QIIME revision 1.4.0-dev", "date": "2011-12-19T19:00:00", "rows":[ {"id":"GG_OTU_1", "metadata":null}, {"id":"GG_OTU_2", "metadata":null}, {"id":"GG_OTU_3", "metadata":null}, {"id":"GG_OTU_4", "metadata":null}, {"id":"GG_OTU_5", "metadata":null} ], "columns": [ {"id":"Sample1", "metadata":null}, {"id":"Sample2", "metadata":null}, {"id":"Sample3", "metadata":null}, {"id":"Sample4", "metadata":null}, {"id":"Sample5", "metadata":null}, {"id":"Sample6", "metadata":null} ], "matrix_type": "sparse", "matrix_element_type": "int", "shape": [5, 6], "data":[[0,2,1], [1,0,5], [1,1,1], [1,3,2], [1,4,3], [1,5,1], [2,2,1], [2,3,4], [2,4,2], [3,0,2], [3,1,1], [3,2,1], [3,5,1], [4,1,1], [4,2,1] ] } */ /* Minimal dense { "id":null, "format": "Biological Observation Matrix 0.9.1", "format_url": "http://biom-format.org", "type": "OTU table", "generated_by": "QIIME revision 1.4.0-dev", "date": "2011-12-19T19:00:00", "rows":[ {"id":"GG_OTU_1", "metadata":null}, {"id":"GG_OTU_2", "metadata":null}, {"id":"GG_OTU_3", "metadata":null}, {"id":"GG_OTU_4", "metadata":null}, {"id":"GG_OTU_5", "metadata":null} ], "columns": [ {"id":"Sample1", "metadata":null}, {"id":"Sample2", "metadata":null}, {"id":"Sample3", "metadata":null}, {"id":"Sample4", "metadata":null}, {"id":"Sample5", "metadata":null}, {"id":"Sample6", "metadata":null} ], "matrix_type": "dense", "matrix_element_type": "int", "shape": [5,6], "data": [[0,0,1,0,0,0], [5,1,0,2,3,1], [0,0,1,4,2,0], [2,1,1,0,0,1], [0,1,1,0,0,0]] } */ //********************************************************************************************************************** vector MakeBiomCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","biom",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","biom",false,false,true); parameters.push_back(prelabund); CommandParameter pcontaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy); CommandParameter preference("reftaxonomy", "InputTypes", "", "", "none", "none", "refPi","",false,false); parameters.push_back(preference); CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pmetadata); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter ppicrust("picrust", "InputTypes", "", "", "none", "none", "refPi","shared",false,false); parameters.push_back(ppicrust); CommandParameter poutput("output", "Multiple", "simple-hdf5", "hdf5", "", "", "","",false,false, true); parameters.push_back(poutput); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter 
pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "","",false,false); parameters.push_back(pmatrixtype); abort = false; calledHelp = false; allLines = true; //initialize outputTypes vector tempOutNames; outputTypes["biom"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["relabund"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MakeBiomCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MakeBiomCommand::getHelpString(){ try { string helpString = ""; helpString += "The make.biom command parameters are shared, relabund, constaxonomy, metadata, groups, matrixtype, picrust, reftaxonomy and label. shared or relabund are required, unless you have a valid current file.\n"; // helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n"; helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n"; helpString += "The constaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the constaxonomy file distance matches the shared file distance. ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n"; helpString += "The metadata parameter is used to provide experimental parameters to the columns. Things like 'sample1 gut human_gut'. \n"; helpString += "The picrust parameter is used to provide the greengenes OTU IDs map table. NOTE: Picrust requires a greengenes taxonomy. \n"; helpString += "The reftaxonomy parameter is used with the picrust parameter. Picrust requires the greengenes OTU IDs to be in the biom file. \n"; helpString += "The output parameter allows you to specify format of your biom file. Options hdf5 or simple.
Default is hdf5, unless you are running a version without HDF5 libraries.\n"; helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n"; helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n"; helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; helpString += "The make.biom command outputs a .biom file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MakeBiomCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MakeBiomCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "biom") { pattern = "[filename],[distance],biom"; } else if (type == "shared") { pattern = "[filename],[distance],biom_shared"; } else if (type == "relabund") { pattern = "[filename],[distance],biom_relabund"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MakeBiomCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MakeBiomCommand::MakeBiomCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { inputFileName = relabundfile; fileFormat = "relabund"; current->setRelAbundFile(relabundfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { inputFileName = sharedfile; fileFormat = "sharedfile"; current->setSharedFile(sharedfile); } if ((relabundfile == "") && (sharedfile == "")) { //is there are current file available for either of these? //give priority to shared, then relabund sharedfile = current->getSharedFile(); if (sharedfile != "") { inputFileName = sharedfile; fileFormat="sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { inputFileName = relabundfile; fileFormat="relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a shared or relabund.\n"); abort = true; } } } if (outputdir == ""){ outputdir = util.hasPath(inputFileName); } contaxonomyfile = validParameter.validFile(parameters, "constaxonomy"); if (contaxonomyfile == "not found") { contaxonomyfile = ""; } else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; } referenceTax = validParameter.validFile(parameters, "reftaxonomy"); if (referenceTax == "not found") { referenceTax = ""; } else if (referenceTax == "not open") { referenceTax = ""; abort = true; } picrustOtuFile = validParameter.validFile(parameters, "picrust"); if (picrustOtuFile == "not found") { picrustOtuFile = ""; } else if (picrustOtuFile == "not open") { picrustOtuFile = ""; abort = true; } metadatafile = validParameter.validFile(parameters, "metadata"); if (metadatafile == "not found") { metadatafile = ""; } else if (metadatafile == "not open") { metadatafile = ""; abort = true; } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } if (picrustOtuFile != "") { picrust=true; if (contaxonomyfile == "") { m->mothurOut("[ERROR]: the constaxonomy parameter is required with the picrust parameter, aborting.\n"); abort = true; } if (referenceTax == "") { m->mothurOut("[ERROR]: the reftaxonomy parameter is required with the picrust parameter, aborting.\n"); abort = true; } }else { picrust=false; } if ((contaxonomyfile != "") && (labels.size() > 1)) { m->mothurOut("[ERROR]: the contaxonomy parameter cannot be used with multiple labels.\n"); abort = true; } format = validParameter.valid(parameters, "matrixtype"); if (format == "not found") { format = "sparse"; } if ((format != "sparse") && (format != "dense")) { m->mothurOut(format + " is not a valid option for the matrixtype parameter. Options are sparse and dense.\n"); abort = true; } output = validParameter.valid(parameters, "output"); if (output == "not found") { #ifdef USE_HDF5 output = "hdf5"; #else output = "simple"; #endif } if ((output != "hdf5") && (output != "simple")) { m->mothurOut("Invalid option for output. 
output options are hdf5 and simple, quitting.\n"); abort = true; } if (output == "hdf5") { #ifdef USE_HDF5 //do nothing we have the api #else m->mothurOut("[ERROR]: To write HDF5 biom files, you must have the API installed, quitting.\n"); abort=true; #endif } } } catch(exception& e) { m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand"); exit(1); } } //********************************************************************************************************************** int MakeBiomCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } SharedRAbundVectors* lookup = nullptr; SharedRAbundFloatVectors* lookupRel = nullptr; InputData input(inputFileName, fileFormat, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; vector sampleMetadata; if (fileFormat == "sharedfile") { lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); sampleMetadata = getSampleMetaData(lookup); }else { lookupRel = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookupRel->getNamesGroups(); sampleMetadata = getSampleMetaData(lookupRel); } //if user did not specify a label, then use first one if ((contaxonomyfile != "") && (labels.size() == 0)) { allLines = false; labels.insert(lastLabel); } Picrust* piCrust = nullptr; if (picrust) { piCrust = new Picrust(referenceTax, picrustOtuFile); } vector consTax; if (contaxonomyfile != "") { util.readConsTax(contaxonomyfile, consTax); } if (fileFormat == "sharedfile") { while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } getBiom(lookup, piCrust, consTax, sampleMetadata); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } }else { while (lookupRel != nullptr) { if (m->getControl_pressed()) { delete lookupRel; break; } getBiom(lookupRel, piCrust, consTax, sampleMetadata); delete lookupRel; lookupRel = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } } if (picrust) { delete piCrust; } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set sabund file as new current sabundfile string currentName = ""; itTypes = outputTypes.find("biom"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setBiomFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MakeBiomCommand", "execute"); exit(1); } } //********************************************************************************************************************** void MakeBiomCommand::getBiom(SharedRAbundVectors*& lookup, Picrust* picrust, vector consTax, vector sampleMetadata){ try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[distance]"] = lookup->getLabel(); string outputFileName = getOutputFileName("biom",variables); outputNames.push_back(outputFileName); outputTypes["biom"].push_back(outputFileName); string mothurString = "mothur_" + toString(current->getVersion()); Biom* biom; if (output == "hdf5") { biom = new BiomHDF5(); } else { biom = new BiomSimple(); } biom->load(lookup, consTax); biom->fillHeading(mothurString, sharedfile); biom->print(outputFileName, sampleMetadata, picrust); if 
(picrust) { string outputFileName2 = getOutputFileName("shared",variables); outputNames.push_back(outputFileName2); outputTypes["shared"].push_back(outputFileName2); ofstream out2; util.openOutputFile(outputFileName2, out2); bool printHeaders = true; biom->getSharedRAbundVectors()->print(out2, printHeaders); out2.close(); } delete biom; } catch(exception& e) { m->errorOut(e, "MakeBiomCommand", "getBiom"); exit(1); } } //********************************************************************************************************************** void MakeBiomCommand::getBiom(SharedRAbundFloatVectors*& lookup, Picrust* picrust, vector consTax, vector sampleMetadata){ try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileName)); variables["[distance]"] = lookup->getLabel(); string outputFileName = getOutputFileName("biom",variables); outputNames.push_back(outputFileName); outputTypes["biom"].push_back(outputFileName); string mothurString = "mothur_" + toString(current->getVersion()); BiomSimple biom; biom.load(lookup, consTax); biom.fillHeading(mothurString, sharedfile); biom.print(outputFileName, sampleMetadata, picrust); if (picrust) { string outputFileName2 = getOutputFileName("relabund",variables); outputNames.push_back(outputFileName2); outputTypes["relabund"].push_back(outputFileName2); ofstream out2; util.openOutputFile(outputFileName2, out2); bool printHeaders = true; biom.getSharedRAbundFloatVectors()->print(out2, printHeaders); out2.close(); } } catch(exception& e) { m->errorOut(e, "MakeBiomCommand", "getBiom"); exit(1); } } //********************************************************************************************************************** vector MakeBiomCommand::getSampleMetaData(SharedRAbundVectors*& lookup){ try { vector sampleMetadata; if (metadatafile == "") { for (int i = 0; i < lookup->size(); i++) { sampleMetadata.push_back("null"); } } else { ifstream in; util.openInputFile(metadatafile, in); vector groupNames, metadataLabels; map > lines; string headerLine = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpace(headerLine); //save names of columns you are reading for (int i = 1; i < pieces.size(); i++) { metadataLabels.push_back(pieces[i]); } int count = metadataLabels.size(); //read rest of file while (!in.eof()) { if (m->getControl_pressed()) { break; } string group = ""; in >> group; gobble(in); groupNames.push_back(group); string line = util.getline(in); gobble(in); vector thisPieces = util.splitWhiteSpaceWithQuotes(line); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + group + " " + util.getStringFromVector(thisPieces, ", ") + "\n"); } if (thisPieces.size() != count) { m->mothurOut("[ERROR]: expected " + toString(count) + " items of data for sample " + group + " read " + toString(thisPieces.size()) + ", quitting.\n"); } else { if (util.inUsersGroups(group, Groups)) { lines[group] = thisPieces; } } gobble(in); } in.close(); map >::iterator it; vector namesOfGroups = lookup->getNamesGroups(); for (int i = 0; i < namesOfGroups.size(); i++) { if (m->getControl_pressed()) { break; } it = lines.find(namesOfGroups[i]); if (it == lines.end()) { m->mothurOut("[ERROR]: can't find metadata information for " + namesOfGroups[i] + ", quitting.\n"); m->setControl_pressed(true); } else { vector values = it->second; string data = "{"; for (int j = 0; j < metadataLabels.size()-1; j++) { values[j] = util.removeQuotes(values[j]); data += "\"" + metadataLabels[j] + "\":\"" + values[j] + "\", "; } values[metadataLabels.size()-1] = 
util.removeQuotes(values[metadataLabels.size()-1]); data += "\"" + metadataLabels[metadataLabels.size()-1] + "\":\"" + values[metadataLabels.size()-1] + "\"}"; sampleMetadata.push_back(data); } } } return sampleMetadata; } catch(exception& e) { m->errorOut(e, "MakeBiomCommand", "getSampleMetaData"); exit(1); } } //********************************************************************************************************************** vector MakeBiomCommand::getSampleMetaData(SharedRAbundFloatVectors*& lookup){ try { vector sampleMetadata; if (metadatafile == "") { for (int i = 0; i < lookup->size(); i++) { sampleMetadata.push_back("null"); } } else { ifstream in; util.openInputFile(metadatafile, in); vector groupNames, metadataLabels; map > lines; string headerLine = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpace(headerLine); //save names of columns you are reading for (int i = 1; i < pieces.size(); i++) { metadataLabels.push_back(pieces[i]); } int count = metadataLabels.size(); //read rest of file while (!in.eof()) { if (m->getControl_pressed()) { break; } string group = ""; in >> group; gobble(in); groupNames.push_back(group); string line = util.getline(in); gobble(in); vector thisPieces = util.splitWhiteSpaceWithQuotes(line); if (thisPieces.size() != count) { m->mothurOut("[ERROR]: expected " + toString(count) + " items of data for sample " + group + " read " + toString(thisPieces.size()) + ", quitting.\n"); } else { if (util.inUsersGroups(group, Groups)) { lines[group] = thisPieces; } } gobble(in); } in.close(); map >::iterator it; vector namesOfGroups = lookup->getNamesGroups(); for (int i = 0; i < namesOfGroups.size(); i++) { if (m->getControl_pressed()) { break; } it = lines.find(namesOfGroups[i]); if (it == lines.end()) { m->mothurOut("[ERROR]: can't find metadata information for " + namesOfGroups[i] + ", quitting.\n"); m->setControl_pressed(true); } else { vector values = it->second; string data = "{"; for (int j = 0; j < metadataLabels.size()-1; j++) { values[j] = util.removeQuotes(values[j]); data += "\"" + metadataLabels[j] + "\":\"" + values[j] + "\", "; } values[metadataLabels.size()-1] = util.removeQuotes(values[metadataLabels.size()-1]); data += "\"" + metadataLabels[metadataLabels.size()-1] + "\":\"" + values[metadataLabels.size()-1] + "\"}"; sampleMetadata.push_back(data); } } } return sampleMetadata; } catch(exception& e) { m->errorOut(e, "MakeBiomCommand", "getSampleMetaData"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/makebiomcommand.h000077500000000000000000000027211424121717000215230ustar00rootroot00000000000000#ifndef Mothur_makebiomcommand_h #define Mothur_makebiomcommand_h // // makebiomcommand.h // Mothur // // Created by Sarah Westcott on 4/16/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
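//
// A rough sketch of the per-sample metadata strings built by getSampleMetaData()
// (the column names "body_site" and "env" are hypothetical, not from this code):
// given a metadata file whose header line names the columns and a row such as
//   sample1  gut  human_gut
// the sample's entry becomes {"body_site":"gut", "env":"human_gut"}, and each
// sample is given "null" metadata when no metadata file is supplied.
//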
// #include "command.hpp" #include "inputdata.h" #include "picrust.hpp" #include "biomsimple.hpp" #include "biomhdf5.hpp" class MakeBiomCommand : public Command { public: MakeBiomCommand(string); ~MakeBiomCommand(){} vector setParameters(); string getCommandName() { return "make.biom"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://biom-format.org/documentation/biom_format.html, http://www.mothur.org/wiki/Make.biom"; } string getDescription() { return "creates a biom file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string sharedfile, relabundfile, contaxonomyfile, metadatafile, groups, format, label, referenceTax, picrustOtuFile, inputFileName, fileFormat, output; vector outputNames, Groups; set labels; bool abort, allLines, picrust; void getBiom(SharedRAbundVectors*&, Picrust*, vector, vector); void getBiom(SharedRAbundFloatVectors*&, Picrust*, vector, vector); vector getSampleMetaData(SharedRAbundVectors*&); vector getSampleMetaData(SharedRAbundFloatVectors*&); }; #endif mothur-1.48.0/source/commands/makeclrcommand.cpp000066400000000000000000000226411424121717000217100ustar00rootroot00000000000000// // makeclrcommand.cpp // Mothur // // Created by Sarah Westcott on 1/20/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "makeclrcommand.hpp" //********************************************************************************************************************** vector MakeCLRCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","clr",false,false,true); parameters.push_back(pshared); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pzero("zero", "Number", "", "0.1", "", "", "","",false,false); parameters.push_back(pzero); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["clr"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MakeCLRCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MakeCLRCommand::getHelpString(){ try { string helpString = ""; helpString += "The make.clr command parameters are shared, groups, zero and label. The shared file is required, unless you have a valid current file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your sharedfile you would like included. The group names are separated by dashes.\n"; helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; helpString += "The zero parameter allows you to set an value for zero OTUs. 
Default is 0.1.\n"; helpString += "The make.clr command should be in the following format: make.clr(shared=yourSharedFile).\n"; helpString += "Example make.clr(shared=final.opti_mcc.shared, zero=0.25).\n"; helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n"; helpString += "The make.clr command outputs a .clr file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MakeCLRCommand", "MakeCLRCommand"); exit(1); } } //********************************************************************************************************************** string MakeCLRCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "clr") { pattern = "[filename],[distance],clr-[filename],clr"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MakeCLRCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MakeCLRCommand::MakeCLRCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("[ERROR]: No valid current shared file. You must provide a shared file, quitting.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
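//NOTE: a rough sketch of how the zero parameter parsed below is used (illustrative only,
//see process() further down): zero counts are stood in for by zeroReplacementValue so the
//geometric mean is defined, and each OTU is reported as clr_i = log2(x_i / g), where g is
//the geometric mean of that sample's counts; a sample whose counts are all equal therefore
//gets clr_i = 0 for every OTU.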
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string temp = validParameter.valid(parameters, "zero"); if (temp == "not found") { temp = "0.1"; } util.mothurConvert(temp, zeroReplacementValue); } } catch(exception& e) { m->errorOut(e, "MakeCLRCommand", "MakeCLRCommand"); exit(1); } } //********************************************************************************************************************** int MakeCLRCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); string outputFileName = getOutputFileName("clr",variables); bool printHeaders = true; ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["clr"].push_back(outputFileName); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } process(lookup, out, printHeaders); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} string currentName = ""; itTypes = outputTypes.find("clr"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCLRFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MakeCLRCommand", "execute"); exit(1); } } //********************************************************************************************************************** // compute geometric mean through formula // antilog(((log(1) + log(2) + . . . 
+ log(n))/n) //x <- c(10, 5, 3, 1,0) //> x[x==0] <- 0.1 //> log2(x / prod(x)^(1/4)) //[1] 2.3452054 1.3452054 0.6082399 -0.9767226 -4.2986507 void MakeCLRCommand::process(SharedRAbundVectors*& thisLookUp, ofstream& out, bool& printHeaders){ try { vector lookupGroups = thisLookUp->getNamesGroups(); vector lookup = thisLookUp->getSharedRAbundFloatVectors(); vector otuNames = thisLookUp->getOTUNames(); if (printHeaders) { out << "label\tGroup\tnumOtus\t" << util.getStringFromVector(otuNames, "\t") << endl; printHeaders = false; } for (int i = 0; i < lookup.size(); i++) { if (m->getControl_pressed()) { break; } vector abunds = lookup[i]->get(); double geoMean = util.geometricMean(abunds, zeroReplacementValue); for (int j = 0; j < abunds.size(); j++) { lookup[i]->set(j, log2(abunds[j]/geoMean)); } lookup[i]->print(out); delete lookup[i]; } } catch(exception& e) { m->errorOut(e, "MakeCLRCommand", "process"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/makeclrcommand.hpp000066400000000000000000000023221424121717000217070ustar00rootroot00000000000000// // makeclrcommand.hpp // Mothur // // Created by Sarah Westcott on 1/20/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #ifndef makeclrcommand_hpp #define makeclrcommand_hpp #include "command.hpp" #include "inputdata.h" class MakeCLRCommand : public Command { public: MakeCLRCommand(string); ~MakeCLRCommand() = default; vector setParameters(); string getCommandName() { return "make.clr"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Make.clr"; } string getDescription() { return "create a log centered ratio file from a shared file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines; set labels; //holds labels to be used string groups, label, sharedfile, zeroReplacement; vector Groups, outputNames; double zeroReplacementValue; void process(SharedRAbundVectors*&, ofstream&, bool&); }; #endif /* makeclrcommand_hpp */ mothur-1.48.0/source/commands/makecontigscommand.cpp000066400000000000000000004514231424121717000226020ustar00rootroot00000000000000// // makecontigscommand.cpp // Mothur // // Created by Sarah Westcott on 5/15/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "makecontigscommand.h" #include "contigsreport.hpp" #include "counttable.h" //************************************************************************************************** /** * Convert the probability to a quality score. */ double convertProbToQ(double prob){ try { return round(-10*log10(prob)); } catch(exception& e) { MothurOut* m; m = MothurOut::getInstance(); m->errorOut(e, "MakeContigsCommand", "convertProbToQ"); exit(1); } } //************************************************************************************************** /** * Convert the quality score to a probability. 
*/ double convertQToProb(double Q){ try { double value = pow(10,(-Q/10)); if (isnan(value) || isinf(value)) { value = 0.0; } return value; } catch(exception& e) { MothurOut* m; m = MothurOut::getInstance(); m->errorOut(e, "MakeContigsCommand", "convertQToProb"); exit(1); } } //************************************************************************************************** int loadQmatchValues(vector< vector >& qual_match_simple_bayesian, vector< vector >& qual_mismatch_simple_bayesian){ try { vector probs(47); for(int i=0;ierrorOut(e, "MakeContigsCommand", "loadQmatchValues"); exit(1); } } //************************************************************************************************** vector MakeContigsCommand::setParameters(){ try { CommandParameter pfastq("ffastq", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(pfastq); CommandParameter prfastq("rfastq", "InputTypes", "", "", "none", "none", "fastqGroup","fasta-qfile",false,false,true); parameters.push_back(prfastq); CommandParameter pfasta("ffasta", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "fastaGroup","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter prfasta("rfasta", "InputTypes", "", "", "none", "none", "none","fastaGroup",false,false,true); parameters.push_back(prfasta); CommandParameter pfqual("fqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(pfqual); CommandParameter prqual("rqfile", "InputTypes", "", "", "none", "none", "qfileGroup","",false,false,true); parameters.push_back(prqual); CommandParameter pfile("file", "InputTypes", "", "", "FastaFastqFile", "FastaFastqFile", "none","fasta-qfile",false,false,true); parameters.push_back(pfile); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos); CommandParameter pfindex("findex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(pfindex); CommandParameter prindex("rindex", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(prindex); CommandParameter pqfile("qfile", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pqfile); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); CommandParameter preorient("checkorient", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter palign("align", "Multiple", "needleman-gotoh-kmer", "needleman", "", "", "","",false,false); parameters.push_back(palign); CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles); CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap); CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); 
CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); CommandParameter pthreshold("insert", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pthreshold); CommandParameter pdeltaq("deltaq", "Number", "", "6", "", "", "","",false,false); parameters.push_back(pdeltaq); CommandParameter maxee("maxee", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(maxee); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat); CommandParameter pksize("ksize", "Number", "", "8", "", "", "","",false,false); parameters.push_back(pksize); CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig); CommandParameter pmaxhomop("maxhomop", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxhomop); CommandParameter pmaxlength("maxlength", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxlength); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; createFileGroup = false; createOligosGroup = false; gz = false; //initialize outputTypes vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["report"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MakeContigsCommand::getHelpString(){ try { string helpString = ""; helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs a fasta file. \n"; helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a count file will be created.\n"; helpString += "If a forward index or reverse index file is provided barcodes will be trimmed, and a group file will be created. The oligos parameter is required if an index file is given.\n"; helpString += "The make.contigs command parameters are file, ffastq, rfastq, ffasta, rfasta, fqfile, rqfile, oligos, findex, rindex, qfile, format, tdiffs, bdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, insert, deltaq, maxee, allfiles and processors.\n"; helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n"; helpString += "The file parameter is a 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file. If you only have one index file add 'none' for the other one.
Mothur will process each pair and create a combined fasta and report file with all the sequences.\n"; helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n"; helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process. If you provide one, you must provide the other.\n"; helpString += "The fqfile and rqfile parameters are used to provide forward quality and reverse quality files to process with the ffasta and rfasta parameters. If you provide one, you must provide the other.\n"; helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n"; helpString += "The findex and rindex parameters are used to provide forward index and reverse index files to process. \n"; helpString += "The qfile parameter is used to indicate you want a quality file assembled. Default=f. NOTE: The assembled quality scores outputted by mothur cannot be used for downstream quality screening. The score calculations are modeled after pandaseq's method. Here's a link to the explanation from their documentation, https://github.com/neufeld/pandaseq#the-scores-of-the-output-bases-seem-really-low-whats-wrong. \n"; helpString += "The align parameter allows you to specify the alignment method to use. Your options are: kmer, gotoh and needleman. The default is needleman.\n"; helpString += "The ksize parameter allows you to set the kmer size if you are doing align=kmer. Default=8.\n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n"; helpString += "The mismatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n"; helpString += "The checkorient parameter will look for the reverse complement of the barcode or primer in the sequence. If found the sequence is flipped. The default is true.\n"; helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base. For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5). If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5). The default is 6.\n"; helpString += "The maxee parameter allows you to specify the maximum number of errors to allow in a sequence. Makes sense to use with deltaq=0. This number is a decimal number. The expected number of errors is based on Edgar's approach used in USEARCH/VSEARCH."; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; helpString += "The insert parameter allows you to set a quality scores threshold.
In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score equal to or below the threshold we eliminate it. Default=20.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is all available.\n"; helpString += "The allfiles parameter will create a separate group and fasta file for each grouping. The default is F.\n"; helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n"; helpString += "The maxambig parameter allows you to set the maximum number of ambiguous bases allowed. The default is -1, meaning ignore.\n"; helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. The default is -1, meaning ignore.\n"; helpString += "The maxlength parameter allows you to set a maximum length of your sequences. The default is -1, meaning ignore.\n"; helpString += "The make.contigs command should be in the following format: \n"; helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MakeContigsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],[tag],contigs.fasta"; } else if (type == "qfile") { pattern = "[filename],[tag],contigs.qual"; } else if (type == "count") { pattern = "[filename],[tag],contigs.count_table"; } else if (type == "report") { pattern = "[filename],[tag],contigs_report"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MakeContigsCommand::MakeContigsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; ffastqfile = validParameter.validFile(parameters, "ffastq"); if (ffastqfile == "not open") { abort = true; } else if (ffastqfile == "not found") { ffastqfile = ""; } rfastqfile = validParameter.validFile(parameters, "rfastq"); if (rfastqfile == "not open") { abort = true; } else if (rfastqfile == "not found") { rfastqfile = ""; } ffastafile = validParameter.validFile(parameters, "ffasta"); if (ffastafile == "not open") { abort = true; } else if (ffastafile == "not found") { ffastafile = ""; } rfastafile = validParameter.validFile(parameters, "rfasta"); if (rfastafile == "not open") { abort = true; } else if (rfastafile == "not found") { rfastafile = ""; } fqualfile = validParameter.validFile(parameters, "fqfile"); if (fqualfile == "not open") { abort = true; } else if (fqualfile == "not found") { fqualfile = ""; } rqualfile = validParameter.validFile(parameters, "rqfile"); if (rqualfile == "not open") { abort = true; } else if (rqualfile ==
"not found") { rqualfile = ""; } file = validParameter.validFile(parameters, "file"); if (file == "not open") { abort = true; } else if (file == "not found") { file = ""; } //provide at least one if ((file == "") && (ffastafile == "") && (ffastqfile == "")) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); } if ((file != "") && ((ffastafile != "") || (ffastqfile != ""))) { abort = true; m->mothurOut("[ERROR]: The file, ffastq and rfastq or ffasta and rfasta parameters are required.\n"); } if ((ffastqfile != "") && (rfastqfile == "")) { abort = true; m->mothurOut("[ERROR]: If you provide the ffastq, you must provide a rfastq file.\n"); } if ((ffastqfile == "") && (rfastqfile != "")) { abort = true; m->mothurOut("[ERROR]: If you provide the rfastq, you must provide a ffastq file.\n"); } if ((ffastafile != "") && (rfastafile == "")) { abort = true; m->mothurOut("[ERROR]: If you provide the ffasta, you must provide a rfasta file.\n"); } if ((ffastafile == "") && (rfastafile != "")) { abort = true; m->mothurOut("[ERROR]: If you provide the rfasta, you must provide a ffasta file.\n"); } if ((fqualfile != "") && (rqualfile == "")) { abort = true; m->mothurOut("[ERROR]: If you provide the fqfile, you must provide a rqfile file.\n"); } if ((fqualfile == "") && (rqualfile != "")) { abort = true; m->mothurOut("[ERROR]: If you provide the rqfile, you must provide a fqfile file.\n"); } if (((fqualfile != "") || (rqualfile != "")) && ((ffastafile == "") || (rfastafile == ""))) { abort = true; m->mothurOut("[ERROR]: If you provide the rqfile or fqfile file, you must provide the ffasta and rfasta parameters.\n"); } oligosfile = validParameter.validFile(parameters, "oligos"); if (oligosfile == "not found") { oligosfile = ""; } else if(oligosfile == "not open") { abort = true; } else { current->setOligosFile(oligosfile); } findexfile = validParameter.validFile(parameters, "findex"); if (findexfile == "not found") { findexfile = ""; } else if(findexfile == "not open") { abort = true; } rindexfile = validParameter.validFile(parameters, "rindex"); if (rindexfile == "not found") { rindexfile = ""; } else if(rindexfile == "not open") { abort = true; } if ((rindexfile != "") || (findexfile != "")) { if (oligosfile == ""){ oligosfile = current->getOligosFile(); if (oligosfile != "") { m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter.\n"); } else { m->mothurOut("You need to provide an oligos file if you are going to use an index file.\n"); abort = true; } } //can only use an index file with the fastq parameters not fasta and qual if ((ffastafile != "") || (rfastafile != "")) { m->mothurOut("[ERROR]: You can only use an index file with the fastq parameters or the file option.\n"); abort = true; } } //check for optional parameter and set defaults // ...at some point should add some additional type checking...
string temp; temp = validParameter.valid(parameters, "match"); if (temp == "not found"){ temp = "1.0"; } util.mothurConvert(temp, match); temp = validParameter.valid(parameters, "mismatch"); if (temp == "not found"){ temp = "-1.0"; } util.mothurConvert(temp, misMatch); if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; } temp = validParameter.valid(parameters, "gapopen"); if (temp == "not found"){ temp = "-2.0"; } util.mothurConvert(temp, gapOpen); if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; } temp = validParameter.valid(parameters, "gapextend"); if (temp == "not found"){ temp = "-1.0"; } util.mothurConvert(temp, gapExtend); if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; } temp = validParameter.valid(parameters, "insert"); if (temp == "not found"){ temp = "20"; } util.mothurConvert(temp, insert); if ((insert < 0) || (insert > 40)) { m->mothurOut("[ERROR]: insert must be between 0 and 40.\n"); abort=true; } temp = validParameter.valid(parameters, "deltaq"); if (temp == "not found"){ temp = "6"; } util.mothurConvert(temp, deltaq); temp = validParameter.valid(parameters, "maxee"); if (temp == "not found"){ temp = "10000"; } util.mothurConvert(temp, maxee); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "bdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, bdiffs); temp = validParameter.valid(parameters, "pdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, pdiffs); sdiffs = 0; temp = validParameter.valid(parameters, "tdiffs"); if (temp == "not found") { int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); } util.mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } //+ ldiffs + sdiffs; temp = validParameter.valid(parameters, "allfiles"); if (temp == "not found") { temp = "F"; } allFiles = util.isTrue(temp); temp = validParameter.valid(parameters, "ksize"); if (temp == "not found"){ temp = "8"; } util.mothurConvert(temp, kmerSize); temp = validParameter.valid(parameters, "trimoverlap"); if (temp == "not found") { temp = "F"; } trimOverlap = util.isTrue(temp); temp = validParameter.valid(parameters, "qfile"); if (temp == "not found") { temp = "F"; } else { temp = util.getSimpleName(temp); } makeQualFile = util.isTrue(temp); align = validParameter.valid(parameters, "align"); if (align == "not found"){ align = "needleman"; } if ((align != "needleman") && (align != "gotoh") && (align != "kmer")) { m->mothurOut(align + " is not a valid alignment method. Options are kmer, needleman or gotoh. I will use needleman.\n"); align = "needleman"; } format = validParameter.valid(parameters, "format"); if (format == "not found"){ format = "illumina1.8+"; } if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) { m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." 
); m->mothurOutEndLine(); abort=true; } temp = validParameter.valid(parameters, "checkorient"); if (temp == "not found") { temp = "T"; } reorient = util.isTrue(temp); temp = validParameter.valid(parameters, "maxambig"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxAmbig); temp = validParameter.valid(parameters, "maxhomop"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxHomoP); temp = validParameter.valid(parameters, "maxlength"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxLength); if ((maxLength == -1) && (maxHomoP == -1) && (maxAmbig == -1)) { screenSequences = false; } else { screenSequences = true; } } } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand"); exit(1); } } //********************************************************************************************************************** int MakeContigsCommand::execute(){ try { bool debugIndex = false; if (debugIndex) { //allows you to run the oligos and index file independantly to check for barcode issues. make.contigs(findex=yourIndexFile, bdiffs=1, oligos=yourOligosFile, checkorient=t). just used for user support debugFunction(); } if (abort) { if (calledHelp) { return 0; } return 2; } unsigned long long numReads = 0; long start = time(nullptr); string outFastaFile, outScrapFastaFile, outQualFile, outScrapQualFile, outMisMatchFile, inputFile; if (file != "") { numReads = processMultipleFileOption(outFastaFile, outMisMatchFile); inputFile = file; } else if ((ffastqfile != "") || (ffastafile != "")) { numReads = processSingleFileOption(outFastaFile, outScrapFastaFile, outQualFile, outScrapQualFile, outMisMatchFile, ""); inputFile = ffastqfile; if (ffastafile != "") { inputFile = ffastafile; } } else { return 0; } if (groupMap.size() != 0) { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = util.hasPath(inputFile); } map vars; vars["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(inputFile)); vars["[tag]"] = ""; string outputCountFileName = getOutputFileName("count",vars); outputNames.push_back(outputCountFileName); outputTypes["count"].push_back(outputCountFileName); createCountFile(outputCountFileName, outFastaFile); } //add headers to mismatch file ofstream out; util.openOutputFile(outMisMatchFile+".temp", out); ContigsReport report; report.printHeaders(out); out.close();//print Headers util.appendFilesFront(outMisMatchFile+".temp", outMisMatchFile); //removes temp if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } string currentFasta = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; current->setFastaFile(currentFasta); } } string currentCount = ""; itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentCount = (itTypes->second)[0]; current->setCountFile(currentCount); } } string currentQual = ""; itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; current->setQualFile(currentQual); } } string currentReport = ""; itTypes = outputTypes.find("report"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentReport = (itTypes->second)[0]; current->setContigsReportFile(currentReport); } } if (m->getControl_pressed()) { for (int i = 0; i < 
outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //output group counts int total = 0; if (groupCounts.size() != 0) { m->mothurOut("\nGroup count: \n"); } for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { total += it->second; m->mothurOut(it->first + "\t" + toString(it->second) + "\n"); } if (total != 0) { m->mothurOut("\nTotal of all groups is " + toString(total) + "\n"); } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to process " + toString(numReads) + " sequences.\n"); //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "execute"); exit(1); } } /**************************************************************************************************/ int MakeContigsCommand::createCountFile(string outputGroupFile, string resultFastafile) { try { CountTable ct; ct.createTable(groupMap); ct.printCompressedTable(outputGroupFile); if(allFiles){ //run split.groups command //use unique.seqs to create new name and fastafile string inputString = "fasta=" + resultFastafile + ", count=" + outputGroupFile; m->mothurOut("/******************************************/\n"); m->mothurOut("Generating allfiles... Running command: split.groups(" + inputString + ")\n"); current->setMothurCalling(true); Command* splitCommand = new SplitGroupCommand(inputString); splitCommand->execute(); map > filenames = splitCommand->getOutputFiles(); delete splitCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); } return 0; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "createCountFile"); exit(1); } } //********************************************************************************************************************** bool testGZReadable(vector& fileInputs, vector& indexInputs, bool& decompressionHelped, string format, MothurOut* m) { try { bool error = false; bool readable = true; decompressionHelped = false; #ifdef USE_BOOST boost::iostreams::filtering_istream inFF, inRF; ifstream inForward, inReverse; string forwardFile = fileInputs[0]; string reverseFile = fileInputs[1]; Utils util; util.openInputFileBinary(forwardFile, inForward, inFF); util.openInputFileBinary(reverseFile, inReverse, inRF); FastqRead fread(inFF, error, format); FastqRead rread(inRF, error, format); inFF.pop(); inRF.pop(); //error=true; to force test of decompression if (error) { //error reading fastq files, try unzipping string forwardOutput = util.getRootName(forwardFile) + "mothurTest_forward.fastq"; string reverseOutput = util.getRootName(reverseFile) + "mothurTest_reverse.fastq"; string unzipCommand = "gunzip < " + forwardFile + " > " + forwardOutput; system(unzipCommand.c_str()); unzipCommand = "gunzip < " + reverseFile + " > " + reverseOutput; system(unzipCommand.c_str()); ifstream inForward1, inReverse1; util.openInputFile(forwardOutput, inForward1); util.openInputFile(reverseOutput, inReverse1); FastqRead fread(inForward1, error, format); FastqRead rread(inReverse1, error, format); if (!error) { m->mothurOut("[WARNING]: mothur is unable to read your compressed fastq files. 
Decompressing files and continuing to process.\n\n"); fileInputs[0] = forwardOutput; fileInputs[1] = reverseOutput; if (indexInputs.size() != 0) { if ((indexInputs[0] != "NONE") && (indexInputs[0] != "")){ string forwardIndex = util.getRootName(indexInputs[0]) + "mothurTest_forward_index.fastq"; string unzipCommand = "gunzip < " + indexInputs[0] + " > " + forwardIndex; system(unzipCommand.c_str()); indexInputs[0] = forwardIndex; } if ((indexInputs[1] != "NONE") && (indexInputs[1] != "")) { string reverseIndex = util.getRootName(indexInputs[1]) + "mothurTest_reverse_index.fastq"; unzipCommand = "gunzip < " + indexInputs[1] + " > " + reverseIndex; system(unzipCommand.c_str()); indexInputs[1] = reverseIndex; } } decompressionHelped = true; } else { readable = false; } } #endif return readable; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "testGZReadable"); exit(1); } } //********************************************************************************************************************** unsigned long long MakeContigsCommand::processSingleFileOption(string& outFastaFile, string& outScrapFastaFile, string& outQualFile, string& outScrapQualFile, string& outMisMatchFile, string group) { try { unsigned long long numReads = 0; string inputFile = ""; vector fileInputs; vector qualOrIndexInputs; delim = '>'; map variables; string thisOutputDir = outputdir; if (ffastafile != "") { inputFile = ffastafile; if (outputdir == "") { thisOutputDir = util.hasPath(inputFile); } fileInputs.push_back(ffastafile); fileInputs.push_back(rfastafile); if (fqualfile != "") { qualOrIndexInputs.push_back(fqualfile); qualOrIndexInputs.push_back(rqualfile); variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fqualfile)); variables["[tag]"] = "trim"; outQualFile = getOutputFileName("qfile",variables); variables["[tag]"] = "scrap"; outScrapQualFile = getOutputFileName("qfile",variables); }else { outQualFile = ""; outScrapQualFile = ""; makeQualFile = false; } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(inputFile)); delim = '>'; }else { //ffastqfile inputFile = ffastqfile; if (outputdir == "") { thisOutputDir = util.hasPath(inputFile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(inputFile)); variables["[tag]"] = "trim"; outQualFile = getOutputFileName("qfile",variables); variables["[tag]"] = "scrap"; outScrapQualFile = getOutputFileName("qfile",variables); fileInputs.push_back(ffastqfile); fileInputs.push_back(rfastqfile); if ((findexfile != "") || (rindexfile != "")){ qualOrIndexInputs.push_back("NONE"); qualOrIndexInputs.push_back("NONE"); if (findexfile != "") { qualOrIndexInputs[0] = findexfile; } if (rindexfile != "") { qualOrIndexInputs[1] = rindexfile; } } delim = '@'; } bool allGZ = true; #ifdef USE_BOOST bool allPlainTxt = true; if (util.isGZ(fileInputs[0])[1]) { allPlainTxt = false; } else { allGZ = false; } if (util.isGZ(fileInputs[1])[1]) { allPlainTxt = false; } else { allGZ = false; } if (qualOrIndexInputs.size() != 0) { if (qualOrIndexInputs[0] != "NONE") { if (util.isGZ(qualOrIndexInputs[0])[1]) { allPlainTxt = false; } else { allGZ = false; } } if (qualOrIndexInputs[1] != "NONE") { if (util.isGZ(qualOrIndexInputs[1])[1]) { allPlainTxt = false; } else { allGZ = false; } } if (!allGZ && !allPlainTxt) { //mixed bag of files, uh oh... m->mothurOut("[ERROR]: Your files must all be in compressed .gz form or all in plain text form. Please correct. 
\n"); m->setControl_pressed(true); } } #else allGZ = false; #endif bool decompressionHelped = false; if (allGZ) { gz = true; //test to make sure you can read the gz files bool readable = testGZReadable(fileInputs, qualOrIndexInputs, decompressionHelped, format, m); if (readable) { if (decompressionHelped) { gz = false; } }else { m->mothurOut("[ERROR]: Unable to read compressed .gz files, please decompress and run make.contigs again. \n"); m->setControl_pressed(true); return 0; } } else { gz = false; } variables["[tag]"] = "trim"; outFastaFile = getOutputFileName("fasta",variables); variables["[tag]"] = "scrap"; outScrapFastaFile = getOutputFileName("fasta",variables); variables["[tag]"] = ""; outMisMatchFile = getOutputFileName("report",variables); vector > fastaFileNames, qualFileNames; map uniqueFastaNames;// so we don't add the same groupfile multiple times createOligosGroup = false; map pairedPrimers, rpairedPrimers, revpairedPrimers, pairedBarcodes, rpairedBarcodes, revpairedBarcodes; vector barcodeNames, primerNames; if(oligosfile != "") { createOligosGroup = getOligos(pairedPrimers, rpairedPrimers, revpairedPrimers, pairedBarcodes, rpairedBarcodes, revpairedBarcodes, barcodeNames, primerNames); } //give group in file file precedence if (createFileGroup) { createOligosGroup = false; } m->mothurOut("Making contigs...\n"); numReads = createProcesses(fileInputs, qualOrIndexInputs, outFastaFile, outScrapFastaFile, outQualFile, outScrapQualFile, outMisMatchFile, fastaFileNames, qualFileNames, group, pairedPrimers, rpairedPrimers, revpairedPrimers, pairedBarcodes, rpairedBarcodes, revpairedBarcodes, barcodeNames, primerNames); if (decompressionHelped) { util.mothurRemove(fileInputs[0]); util.mothurRemove(fileInputs[1]); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (file == "") { outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); outputNames.push_back(outScrapFastaFile); outputTypes["fasta"].push_back(outScrapFastaFile); if (makeQualFile) { outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile); outputNames.push_back(outScrapQualFile); outputTypes["qfile"].push_back(outScrapQualFile); } else { if (outQualFile != "") { util.mothurRemove(outQualFile); } if (outScrapQualFile != "") { util.mothurRemove(outScrapQualFile); } } outputNames.push_back(outMisMatchFile); outputTypes["report"].push_back(outMisMatchFile); } m->mothurOut("Done.\n"); return numReads; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "processSingleFileOption"); exit(1); } } /**************************************************************************************************/ struct contigsData { MothurOut* m; Utils util; OutputWriter* trimFileName; OutputWriter* scrapFileName; OutputWriter* trimQFileName; OutputWriter* scrapQFileName; OutputWriter* misMatchesFile; string align, group, format; float match, misMatch, gapOpen, gapExtend; bool gz, reorient, trimOverlap, createGroupFromOligos, createGroupFromFilePairs, makeQualFile, screenSequences; char delim; int nameType, offByOneTrimLength, pdiffs, bdiffs, tdiffs, kmerSize, insert, deltaq, maxee, maxAmbig, maxHomoP, maxLength; vector inputFiles, qualOrIndexFiles, outputNames; set badNames; linePair linesInput; linePair linesInputReverse; linePair qlinesInput; linePair qlinesInputReverse; long long count; vector primerNameVector; vector barcodeNameVector; map groupCounts; map groupMap; map pairedBarcodes, 
reorientedPairedBarcodes, reversedPairedBarcodes; map pairedPrimers, reorientedPairedPrimers, reversedPairedPrimers; contigsData(){} ~contigsData(){} contigsData(OutputWriter* tn, OutputWriter* sn, OutputWriter* tqn, OutputWriter* sqn, OutputWriter* mmf) { trimFileName = tn; scrapFileName = sn; trimQFileName = tqn; scrapQFileName = sqn; misMatchesFile = mmf; m = MothurOut::getInstance(); count = 0; makeQualFile = true; if (trimQFileName == nullptr) { makeQualFile = false; } } contigsData(OutputWriter* tn, OutputWriter* sn, OutputWriter* tqn, OutputWriter* sqn, OutputWriter* mmf, vector ifn, vector qif, linePair li, linePair lir, linePair qli, linePair qlir) { trimFileName = tn; scrapFileName = sn; trimQFileName = tqn; scrapQFileName = sqn; misMatchesFile = mmf; m = MothurOut::getInstance(); inputFiles = ifn; qualOrIndexFiles = qif; linesInput = li; linesInputReverse = lir; qlinesInput = qli; qlinesInputReverse = qlir; count = 0; makeQualFile = true; if (trimQFileName == nullptr) { makeQualFile = false; } } void setVariables(bool isgz, char de, int nt, int offby, map pbr, map ppr, map rpbr, map rppr, map repbr, map reppr, vector priNameVector, vector barNameVector, bool ro, int pdf, int bdf, int tdf, string al, float ma, float misMa, float gapO, float gapE, int thr, int delt, double maxe, int km, string form, bool to, bool cfg, bool cgff, string gp, bool screen, int maxH, int maxL, int maxAm) { gz = isgz; delim = de; nameType = nt; offByOneTrimLength = offby; pairedPrimers = ppr; pairedBarcodes = pbr; reorientedPairedPrimers = rppr; reorientedPairedBarcodes = rpbr; reversedPairedPrimers = reppr; reversedPairedBarcodes = repbr; primerNameVector = priNameVector; barcodeNameVector = barNameVector; group = gp; createGroupFromOligos = cfg; createGroupFromFilePairs = cgff; pdiffs = pdf; bdiffs = bdf; tdiffs = tdf; reorient = ro; match = ma; misMatch = misMa; gapOpen = gapO; gapExtend = gapE; insert = thr; kmerSize = km; align = al; deltaq = delt; maxee = maxe; format = form; trimOverlap = to; screenSequences = screen; maxHomoP = maxH; maxLength = maxL; maxAmbig = maxAm; } void copyVariables(contigsData* copy) { gz = copy->gz; delim = copy->delim; nameType = copy->nameType; offByOneTrimLength = copy->offByOneTrimLength; pairedPrimers = copy->pairedPrimers; pairedBarcodes = copy->pairedBarcodes; reorientedPairedPrimers = copy->reorientedPairedPrimers; reorientedPairedBarcodes = copy->reorientedPairedBarcodes; reversedPairedPrimers = copy->reversedPairedPrimers; reversedPairedBarcodes = copy->reversedPairedBarcodes; primerNameVector = copy->primerNameVector; barcodeNameVector = copy->barcodeNameVector; group = copy->group; createGroupFromOligos = copy->createGroupFromOligos; createGroupFromFilePairs = copy->createGroupFromFilePairs; pdiffs = copy->pdiffs; bdiffs = copy->bdiffs; tdiffs = copy->tdiffs; reorient = copy->reorient; match = copy->match; misMatch = copy->misMatch; gapOpen = copy->gapOpen; gapExtend = copy->gapExtend; insert = copy->insert; kmerSize = copy->kmerSize; align = copy->align; deltaq = copy->deltaq; maxee = copy->maxee; format = copy->format; trimOverlap = copy->trimOverlap; screenSequences = copy->screenSequences; maxHomoP = copy->maxHomoP; maxLength = copy->maxLength; maxAmbig = copy->maxAmbig; } }; /**************************************************************************************************/ struct groupContigsData { MothurOut* m; Utils util; int start, end; vector< vector > fileInputs; set badNames; map file2Groups; contigsData* bundle; long long count; 
groupContigsData() = default; groupContigsData(vector< vector > fi, int s, int e, contigsData* cd, map f2g) { fileInputs = fi; start = s; end = e; bundle = cd; file2Groups = f2g; count = 0; m = MothurOut::getInstance(); } ~groupContigsData() { delete bundle; } }; /**************************************************************************************************/ int setNameType(string forward, string reverse, int& offByOneTrimLength) { MothurOut* m = MothurOut::getInstance(); try { int type = 0; Utils util; if (forward == reverse) { type = perfectMatch; } else { int pos = forward.find_last_of('#'); string tempForward = forward; if (pos != string::npos) { tempForward = forward.substr(0, pos); } int pos2 = reverse.find_last_of('#'); string tempReverse = reverse; if (pos2 != string::npos) { tempReverse = reverse.substr(0, pos2); } if (tempForward == tempReverse) { type = poundMatch; } else { char delim = ':'; if (m->getChangedSeqNames()) { delim = '_'; } vector delims; delims.push_back(delim); delims.push_back('/'); for (int j = 0; j < delims.size(); j++) { delim = delims[j]; int pos = forward.find_last_of(delim); string tempForward = forward; string tempForwardEnd = forward; if (pos != string::npos) { tempForwardEnd = forward.substr(pos+1); } int pos2 = reverse.find_last_of(delim); string tempReverse = reverse; string tempReverseEnd = reverse; if (pos2 != string::npos) { tempReverseEnd = reverse.substr(pos2+1); } if (tempForwardEnd != tempReverseEnd) { if ((util.isAllAlphaNumerics(tempForwardEnd)) && (util.isAllAlphaNumerics(tempReverseEnd))) { //check for off by one on rest of name if (tempForward.length() == tempReverse.length()) { int numDiffs = 0; char forwardDiff = ' '; char reverseDiff = ' '; int spot = 0; for (int i = 0; i < tempForward.length(); i++) { if (tempForward[i] != tempReverse[i]) { numDiffs++; forwardDiff = tempForward[i]; reverseDiff = tempReverse[i]; spot = i; } } if (numDiffs == 1) { if ((forwardDiff == '1') && (reverseDiff == '2')) { type = offByOne; offByOneTrimLength = tempForward.length()-spot+1; } } } } } } } } return type; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "setNameType"); exit(1); } } /**************************************************************************************************/ int setNameType(string forwardFile, string reverseFile, char delim, int& offByOneTrimLength, bool gz, string format) { MothurOut* m = MothurOut::getInstance(); try { int type = 0; bool error = false; string forward = ""; string reverse = ""; Utils util; ifstream inForward, inReverse; #ifdef USE_BOOST boost::iostreams::filtering_istream inFF, inRF; #endif if (!gz) { //plain text files util.openInputFile(forwardFile, inForward); util.openInputFile(reverseFile, inReverse); if (delim == '>') { Sequence fread(inForward); forward = fread.getName(); Sequence rread(inReverse); reverse = rread.getName(); }else { FastqRead fread(inForward, error, format); forward = fread.getName(); FastqRead rread(inReverse, error, format); reverse = rread.getName(); } inForward.close(); inReverse.close(); }else { //compressed files #ifdef USE_BOOST util.openInputFileBinary(forwardFile, inForward, inFF); util.openInputFileBinary(reverseFile, inReverse, inRF); if (delim == '>') { Sequence fread(inFF); forward = fread.getName(); Sequence rread(inRF); reverse = rread.getName(); }else { FastqRead fread(inFF, error, format); forward = fread.getName(); FastqRead rread(inRF, error, format); reverse = rread.getName(); } inFF.pop(); inRF.pop(); #endif } type = setNameType(forward, reverse, 
offByOneTrimLength); return type; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "setNameType"); exit(1); } } //********************************************************************************************************************** unsigned long long MakeContigsCommand::processMultipleFileOption(string& compositeFastaFile, string& compositeMisMatchFile) { try { //read file map file2Group; vector< vector > fileInputs = readFileNames(file, file2Group); if (m->getControl_pressed()) { return 0; } unsigned long long numReads = 0; map cvars; string compOutputDir = outputdir; if (outputdir == "") { compOutputDir = util.hasPath(file); } cvars["[filename]"] = compOutputDir + util.getRootName(util.getSimpleName(file)); cvars["[tag]"] = "trim"; compositeFastaFile = getOutputFileName("fasta",cvars); cvars["[tag]"] = "scrap"; string compositeScrapFastaFile = getOutputFileName("fasta",cvars); cvars["[tag]"] = "trim"; string compositeQualFile = getOutputFileName("qfile",cvars); cvars["[tag]"] = "scrap"; string compositeScrapQualFile = getOutputFileName("qfile",cvars); cvars["[tag]"] = ""; compositeMisMatchFile = getOutputFileName("report",cvars); ofstream outCTFasta, outCTQual, outCSFasta, outCSQual, outCMisMatch; util.openOutputFile(compositeFastaFile, outCTFasta); outCTFasta.close(); outputNames.push_back(compositeFastaFile); outputTypes["fasta"].push_back(compositeFastaFile); util.openOutputFile(compositeScrapQualFile, outCSQual); outCSQual.close(); util.openOutputFile(compositeQualFile, outCTQual); outCTQual.close(); if (makeQualFile) { outputNames.push_back(compositeScrapQualFile); outputTypes["qfile"].push_back(compositeScrapQualFile); outputNames.push_back(compositeQualFile); outputTypes["qfile"].push_back(compositeQualFile); } util.openOutputFile(compositeScrapFastaFile, outCSFasta); outCSFasta.close(); outputNames.push_back(compositeScrapFastaFile); outputTypes["fasta"].push_back(compositeScrapFastaFile); util.openOutputFile(compositeMisMatchFile, outCMisMatch); outCMisMatch.close(); outputNames.push_back(compositeMisMatchFile); outputTypes["report"].push_back(compositeMisMatchFile); if (gz) { numReads = createProcessesGroups(fileInputs, compositeFastaFile, compositeScrapFastaFile, compositeQualFile, compositeScrapQualFile, compositeMisMatchFile, file2Group); }else { for (int l = 0; l < fileInputs.size(); l++) { if (m->getControl_pressed()) { break; } int startTime = time(nullptr); m->mothurOut("\n>>>>>\tProcessing file pair " + fileInputs[l][0] + " - " + fileInputs[l][1] + " (files " + toString(l+1) + " of " + toString(fileInputs.size()) + ")\t<<<<<\n"); ffastqfile = fileInputs[l][0]; rfastqfile = fileInputs[l][1]; findexfile = fileInputs[l][2]; rindexfile = fileInputs[l][3]; //run file as if it was a single string outFastaFile, outScrapFastaFile, outQualFile, outScrapQualFile, outMisMatchFile; int thisNumReads = processSingleFileOption(outFastaFile, outScrapFastaFile, outQualFile, outScrapQualFile, outMisMatchFile, file2Group[l]); numReads += thisNumReads; util.appendFiles(outMisMatchFile, compositeMisMatchFile); util.mothurRemove(outMisMatchFile); util.appendFiles(outFastaFile, compositeFastaFile); util.mothurRemove(outFastaFile); util.appendFiles(outScrapFastaFile, compositeScrapFastaFile); util.mothurRemove(outScrapFastaFile); if (makeQualFile) { util.appendFiles(outQualFile, compositeQualFile); util.appendFiles(outScrapQualFile, compositeScrapQualFile); } util.mothurRemove(outQualFile); util.mothurRemove(outScrapQualFile); m->mothurOut("\nIt took " + toString(time(nullptr) - 
startTime) + " secs to assemble " + toString(thisNumReads) + " reads.\n\n"); } } if (!makeQualFile) { util.mothurRemove(compositeQualFile); util.mothurRemove(compositeScrapQualFile); } return numReads; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "processMultipleFileOption"); exit(1); } } //*************************************************************************************************************** /** * checks for minor diffs @MS7_15058:1:1101:11899:1633#8/1 @MS7_15058:1:1101:11899:1633#8/2 should match */ bool fixName(string& forward, int nameType, int offByOneTrimLength){ try { bool match = false; if (nameType == poundMatch) { match = true; int pos = forward.find_last_of('#'); if (pos != string::npos) { forward = forward.substr(0, pos); } }else if (nameType == perfectMatch) { match = true; } else if (nameType == offByOne) { match = true; forward = forward.substr(0, (forward.length()-offByOneTrimLength)); } return match; } catch(exception& e) { MothurOut* m; m = MothurOut::getInstance(); m->errorOut(e, "MakeContigsCommand", "fixName"); exit(1); } } //*************************************************************************************************************** /** * checks for minor diffs @MS7_15058:1:1101:11899:1633#8/1 @MS7_15058:1:1101:11899:1633#8/2 should match */ bool checkName(FastqRead& forward, FastqRead& reverse, int nameType, int offByOneTrimLength){ try { bool match = false; string forwardName = forward.getName(); string reverseName = reverse.getName(); if (nameType == poundMatch) { match = true; int pos = forwardName.find_last_of('#'); if (pos != string::npos) { forwardName = forwardName.substr(0, pos); } int pos2 = reverseName.find_last_of('#'); if (pos2 != string::npos) { reverseName = reverseName.substr(0, pos2); } if (forwardName == reverseName) { forward.setName(forwardName); reverse.setName(reverseName); }else{ match = false; } }else if (nameType == perfectMatch) { if (forwardName == reverseName) { match = true; } } else if (nameType == offByOne) { match = true; reverseName = reverseName.substr(0, (reverseName.length()-offByOneTrimLength)); forwardName = forwardName.substr(0, (forwardName.length()-offByOneTrimLength)); if (forwardName == reverseName) { forward.setName(forwardName); reverse.setName(reverseName); }else{ match = false; } } return match; } catch(exception& e) { MothurOut* m; m = MothurOut::getInstance(); m->errorOut(e, "MakeContigsCommand", "ckeckName"); exit(1); } } //*************************************************************************************************************** /** * checks for minor diffs @MS7_15058:1:1101:11899:1633#8/1 @MS7_15058:1:1101:11899:1633#8/2 should match */ bool checkName(Sequence& forward, Sequence& reverse, int nameType, int offByOneTrimLength){ try { bool match = false; string forwardName = forward.getName(); string reverseName = reverse.getName(); if (nameType == poundMatch) { match = true; int pos = forwardName.find_last_of('#'); if (pos != string::npos) { forwardName = forwardName.substr(0, pos); } int pos2 = reverseName.find_last_of('#'); if (pos2 != string::npos) { reverseName = reverseName.substr(0, pos2); } if (forwardName == reverseName) { forward.setName(forwardName); reverse.setName(reverseName); }else{ match = false; } }else if (nameType == perfectMatch) { if (forwardName == reverseName) { match = true; } } else if (nameType == offByOne) { match = true; reverseName = reverseName.substr(0, (reverseName.length()-offByOneTrimLength)); forwardName = forwardName.substr(0, 
(forwardName.length()-offByOneTrimLength)); if (forwardName == reverseName) { forward.setName(forwardName); reverse.setName(reverseName); }else{ match = false; } } return match; } catch(exception& e) { MothurOut* m; m = MothurOut::getInstance(); m->errorOut(e, "MakeContigsCommand", "ckeckName"); exit(1); } } //*************************************************************************************************************** /** * checks for minor diffs @MS7_15058:1:1101:11899:1633#8/1 @MS7_15058:1:1101:11899:1633#8/2 should match */ bool checkName(QualityScores& forward, QualityScores& reverse, int nameType, int offByOneTrimLength){ try { bool match = false; string forwardName = forward.getName(); string reverseName = reverse.getName(); if (nameType == poundMatch) { match = true; int pos = forwardName.find_last_of('#'); if (pos != string::npos) { forwardName = forwardName.substr(0, pos); } int pos2 = reverseName.find_last_of('#'); if (pos2 != string::npos) { reverseName = reverseName.substr(0, pos2); } if (forwardName == reverseName) { forward.setName(forwardName); reverse.setName(reverseName); }else{ match = false; } }else if (nameType == perfectMatch) { if (forwardName == reverseName) { match = true; } } else if (nameType == offByOne) { match = true; reverseName = reverseName.substr(0, (reverseName.length()-offByOneTrimLength)); forwardName = forwardName.substr(0, (forwardName.length()-offByOneTrimLength)); if (forwardName == reverseName) { forward.setName(forwardName); reverse.setName(reverseName); }else{ match = false; } } return match; } catch(exception& e) { MothurOut* m; m = MothurOut::getInstance(); m->errorOut(e, "MakeContigsCommand", "ckeckName"); exit(1); } } //*************************************************************************************************************** /** * checks for minor diffs @MS7_15058:1:1101:11899:1633#8/1 @MS7_15058:1:1101:11899:1633#8/2 should match */ bool checkName(Sequence& forward, QualityScores& reverse, int nameType, int offByOneTrimLength){ try { bool match = false; string forwardName = forward.getName(); string reverseName = reverse.getName(); if (nameType == poundMatch) { match = true; string forwardName = forward.getName(); string reverseName = reverse.getName(); int pos = forwardName.find_last_of('#'); if (pos != string::npos) { forwardName = forwardName.substr(0, pos); } int pos2 = reverseName.find_last_of('#'); if (pos2 != string::npos) { reverseName = reverseName.substr(0, pos2); } if (forwardName == reverseName) { forward.setName(forwardName); reverse.setName(reverseName); }else{ match = false; } }else if (nameType == perfectMatch) { if (forwardName == reverseName) { match = true; } } else if (nameType == offByOne) { match = true; reverseName = reverseName.substr(0, (reverseName.length()-offByOneTrimLength)); forwardName = forwardName.substr(0, (forwardName.length()-offByOneTrimLength)); if (forwardName == reverseName) { forward.setName(forwardName); reverse.setName(reverseName); }else{ match = false; } } return match; } catch(exception& e) { MothurOut* m; m = MothurOut::getInstance(); m->errorOut(e, "MakeContigsCommand", "ckeckName"); exit(1); } } /**************************************************************************************************/ //vector contigScores = assembleFragments(qual_match_simple_bayesian, qual_mismatch_simple_bayesian, fSeq, rSeq, alignment, contig); vector assembleFragments(vector< vector >&qual_match_simple_bayesian, vector< vector >& qual_mismatch_simple_bayesian, Sequence& fSeq, Sequence& rSeq, vector 
scores1, vector scores2, bool hasQuality, Alignment*& alignment, string& contig, string& trashCode, int& oend, int& oStart, int& numMismatches, int insert, int deltaq, bool trimOverlap) { MothurOut* m; m = MothurOut::getInstance(); try { vector contigScores; //flip the reverse reads rSeq.reverseComplement(); //pairwise align alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned(), true); map ABaseMap = alignment->getSeqAAlnBaseMap(); map BBaseMap = alignment->getSeqBAlnBaseMap(); fSeq.setAligned(alignment->getSeqAAln()); rSeq.setAligned(alignment->getSeqBAln()); int length = fSeq.getAligned().length(); //traverse alignments merging into one contiguous seq string seq1 = fSeq.getAligned(); string seq2 = rSeq.getAligned(); int overlapStart = fSeq.getStartPos()-1; int seq2Start = rSeq.getStartPos()-1; //bigger of the 2 starting positions is the location of the overlapping start if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1 overlapStart = seq2Start; for (int i = 0; i < overlapStart; i++) { contig += seq1[i]; if (hasQuality) { if (((seq1[i] != '-') && (seq1[i] != '.'))) { contigScores.push_back(scores1[ABaseMap[i]]); } } } }else { //seq1 starts later so take from 0 to overlapStart from seq2 for (int i = 0; i < overlapStart; i++) { contig += seq2[i]; if (hasQuality) { if (((seq2[i] != '-') && (seq2[i] != '.'))) { contigScores.push_back(scores2[BBaseMap[i]]); } } } } int seq1End = fSeq.getEndPos(); int seq2End = rSeq.getEndPos(); int overlapEnd = seq1End; if (seq2End < overlapEnd) { overlapEnd = seq2End; } //smallest end position is where overlapping ends oStart = contig.length(); int firstForward = 0; int seq2FirstForward = 0; int lastReverse = seq1.length(); int seq2lastReverse = seq2.length(); bool firstChooseSeq1 = false; bool lastChooseSeq1 = false; if (hasQuality) { for (int i = 0; i < seq1.length(); i++) { if ((seq1[i] != '.') && (seq1[i] != '-')) { if (scores1[ABaseMap[i]] == 2) { firstForward++; }else { break; } } } for (int i = 0; i < seq2.length(); i++) { if ((seq2[i] != '.') && (seq2[i] != '-')) { if (scores2[BBaseMap[i]] == 2) { seq2FirstForward++; }else { break; } } } if (seq2FirstForward > firstForward) { firstForward = seq2FirstForward; firstChooseSeq1 = true; } for (int i = seq1.length()-1; i >= 0; i--) { if ((seq1[i] != '.') && (seq1[i] != '-')) { if (scores1[ABaseMap[i]] == 2) { lastReverse--; }else { break; } } } for (int i = seq2.length()-1; i >= 0; i--) { if ((seq2[i] != '.') && (seq2[i] != '-')) { if (scores2[BBaseMap[i]] == 2) { seq2lastReverse--; }else { break; } } } if (lastReverse > seq2lastReverse) { lastReverse = seq2lastReverse; lastChooseSeq1 = true; } } for (int i = overlapStart; i < overlapEnd; i++) { if (seq1[i] == seq2[i]) { contig += seq1[i]; if (hasQuality) { contigScores.push_back(qual_match_simple_bayesian[PHREDCLAMP(scores1[ABaseMap[i]])][PHREDCLAMP(scores2[BBaseMap[i]])]); } }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2, unless quality score for base is below insert. In that case eliminate base if (hasQuality) { if (scores2[BBaseMap[i]] <= insert) { } // else { contig += seq2[i]; contigScores.push_back(scores2[BBaseMap[i]]); } } else { contig += seq2[i]; } //with no quality info, then we keep it? }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1, unless quality score for base is below insert. 
In that case eliminate base if (hasQuality) { if (scores1[ABaseMap[i]] <= insert) { } //eliminate base else { contig += seq1[i]; contigScores.push_back(scores1[ABaseMap[i]]); } }else { contig += seq1[i]; } //with no quality info, then we keep it? }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality if (hasQuality) { if (abs(scores1[ABaseMap[i]] - scores2[BBaseMap[i]]) >= deltaq) { //is the difference in qual scores >= deltaq, if yes choose base with higher score char c = seq1[i]; if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { c = seq2[i]; } contig += c; if ((i >= firstForward) && (i <= lastReverse)) { //in unmasked section contigScores.push_back(qual_mismatch_simple_bayesian[PHREDCLAMP(scores1[ABaseMap[i]])][PHREDCLAMP(scores2[BBaseMap[i]])]); }else if (i < firstForward) { if (firstChooseSeq1) { contigScores.push_back(scores1[ABaseMap[i]]); } else { contigScores.push_back(scores2[BBaseMap[i]]); } }else if ((i > lastReverse)) { if (lastChooseSeq1) { contigScores.push_back(scores1[ABaseMap[i]]); } else { contigScores.push_back(scores2[BBaseMap[i]]); } }else { contigScores.push_back(2); } //N }else { //if no, base becomes n contig += 'N'; contigScores.push_back(2); } numMismatches++; }else { numMismatches++; } //cant decide, so eliminate and mark as mismatch }else { //should never get here m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n"); } //printf("Overlap seq: %i, %i, %i, %c, %i\n", i, scores1[ABaseMap[i]], scores2[BBaseMap[i]], contig[contig.length()-1], contigScores[contigScores.size()-1]); } oend = contig.length(); if (seq1End < seq2End) { //seq1 ends before seq2 so take from overlap to length from seq2 for (int i = overlapEnd; i < length; i++) { contig += seq2[i]; if (hasQuality) { if (((seq2[i] != '-') && (seq2[i] != '.'))) { contigScores.push_back(scores2[BBaseMap[i]]); } } } }else { //seq2 ends before seq1 so take from overlap to length from seq1 for (int i = overlapEnd; i < length; i++) { contig += seq1[i]; if (hasQuality) { if (((seq1[i] != '-') && (seq1[i] != '.'))) { contigScores.push_back(scores1[ABaseMap[i]]); } } } } if (trimOverlap) { contig = contig.substr(overlapStart, oend-oStart); if (hasQuality) { vector newContigScores; for (int i = overlapStart; i < oend; i++) { newContigScores.push_back(contigScores[i]); } contigScores = newContigScores; } } if (contig == "") { trashCode += "l"; contig = "NNNN"; contigScores.push_back(2); contigScores.push_back(2); contigScores.push_back(2); contigScores.push_back(2); } return contigScores; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "assembleFragments"); exit(1); } } /**************************************************************************************************/ void screenSequence(string& contig, string& trashCode, contigsData* params) { try { //if you failed before screening, don't bother screening if (trashCode.length() != 0) { return; } else { bool goodSeq = true; // innocent until proven guilty Sequence currSeq("dummy", contig); if(params->maxAmbig != -1 && params->maxAmbig < currSeq.getAmbigBases()) { goodSeq = false; trashCode += "ambig|"; } if(params->maxHomoP != -1 && params->maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = false; trashCode += "homop|"; } if(params->maxLength != -1 && params->maxLength < currSeq.getNumBases()) { goodSeq = false; trashCode += ">length|";} } } catch(exception& e) { params->m->errorOut(e, "MakeContigsCommand", 
"screenSequence"); exit(1); } } /**************************************************************************************************/ #ifdef USE_BOOST //ignore = read(fSeq, rSeq, fQual, rQual, savedFQual, savedRQual, findexBarcode, rindexBarcode, delim, inFF, inRF, inFQ, inRQ); bool read(Sequence& fSeq, Sequence& rSeq, QualityScores*& fQual, QualityScores*& rQual, Sequence& findexBarcode, Sequence& rindexBarcode, char delim, boost::iostreams::filtering_istream& inFF, boost::iostreams::filtering_istream& inRF, boost::iostreams::filtering_istream& inFQ, boost::iostreams::filtering_istream& inRQ, string thisfqualindexfile, string thisrqualindexfile, string format, int nameType, int offByOneTrimLength, MothurOut* m) { try { bool ignore = false; Utils util; if (delim == '@') { //fastq files bool tignore = false; FastqRead fread(inFF, tignore, format); gobble(inFF); FastqRead rread(inRF, ignore, format); gobble(inRF); if (!checkName(fread, rread, nameType, offByOneTrimLength)) { FastqRead f2read(inFF, tignore, format); if (!checkName(f2read, rread, nameType, offByOneTrimLength)) { FastqRead r2read(inRF, ignore, format); if (!checkName(fread, r2read, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in forward and reverse fastq file. Ignoring, " + fread.getName() + ".\n"); ignore = true; }else { rread = r2read; } }else { fread = f2read; } } if (tignore) { ignore=true; } fSeq.setName(fread.getName()); fSeq.setAligned(fread.getSeq()); rSeq.setName(rread.getName()); rSeq.setAligned(rread.getSeq()); fQual = new QualityScores(fread.getName(), fread.getScores()); rQual = new QualityScores(rread.getName(), rread.getScores()); if (thisfqualindexfile != "") { //forward index file FastqRead firead(inFQ, tignore, format); if (tignore) { ignore=true; } findexBarcode.setAligned(firead.getSeq()); if (!checkName(fread, firead, nameType, offByOneTrimLength)) { FastqRead f2iread(inFQ, tignore, format); if (tignore) { ignore=true; } if (!checkName(fread, f2iread, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in forward index file. Ignoring, " + fread.getName() + ".\n"); ignore = true; }else { firead = f2iread; findexBarcode.setAligned(firead.getSeq()); } } } if (thisrqualindexfile != "") { //reverse index file FastqRead riread(inRQ, tignore, format); if (tignore) { ignore=true; } rindexBarcode.setAligned(riread.getSeq()); if (!checkName(fread, riread, nameType, offByOneTrimLength)) { FastqRead r2iread(inRQ, tignore, format); gobble(inRQ); if (tignore) { ignore=true; } if (!checkName(fread, r2iread, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in reverse index file. Ignoring, " + fread.getName() + ".\n"); ignore = true; }else { riread = r2iread; rindexBarcode.setAligned(riread.getSeq()); } } } }else { //reading fasta and maybe qual Sequence tfSeq(inFF); Sequence trSeq(inRF); if (!checkName(tfSeq, trSeq, nameType, offByOneTrimLength)) { Sequence t2fSeq(inFF); if (!checkName(t2fSeq, trSeq, nameType, offByOneTrimLength)) { Sequence t2rSeq(inRF); if (!checkName(tfSeq, t2rSeq, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in forward and reverse fasta file. 
Ignoring, " + tfSeq.getName() + ".\n"); ignore = true; }else { trSeq = t2fSeq; } }else { tfSeq = t2fSeq; } } fSeq.setName(tfSeq.getName()); fSeq.setAligned(tfSeq.getAligned()); rSeq.setName(trSeq.getName()); rSeq.setAligned(trSeq.getAligned()); if (thisfqualindexfile != "") { fQual = new QualityScores(inFQ); gobble(inFQ); rQual = new QualityScores(inRQ); gobble(inRQ); if (!checkName(*fQual, *rQual, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in forward and reverse qual file. Ignoring, " + fQual->getName() + ".\n"); ignore = true; } if (fQual->getName() != tfSeq.getName()) { m->mothurOut("[WARNING]: name mismatch in forward quality file. Ignoring, " + tfSeq.getName() + ".\n"); ignore = true; } if (rQual->getName() != trSeq.getName()) { m->mothurOut("[WARNING]: name mismatch in reverse quality file. Ignoring, " + trSeq.getName() + ".\n"); ignore = true; } } if (tfSeq.getName() != trSeq.getName()) { m->mothurOut("[WARNING]: name mismatch in forward and reverse fasta file. Ignoring, " + tfSeq.getName() + ".\n"); ignore = true; } } return ignore; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "read"); exit(1); } } #endif /**************************************************************************************************/ bool read(Sequence& fSeq, Sequence& rSeq, QualityScores*& fQual, QualityScores*& rQual,Sequence& findexBarcode, Sequence& rindexBarcode, char delim, ifstream& inFFasta, ifstream& inRFasta, ifstream& inFQualIndex, ifstream& inRQualIndex, string thisfqualindexfile, string thisrqualindexfile, string format, int nameType, int offByOneTrimLength, MothurOut* m) { try { bool ignore = false; Utils util; if (delim == '@') { //fastq files bool tignore; FastqRead fread(inFFasta, tignore, format); gobble(inFFasta); FastqRead rread(inRFasta, ignore, format); gobble(inRFasta); if (!checkName(fread, rread, nameType, offByOneTrimLength)) { FastqRead f2read(inFFasta, tignore, format); gobble(inFFasta); if (!checkName(f2read, rread, nameType, offByOneTrimLength)) { FastqRead r2read(inRFasta, ignore, format); gobble(inRFasta); if (!checkName(fread, r2read, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in forward and reverse fastq file. Ignoring, " + fread.getName() + ".\n"); ignore = true; }else { rread = r2read; } }else { fread = f2read; } } if (tignore) { ignore=true; } fSeq.setName(fread.getName()); fSeq.setAligned(fread.getSeq()); rSeq.setName(rread.getName()); rSeq.setAligned(rread.getSeq()); fQual = new QualityScores(fread.getName(), fread.getScores()); rQual = new QualityScores(rread.getName(), rread.getScores()); if (thisfqualindexfile != "") { //forward index file FastqRead firead(inFQualIndex, tignore, format); gobble(inFQualIndex); if (tignore) { ignore=true; } findexBarcode.setAligned(firead.getSeq()); if (!checkName(fread, firead, nameType, offByOneTrimLength)) { FastqRead f2iread(inFQualIndex, tignore, format); gobble(inFQualIndex); if (tignore) { ignore=true; } if (!checkName(fread, f2iread, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in forward index file. 
Ignoring, " + fread.getName() + ".\n"); ignore = true; }else { firead = f2iread; findexBarcode.setAligned(firead.getSeq()); } } } if (thisrqualindexfile != "") { //reverse index file FastqRead riread(inRQualIndex, tignore, format); gobble(inRQualIndex); if (tignore) { ignore=true; } rindexBarcode.setAligned(riread.getSeq()); if (!checkName(fread, riread, nameType, offByOneTrimLength)) { FastqRead r2iread(inRQualIndex, tignore, format); gobble(inRQualIndex); if (tignore) { ignore=true; } if (!checkName(fread, r2iread, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in reverse index file. Ignoring, " + fread.getName() + ".\n"); ignore = true; }else { riread = r2iread; rindexBarcode.setAligned(riread.getSeq()); } } } }else { //reading fasta and maybe qual Sequence tfSeq(inFFasta); gobble(inFFasta); Sequence trSeq(inRFasta); gobble(inRFasta); if (!checkName(tfSeq, trSeq, nameType, offByOneTrimLength)) { Sequence t2fSeq(inFFasta); gobble(inFFasta); if (!checkName(t2fSeq, trSeq, nameType, offByOneTrimLength)) { Sequence t2rSeq(inRFasta); gobble(inRFasta); if (!checkName(tfSeq, t2rSeq, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in forward and reverse fasta file. Ignoring, " + tfSeq.getName() + ".\n"); ignore = true; }else { trSeq = t2fSeq; } }else { tfSeq = t2fSeq; } } fSeq.setName(tfSeq.getName()); fSeq.setAligned(tfSeq.getAligned()); rSeq.setName(trSeq.getName()); rSeq.setAligned(trSeq.getAligned()); if (thisfqualindexfile != "") { fQual = new QualityScores(inFQualIndex); gobble(inFQualIndex); rQual = new QualityScores(inRQualIndex); gobble(inRQualIndex); if (!checkName(*fQual, *rQual, nameType, offByOneTrimLength)) { m->mothurOut("[WARNING]: name mismatch in forward and reverse qual file. Ignoring, " + fQual->getName() + ".\n"); ignore = true; } if (fQual->getName() != tfSeq.getName()) { m->mothurOut("[WARNING]: name mismatch in forward quality file. Ignoring, " + tfSeq.getName() + ".\n"); ignore = true; } if (rQual->getName() != trSeq.getName()) { m->mothurOut("[WARNING]: name mismatch in reverse quality file. Ignoring, " + trSeq.getName() + ".\n"); ignore = true; } } if (tfSeq.getName() != trSeq.getName()) { m->mothurOut("[WARNING]: name mismatch in forward and reverse fasta file. 
Ignoring, " + tfSeq.getName() + ".\n"); ignore = true; } } return ignore; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "read"); exit(1); } } /**************************************************************************************************/ vector trimBarCodesAndPrimers(Sequence& fSeq, Sequence& rSeq, QualityScores*& fQual, QualityScores*& rQual, Sequence& findexBarcode, Sequence& rindexBarcode, vector trims, vector& codes, int numBarcodes, int numPrimers, bool hasQuality, bool hasIndex, int pdiffs, int bdiffs, int tdiffs, MothurOut* m) { try { vector oligosResults; oligosResults.resize(2, 0); //barcodeIndex, primerIndex codes.resize(2, ""); for (int i = 0; i < trims.size(); i++) { Sequence savedFSeq(fSeq.getName(), fSeq.getAligned()); Sequence savedRSeq(rSeq.getName(), rSeq.getAligned()); Sequence savedFindex(findexBarcode.getName(), findexBarcode.getAligned()); Sequence savedRIndex(rindexBarcode.getName(), rindexBarcode.getAligned()); QualityScores* savedFQual = nullptr; QualityScores* savedRQual = nullptr; if (hasQuality) { savedFQual = new QualityScores(fQual->getName(), fQual->getScores()); savedRQual = new QualityScores(rQual->getName(), rQual->getScores()); } string trashCode = ""; string commentString = ""; int currentSeqsDiffs = 0; int barcodeIndex = 0; int primerIndex = 0; if(numBarcodes != 0){ vector results; if (hasQuality) { if (hasIndex) { results = trims[i]->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, barcodeIndex); } else { results = trims[i]->stripBarcode(savedFSeq, savedRSeq, *savedFQual, *savedRQual, barcodeIndex); } }else { results = trims[i]->stripBarcode(savedFSeq, savedRSeq, barcodeIndex); } int success = results[0] + results[2]; commentString += "fbdiffs=" + toString(results[0]) + "(" + trims[i]->getCodeValue(results[1], bdiffs) + "), rbdiffs=" + toString(results[2]) + "(" + trims[i]->getCodeValue(results[3], bdiffs) + ") "; if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } if(numPrimers != 0){ vector results; if (hasQuality) { results = trims[i]->stripForward(savedFSeq, savedRSeq, *savedFQual, *savedRQual, primerIndex); } else { results = trims[i]->stripForward(savedFSeq, savedRSeq, primerIndex); } int success = results[0] + results[2]; commentString += "fpdiffs=" + toString(results[0]) + "(" + trims[i]->getCodeValue(results[1], pdiffs) + "), rpdiffs=" + toString(results[2]) + "(" + trims[i]->getCodeValue(results[3], pdiffs) + ") "; if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } if (trashCode == "") { oligosResults[0] = barcodeIndex; oligosResults[1] = primerIndex; codes[0] = ""; codes[1] = commentString; if (i > 0) { //checkOrient trimOligos - reoriented and reversed savedFSeq.reverseComplement(); savedRSeq.reverseComplement(); } fSeq.setAligned(savedFSeq.getUnaligned()); rSeq.setAligned(savedRSeq.getUnaligned()); if(hasQuality){ if (i > 0) { //checkOrient trimOligos - reoriented and reversed savedFQual->flipQScores(); savedRQual->flipQScores(); } fQual->setScores(savedFQual->getScores()); rQual->setScores(savedRQual->getScores()); delete savedRQual; delete savedFQual; } break; }else { if (codes[0] == "") { codes[0] = trashCode; } else { codes[0] += "(" + trashCode + ")"; } codes[1] = commentString; if(hasQuality){ delete savedRQual; delete savedFQual; } } } if (hasQuality) { rQual->flipQScores(); } return oligosResults; }catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "read"); exit(1); } } 
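//********************************************************************************************************************** //driverContigs() below fills two 47x47 lookup tables (qual_match_simple_bayesian and qual_mismatch_simple_bayesian) via loadQmatchValues() and uses them in assembleFragments() to assign a posterior quality to each overlap base: agreeing bases get the match table value, disagreeing bases whose scores differ by at least deltaq keep the higher-quality call with the mismatch table value, and ties become 'N' with quality 2. loadQmatchValues() is defined elsewhere in this file; the helper below is only an illustrative sketch of the commonly used "simple Bayesian" posterior, NOT mothur's implementation - the function name, the rounding, and the exact formula are assumptions. It relies only on <vector>, <cmath> and <algorithm>, which mothur's headers already pull in.
inline void sketchSimpleBayesianQualityTables(std::vector< std::vector<int> >& matchQ, std::vector< std::vector<int> >& mismatchQ) {
    const int maxQ = 46;                                   //indices mirror the 47x47 tables used below; inputs are clamped with PHREDCLAMP
    matchQ.assign(maxQ+1, std::vector<int>(maxQ+1, 0));
    mismatchQ.assign(maxQ+1, std::vector<int>(maxQ+1, 0));
    for (int q1 = 0; q1 <= maxQ; q1++) {
        for (int q2 = 0; q2 <= maxQ; q2++) {
            double p1 = std::pow(10.0, -q1/10.0);          //error probability of the forward base
            double p2 = std::pow(10.0, -q2/10.0);          //error probability of the reverse base
            //bases agree: both reads support the same call, so the posterior error probability shrinks
            double pMatch = (p1*p2/3.0) / ((1.0-p1)*(1.0-p2) + p1*p2/3.0);
            //bases disagree: keep the higher quality call; its posterior error probability grows
            double pHi = std::min(p1, p2); double pLo = std::max(p1, p2);
            double pMismatch = (pHi*(1.0-pLo/3.0)) / (pHi*(1.0-pLo) + pLo*(1.0-pHi) + 2.0*pHi*pLo/3.0);
            matchQ[q1][q2]    = (int)(-10.0*std::log10(pMatch) + 0.5);                 //left uncapped, as noted in the overlap code below
            mismatchQ[q1][q2] = std::max(2, (int)(-10.0*std::log10(pMismatch) + 0.5)); //never below 2, the quality mothur assigns to 'N'
        }
    }
}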
//********************************************************************************************************************** //vector > fastaFileNames, vector > qualFileNames, , string group void driverContigs(contigsData* params){ try { vector< vector > qual_match_simple_bayesian; qual_match_simple_bayesian.resize(47); for (int i = 0; i < qual_match_simple_bayesian.size(); i++) { qual_match_simple_bayesian[i].resize(47); } vector< vector > qual_mismatch_simple_bayesian; qual_mismatch_simple_bayesian.resize(47); for (int i = 0; i < qual_mismatch_simple_bayesian.size(); i++) { qual_mismatch_simple_bayesian[i].resize(47); } loadQmatchValues(qual_match_simple_bayesian, qual_mismatch_simple_bayesian); params->count = 0; string thisfqualindexfile, thisrqualindexfile, thisffastafile, thisrfastafile; thisfqualindexfile = ""; thisrqualindexfile = ""; thisffastafile = params->inputFiles[0]; thisrfastafile = params->inputFiles[1]; if (params->qualOrIndexFiles.size() != 0) { thisfqualindexfile = params->qualOrIndexFiles[0]; thisrqualindexfile = params->qualOrIndexFiles[1]; } if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: fqualindex = " + thisfqualindexfile + ".\n[DEBUG]: rqualindex = " + thisfqualindexfile + ".\n"); } ifstream inFFasta, inRFasta, inFQualIndex, inRQualIndex; #ifdef USE_BOOST boost::iostreams::filtering_istream inFF, inRF, inFQ, inRQ; #endif if (!params->gz) { //plain text files params->util.openInputFile(thisffastafile, inFFasta); params->util.openInputFile(thisrfastafile, inRFasta); inFFasta.seekg(params->linesInput.start); inRFasta.seekg(params->linesInputReverse.start); }else { //compressed files - no need to seekg because compressed files divide workload differently #ifdef USE_BOOST params->util.openInputFileBinary(thisffastafile, inFFasta, inFF); params->util.openInputFileBinary(thisrfastafile, inRFasta, inRF); #endif } ofstream outFasta, outMisMatch, outScrapFasta, outQual, outScrapQual; if (thisfqualindexfile != "") { if (thisfqualindexfile != "NONE") { if (!params->gz) { //plain text files params->util.openInputFile(thisfqualindexfile, inFQualIndex); inFQualIndex.seekg(params->qlinesInput.start); }else { #ifdef USE_BOOST params->util.openInputFileBinary(thisfqualindexfile, inFQualIndex, inFQ); #endif } //compressed files - no need to seekg because compressed files divide workload differently } else { thisfqualindexfile = ""; } if (thisrqualindexfile != "NONE") { if (!params->gz) { //plain text files params->util.openInputFile(thisrqualindexfile, inRQualIndex); inRQualIndex.seekg(params->qlinesInputReverse.start); }else { #ifdef USE_BOOST params->util.openInputFileBinary(thisrqualindexfile, inRQualIndex, inRQ); #endif } //compressed files - no need to seekg because compressed files divide workload differently } else { thisrqualindexfile = ""; } } bool hasQuality = false; bool hasIndex = false; if (params->delim == '@') { //fastq files so make an output quality hasQuality = true; if (thisfqualindexfile != "") { if (thisfqualindexfile != "NONE") { hasIndex = true; } } if (thisrqualindexfile != "") { if (thisrqualindexfile != "NONE") { hasIndex = true; } } }else if ((params->delim == '>') && (params->qualOrIndexFiles.size() != 0)) { hasQuality = true; } if (params->m->getDebug()) { if (hasQuality) { params->m->mothurOut("[DEBUG]: hasQuality = true\n"); } else { params->m->mothurOut("[DEBUG]: hasQuality = false\n"); } } int numPrimers = params->pairedPrimers.size(); int numBarcodes = 
params->pairedBarcodes.size(); vector trims; if ((numPrimers != 0) || (numBarcodes != 0)) { //standard trims.push_back(new TrimOligos(params->pdiffs, params->bdiffs, 0, 0, params->pairedPrimers, params->pairedBarcodes, hasIndex)); if (params->reorient) { //reoriented trims.push_back(new TrimOligos(params->pdiffs, params->bdiffs, 0, 0, params->reorientedPairedPrimers, params->reorientedPairedBarcodes, hasIndex)); //reversed trims.push_back(new TrimOligos(params->pdiffs, params->bdiffs, 0, 0, params->reversedPairedPrimers, params->reversedPairedBarcodes, hasIndex)); } } Alignment* alignment; int longestBase = 1000; if(params->align == "gotoh") { alignment = new GotohOverlap(params->gapOpen, params->gapExtend, params->match, params->misMatch, longestBase); } else if(params->align == "needleman") { alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, longestBase); } else if(params->align == "kmer") { alignment = new KmerAlign(params->kmerSize); } bool good = true; while (good) { if (params->m->getControl_pressed()) { break; } bool ignore = false; Sequence fSeq, rSeq; QualityScores* fQual = nullptr; QualityScores* rQual = nullptr; Sequence findexBarcode("findex", "NONE"); Sequence rindexBarcode("rindex", "NONE"); //read from input files if (params->gz) { #ifdef USE_BOOST ignore = read(fSeq, rSeq, fQual, rQual, findexBarcode, rindexBarcode, params->delim, inFF, inRF, inFQ, inRQ, thisfqualindexfile, thisrqualindexfile, params->format, params->nameType, params->offByOneTrimLength, params->m); #endif }else { ignore = read(fSeq, rSeq, fQual, rQual, findexBarcode, rindexBarcode, params->delim, inFFasta, inRFasta, inFQualIndex, inRQualIndex, thisfqualindexfile, thisrqualindexfile, params->format, params->nameType, params->offByOneTrimLength, params->m); } if (!ignore) { //remove primers and barcodes if neccessary vector codes; vector oligosResults = trimBarCodesAndPrimers(fSeq, rSeq, fQual, rQual, findexBarcode, rindexBarcode, trims, codes, numBarcodes, numPrimers, hasQuality, hasIndex, params->pdiffs, params->bdiffs, params->tdiffs, params->m); string trashCode = codes[0]; string commentString = codes[1]; int barcodeIndex = oligosResults[0]; int primerIndex = oligosResults[1]; //assemble reads string contig = ""; int oend, oStart; int numMismatches = 0; vector scores1, scores2; if(hasQuality){ scores1 = fQual->getScores(); scores2 = rQual->getScores(); delete fQual; delete rQual; } vector contigScores = assembleFragments(qual_match_simple_bayesian, qual_mismatch_simple_bayesian, fSeq, rSeq, scores1, scores2, hasQuality, alignment, contig, trashCode, oend, oStart, numMismatches, params->insert, params->deltaq, params->trimOverlap); //Note that usearch/vsearch cap the maximum Q value at 41 - perhaps due to ascii //limits? we leave this value unbounded. if two sequences have a 40 then the //assembled quality score will be 85. 
If two 250 nt reads are all 40 and they //perfectly match each other, then the difference in the number of expected errors //between using 85 and 41 all the way across will be 0.01986 - this is a "worst" //case scenario double expected_errors = 0; for(int i=0;i params->maxee) { trashCode += 'e' ;} if (params->screenSequences) { screenSequence(contig, trashCode, params); } if(trashCode.length() == 0){ string thisGroup = params->group; //group from file file if (params->createGroupFromOligos) { //overwrite file file group for oligos group if(numBarcodes != 0){ thisGroup = params->barcodeNameVector[barcodeIndex]; if (numPrimers != 0) { if (params->primerNameVector[primerIndex] != "") { if(thisGroup != "") { thisGroup += "." + params->primerNameVector[primerIndex]; } else { thisGroup = params->primerNameVector[primerIndex]; } } } } } int pos = thisGroup.find("ignore"); if (pos == string::npos) { if (thisGroup != "") { params->groupMap[fSeq.getName()] = thisGroup; map::iterator it = params->groupCounts.find(thisGroup); if (it == params->groupCounts.end()) { params->groupCounts[thisGroup] = 1; } else { params->groupCounts[it->first] ++; } } }else { ignore = true; } //print good stuff if(!ignore){ //output string output = ">" + fSeq.getName() + '\t' + "ee=" + toString(expected_errors) + '\t' + commentString + "\n" + contig + "\n"; params->trimFileName->write(output); if (hasQuality && params->makeQualFile) { output = ">" + fSeq.getName() + '\t' + "ee=" + toString(expected_errors) + '\t' + commentString +"\n"; for (int i = 0; i < contigScores.size(); i++) { output += toString(contigScores[i]) + " "; } output += "\n"; params->trimQFileName->write(output); } int numNs = 0; for (int i = 0; i < contig.length(); i++) { if (contig[i] == 'N') { numNs++; } } output = fSeq.getName() + '\t' + toString(contig.length()) + '\t' + toString(oend-oStart) + '\t' + toString(oStart) + '\t' + toString(oend) + '\t' + toString(numMismatches) + '\t' + toString(numNs) + '\t' + toString(expected_errors) + "\n"; params->misMatchesFile->write(output); } }else{ params->badNames.insert(fSeq.getName()); string output = ">" + fSeq.getName() + " | " + trashCode + '\t' + "ee=" + toString(expected_errors) + '\t' + commentString + "\n" + contig + "\n"; params->scrapFileName->write(output); if (hasQuality && params->makeQualFile) { output = ">" + fSeq.getName() + " | " + trashCode + '\t' + "ee=" + toString(expected_errors) + '\t' + commentString + "\n"; for (int i = 0; i < contigScores.size(); i++) { output += toString(contigScores[i]) + " "; } output += "\n"; params->scrapQFileName->write(output); } } if (params->m->getDebug()) { params->m->mothurOut("\n"); } } params->count++; #if defined NON_WINDOWS if (!params->gz) { double pos = inFFasta.tellg(); if (params->util.isEqual(pos,-1) || (pos >= params->linesInput.end)) { good = false; break; } }else { #ifdef USE_BOOST if (inFF.eof() || inRF.eof()) { good = false; break; } #endif } #else if (!params->gz) { if (params->count >= params->linesInput.end) { good = false; break; } }else { #ifdef USE_BOOST if (inFF.eof() || inRF.eof()) { good = false; break; } #endif } #endif //report progress if((params->count) % 1000 == 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } } //report progress if((params->count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } //close files inFFasta.close(); inRFasta.close(); if (params->gz) { #ifdef USE_BOOST inFF.pop(); inRF.pop(); #endif } if (params->delim == '@') { if (thisfqualindexfile != "") { 
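// fastq input: close whichever index/qual streams were opened; for gzipped input, pop() also detaches
// the gzip filter from the boost filtering stream so the underlying file handle is released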
inFQualIndex.close(); if (params->gz) { #ifdef USE_BOOST inFQ.pop(); #endif } } if (thisrqualindexfile != "") { inRQualIndex.close(); if (params->gz) { #ifdef USE_BOOST inRQ.pop(); #endif } } }else{ if (hasQuality) { inFQualIndex.close(); inRQualIndex.close(); if (params->gz) { #ifdef USE_BOOST inFQ.pop(); inRQ.pop(); #endif } } } //cleanup memory for (int i = 0; i < trims.size(); i++) { delete trims[i]; } delete alignment; } catch(exception& e) { params->m->errorOut(e, "MakeContigsCommand", "driverContigs"); exit(1); } } //********************************************************************************************************************** //fileInputs[0] = forward Fasta or Forward Fastq, fileInputs[1] = reverse Fasta or reverse Fastq. if qualOrIndexFiles.size() != 0, then qualOrIndexFiles[0] = forward qual or Forward index, qualOrIndexFiles[1] = reverse qual or reverse index. //lines[0] - ffasta, lines[1] - rfasta) - processor1 //lines[2] - ffasta, lines[3] - rfasta) - processor2 //lines[4] - ffasta, lines[5] - rfasta) - processor3 //... //qlines[0] - fqual or findex, qlines[1] - rqual or rindex) - processor1 //qlines[2] - fqual or findex, qlines[3] - rqual or rindex) - processor2 //qlines[4] - fqual or findex, qlines[5] - rqual or rindex) - processor3 //... //if using index files and only have 1 then the file name = NONE, and entries are duds. Copies of other index file. //if no index files are given, then qualOrIndexFiles.size() == 0. unsigned long long MakeContigsCommand::createProcesses(vector fileInputs, vector qualOrIndexFiles, string outputFasta, string outputScrapFasta, string outputQual, string outputScrapQual, string outputMisMatches, vector > fastaFileNames, vector > qualFileNames, string group, map& pairedPrimers, map& rpairedPrimers, map& revpairedPrimers, map& pairedBarcodes, map& rpairedBarcodes, map& revpairedBarcodes, vector& barcodeNames, vector& primerNames) { try { vector lines; vector qLines; if (gz) { nameType = setNameType(fileInputs[0], fileInputs[1], delim, offByOneTrimLength, gz, format); for (int i = 0; i < fileInputs.size(); i++) { //fake out lines - we are just going to check for end of file. Work is divided by number of files per processor. lines.push_back(linePair(0, 1000)); qLines.push_back(linePair(0, 1000)); } processors = fileInputs.size() / 2; }else { //divides the files so that the processors can share the workload. 
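// setLines() pairs the forward and reverse files byte-for-byte: it finds the first sequence name in
// each forward chunk and seeks the reverse (and index/qual) files to the same record, so e.g. with
// two processors the expected layout is
//   lines[0]/lines[1]  forward/reverse ranges for processor 1
//   lines[2]/lines[3]  forward/reverse ranges for processor 2
// with qLines mirroring that layout (or simply copying lines when no index/qual files are given).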
setLines(fileInputs, qualOrIndexFiles, lines, qLines, delim); } bool hasQuality = false; if (delim == '@') { hasQuality = true; } else if ((delim == '>') && (qualOrIndexFiles.size() != 0)) { hasQuality = true; } //create array of worker threads vector workerThreads; vector data; auto synchronizedOutputFastaTrimFile = std::make_shared(outputFasta); auto synchronizedOutputFastaScrapFile = std::make_shared(outputScrapFasta); auto synchronizedMisMatchFile = std::make_shared(outputMisMatches); auto synchronizedOutputQTrimFile = std::make_shared(outputQual); auto synchronizedOutputQScrapFile = std::make_shared(outputScrapQual); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadFastaTrimWriter = new OutputWriter(synchronizedOutputFastaTrimFile); OutputWriter* threadFastaScrapWriter = new OutputWriter(synchronizedOutputFastaScrapFile); OutputWriter* threadMismatchWriter = new OutputWriter(synchronizedMisMatchFile); OutputWriter* threadQTrimWriter = nullptr; OutputWriter* threadQScrapWriter = nullptr; if (makeQualFile) { threadQTrimWriter = new OutputWriter(synchronizedOutputQTrimFile); threadQScrapWriter = new OutputWriter(synchronizedOutputQScrapFile); } int spot = (i+1)*2; contigsData* dataBundle = new contigsData(threadFastaTrimWriter, threadFastaScrapWriter, threadQTrimWriter, threadQScrapWriter, threadMismatchWriter, fileInputs, qualOrIndexFiles, lines[spot], lines[spot+1], qLines[spot], qLines[spot+1]); dataBundle->setVariables(gz, delim, nameType, offByOneTrimLength, pairedBarcodes, pairedPrimers, rpairedBarcodes, rpairedPrimers, revpairedBarcodes, revpairedPrimers, primerNames, barcodeNames, reorient, pdiffs, bdiffs, tdiffs, align, match, misMatch, gapOpen, gapExtend, insert, deltaq, maxee, kmerSize, format, trimOverlap, createOligosGroup, createFileGroup, group, screenSequences, maxHomoP, maxLength, maxAmbig); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverContigs, dataBundle)); } OutputWriter* threadMisMatchWriter = new OutputWriter(synchronizedMisMatchFile); OutputWriter* threadFastaTrimWriter = new OutputWriter(synchronizedOutputFastaTrimFile); OutputWriter* threadFastaScrapWriter = new OutputWriter(synchronizedOutputFastaScrapFile); OutputWriter* threadQTrimWriter = nullptr; OutputWriter* threadQScrapWriter = nullptr; if (makeQualFile) { threadQTrimWriter = new OutputWriter(synchronizedOutputQTrimFile); threadQScrapWriter = new OutputWriter(synchronizedOutputQScrapFile); } contigsData* dataBundle = new contigsData(threadFastaTrimWriter, threadFastaScrapWriter, threadQTrimWriter, threadQScrapWriter, threadMisMatchWriter, fileInputs, qualOrIndexFiles, lines[0], lines[1], qLines[0], qLines[1]); dataBundle->setVariables(gz, delim, nameType, offByOneTrimLength, pairedBarcodes, pairedPrimers, rpairedBarcodes, rpairedPrimers, revpairedBarcodes, revpairedPrimers, primerNames, barcodeNames, reorient, pdiffs, bdiffs, tdiffs, align, match, misMatch, gapOpen, gapExtend, insert, deltaq, maxee, kmerSize, format, trimOverlap, createOligosGroup, createFileGroup, group, screenSequences, maxHomoP, maxLength, maxAmbig); driverContigs(dataBundle); long long num = dataBundle->count; badNames.insert(dataBundle->badNames.begin(), dataBundle->badNames.end()); groupMap.insert(dataBundle->groupMap.begin(), dataBundle->groupMap.end()); for (map::iterator it = dataBundle->groupCounts.begin(); it != dataBundle->groupCounts.end(); it++) { map::iterator itMine = groupCounts.find(it->first); if (itMine != groupCounts.end()) { itMine->second += it->second; 
} else { groupCounts[it->first] = it->second; } } for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->trimFileName; delete data[i]->scrapFileName; delete data[i]->misMatchesFile; if (makeQualFile) { delete data[i]->trimQFileName; delete data[i]->scrapQFileName; } badNames.insert(data[i]->badNames.begin(), data[i]->badNames.end()); groupMap.insert(data[i]->groupMap.begin(), data[i]->groupMap.end()); //merge counts for (map::iterator it = data[i]->groupCounts.begin(); it != data[i]->groupCounts.end(); it++) { map::iterator itMine = groupCounts.find(it->first); if (itMine != groupCounts.end()) { itMine->second += it->second; } else { groupCounts[it->first] = it->second; } } delete data[i]; delete workerThreads[i]; } delete threadFastaTrimWriter; delete threadFastaScrapWriter; delete threadMisMatchWriter; if (makeQualFile) { delete threadQTrimWriter; delete threadQScrapWriter; } delete dataBundle; return num; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** //process one file at a time, only get here with gz=true void driverContigsGroups(groupContigsData* gparams) { try { gparams->count = 0; gparams->bundle->delim = '@'; for (int l = gparams->start; l < gparams->end; l++) { int startTime = time(nullptr); if (gparams->bundle->m->getControl_pressed()) { break; } gparams->bundle->m->mothurOut("\n>>>>>\tProcessing file pair " + gparams->fileInputs[l][0] + " - " + gparams->fileInputs[l][1] + " (files " + toString(l+1) + " of " + toString(gparams->fileInputs.size()) + ")\t<<<<<\n"); vector theseFileInputs; vector theseQIInputs; string ffastqfile = gparams->fileInputs[l][0]; theseFileInputs.push_back(ffastqfile); string rfastqfile = gparams->fileInputs[l][1]; theseFileInputs.push_back(rfastqfile); string findexfile = gparams->fileInputs[l][2]; theseQIInputs.push_back(findexfile); //could be blank, "NONE" or filename string rindexfile = gparams->fileInputs[l][3]; theseQIInputs.push_back(rindexfile); //could be blank, "NONE" or filename gparams->bundle->group = gparams->file2Groups[l]; //blank if no group assigned to file pair bool decompressionHelped = false; //test to make sure you can read the gz files bool readable = testGZReadable(theseFileInputs, theseQIInputs, decompressionHelped, gparams->bundle->format, gparams->bundle->m); if (readable) { if (decompressionHelped) { gparams->bundle->gz = false; } }else { gparams->bundle->m->mothurOut("[ERROR]: Unable to read compressed .gz files, please decompress and run make.contigs again. \n"); gparams->bundle->m->setControl_pressed(true); break; } //find read name type to speed read matching later gparams->bundle->nameType = setNameType(theseFileInputs[0], theseFileInputs[1], gparams->bundle->delim, gparams->bundle->offByOneTrimLength, gparams->bundle->gz, gparams->bundle->format); //fake out lines - we are just going to check for end of file. Work is divided by number of files per processor. 
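// when the gz files could not be streamed directly, testGZReadable() swaps in decompressed temporary
// copies (removed again below), so real byte ranges from divideFile() can be used; for directly
// streamed gz input the (0, 1000) linePairs are placeholders and reading simply continues to end of file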
vector thisLines; vector thisQLines; if (decompressionHelped) { //set file positions for file int processors = 1; vector fastaFilePos = gparams->bundle->util.divideFile(theseFileInputs[0], processors, gparams->bundle->delim); thisLines.push_back(linePair(fastaFilePos[0], fastaFilePos[1])); //forward fastq fastaFilePos = gparams->bundle->util.divideFile(theseFileInputs[1], processors, gparams->bundle->delim); thisLines.push_back(linePair(fastaFilePos[0], fastaFilePos[1])); //reverse fastq if ((theseQIInputs[0] != "") && (theseQIInputs[0] != "NONE")){ fastaFilePos = gparams->bundle->util.divideFile(theseQIInputs[0], processors, gparams->bundle->delim); thisQLines.push_back(linePair(fastaFilePos[0], fastaFilePos[1])); //forward index } if ((theseQIInputs[1] != "") && (theseQIInputs[1] != "NONE")){ fastaFilePos = gparams->bundle->util.divideFile(theseQIInputs[1], processors, gparams->bundle->delim); thisQLines.push_back(linePair(fastaFilePos[0], fastaFilePos[1])); //forward index } if (thisQLines.size() == 0) { thisQLines = thisLines; } } else { thisLines.push_back(linePair(0, 1000)); thisLines.push_back(linePair(0, 1000)); //fasta[0], fasta[1] - forward and reverse thisQLines.push_back(linePair(0, 1000)); thisQLines.push_back(linePair(0, 1000)); //qual[0], qual[1] - forward and reverse } gparams->bundle->m->mothurOut("Making contigs...\n"); contigsData* dataBundle = new contigsData(gparams->bundle->trimFileName, gparams->bundle->scrapFileName, gparams->bundle->trimQFileName, gparams->bundle->scrapQFileName, gparams->bundle->misMatchesFile, theseFileInputs, theseQIInputs, thisLines[0], thisLines[1], thisQLines[0], thisQLines[1]); dataBundle->copyVariables(gparams->bundle); driverContigs(dataBundle); if (decompressionHelped) { gparams->bundle->util.mothurRemove(theseFileInputs[0]); gparams->bundle->util.mothurRemove(theseFileInputs[1]); if (theseQIInputs[0] != "NONE") { gparams->bundle->util.mothurRemove(theseQIInputs[0]); } if (theseQIInputs[1] != "NONE") { gparams->bundle->util.mothurRemove(theseQIInputs[1]); } } gparams->count += dataBundle->count; gparams->badNames.insert(dataBundle->badNames.begin(), dataBundle->badNames.end()); gparams->bundle->groupMap.insert(dataBundle->groupMap.begin(), dataBundle->groupMap.end()); for (map::iterator it = dataBundle->groupCounts.begin(); it != dataBundle->groupCounts.end(); it++) { map::iterator itMine = gparams->bundle->groupCounts.find(it->first); if (itMine != gparams->bundle->groupCounts.end()) { itMine->second += it->second; } else { gparams->bundle->groupCounts[it->first] = it->second; } } gparams->bundle->m->mothurOut("Done.\n\nIt took " + toString(time(nullptr) - startTime) + " secs to assemble " + toString(dataBundle->count) + " reads.\n\n"); delete dataBundle; } } catch(exception& e) { gparams->bundle->m->errorOut(e, "MakeContigsCommand", "driverContigsGroups"); exit(1); } } //********************************************************************************************************************** //only getting here is gz=true unsigned long long MakeContigsCommand::createProcessesGroups(vector< vector > fileInputs, string compositeFastaFile, string compositeScrapFastaFile, string compositeQualFile, string compositeScrapQualFile, string compositeMisMatchFile, map& file2Groups) { try { map pairedPrimers, rpairedPrimers, revpairedPrimers, pairedBarcodes, rpairedBarcodes, revpairedBarcodes; vector barcodeNames, primerNames; if(oligosfile != "") { createOligosGroup = getOligos(pairedPrimers, rpairedPrimers, revpairedPrimers, pairedBarcodes, 
rpairedBarcodes, revpairedBarcodes, barcodeNames, primerNames); } //give group in file file precedence if (createFileGroup) { createOligosGroup = false; } vector workerThreads; vector data; //divide files between processors vector startEndIndexes; int remainingPairs = fileInputs.size(); if (remainingPairs < processors) { processors = remainingPairs; } int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } startEndIndexes.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } auto synchronizedOutputFastaTrimFile = std::make_shared(compositeFastaFile); auto synchronizedOutputFastaScrapFile = std::make_shared(compositeScrapFastaFile); auto synchronizedOutputQTrimFile = std::make_shared(compositeQualFile); auto synchronizedOutputQScrapFile = std::make_shared(compositeScrapQualFile); auto synchronizedMisMatchFile = std::make_shared(compositeMisMatchFile); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadFastaTrimWriter = new OutputWriter(synchronizedOutputFastaTrimFile); OutputWriter* threadFastaScrapWriter = new OutputWriter(synchronizedOutputFastaScrapFile); OutputWriter* threadMismatchWriter = new OutputWriter(synchronizedMisMatchFile); OutputWriter* threadQTrimWriter = nullptr; OutputWriter* threadQScrapWriter = nullptr; if (makeQualFile) { threadQTrimWriter = new OutputWriter(synchronizedOutputQTrimFile); threadQScrapWriter = new OutputWriter(synchronizedOutputQScrapFile); } contigsData* dataBundle = new contigsData(threadFastaTrimWriter, threadFastaScrapWriter, threadQTrimWriter, threadQScrapWriter, threadMismatchWriter); dataBundle->setVariables(gz, delim, nameType, offByOneTrimLength, pairedBarcodes, pairedPrimers, rpairedBarcodes, rpairedPrimers, revpairedBarcodes, revpairedPrimers, primerNames, barcodeNames, reorient, pdiffs, bdiffs, tdiffs, align, match, misMatch, gapOpen, gapExtend, insert, deltaq, maxee, kmerSize, format, trimOverlap, createOligosGroup, createFileGroup, "", screenSequences, maxHomoP, maxLength, maxAmbig); groupContigsData* groupDataBundle = new groupContigsData(fileInputs, startEndIndexes[i+1].start, startEndIndexes[i+1].end, dataBundle, file2Groups); data.push_back(groupDataBundle); workerThreads.push_back(new std::thread(driverContigsGroups, groupDataBundle)); } OutputWriter* threadMisMatchWriter = new OutputWriter(synchronizedMisMatchFile); OutputWriter* threadFastaTrimWriter = new OutputWriter(synchronizedOutputFastaTrimFile); OutputWriter* threadFastaScrapWriter = new OutputWriter(synchronizedOutputFastaScrapFile); OutputWriter* threadQTrimWriter = nullptr; OutputWriter* threadQScrapWriter = nullptr; if (makeQualFile) { threadQTrimWriter = new OutputWriter(synchronizedOutputQTrimFile); threadQScrapWriter = new OutputWriter(synchronizedOutputQScrapFile); } contigsData* dataBundle = new contigsData(threadFastaTrimWriter, threadFastaScrapWriter, threadQTrimWriter, threadQScrapWriter, threadMisMatchWriter); dataBundle->setVariables(gz, delim, nameType, offByOneTrimLength, pairedBarcodes, pairedPrimers, rpairedBarcodes, rpairedPrimers, revpairedBarcodes, revpairedPrimers, primerNames, barcodeNames, reorient, pdiffs, bdiffs, tdiffs, align, match, misMatch, gapOpen, gapExtend, insert, deltaq, maxee, kmerSize, format, 
trimOverlap, createOligosGroup, createFileGroup, "", screenSequences, maxHomoP, maxLength, maxAmbig); groupContigsData* groupDataBundle = new groupContigsData(fileInputs, startEndIndexes[0].start, startEndIndexes[0].end, dataBundle, file2Groups); driverContigsGroups(groupDataBundle); delete threadFastaTrimWriter; delete threadFastaScrapWriter; delete threadMisMatchWriter; if (makeQualFile) { delete threadQTrimWriter; delete threadQScrapWriter; } long long num = groupDataBundle->count; badNames.insert(dataBundle->badNames.begin(), dataBundle->badNames.end()); groupMap.insert(groupDataBundle->bundle->groupMap.begin(), groupDataBundle->bundle->groupMap.end()); for (map::iterator it = dataBundle->groupCounts.begin(); it != dataBundle->groupCounts.end(); it++) { map::iterator itMine = groupCounts.find(it->first); if (itMine != groupCounts.end()) { itMine->second += it->second; } else { groupCounts[it->first] = it->second; } } delete groupDataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->bundle->trimFileName; delete data[i]->bundle->scrapFileName; delete data[i]->bundle->misMatchesFile; if (makeQualFile) { delete data[i]->bundle->trimQFileName; delete data[i]->bundle->scrapQFileName; } badNames.insert(data[i]->badNames.begin(), data[i]->badNames.end()); groupMap.insert(data[i]->bundle->groupMap.begin(), data[i]->bundle->groupMap.end()); //merge counts for (map::iterator it = data[i]->bundle->groupCounts.begin(); it != data[i]->bundle->groupCounts.end(); it++) { map::iterator itMine = groupCounts.find(it->first); if (itMine != groupCounts.end()) { itMine->second += it->second; } else { groupCounts[it->first] = it->second; } } delete data[i]; delete workerThreads[i]; } return num; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "createProcessesGroups"); exit(1); } } /**************************************************************************************************/ int MakeContigsCommand::setLines(vector fasta, vector qual, vector& lines, vector& qLines, char delim) { try { lines.clear(); qLines.clear(); vector fastaFilePos; vector qfileFilePos; vector temp; nameType = setNameType(fasta[0], fasta[1], delim, offByOneTrimLength, gz, format); #if defined NON_WINDOWS //set file positions for fasta file fastaFilePos = util.divideFile(fasta[0], processors, delim); //get name of first sequence in each chunk map firstSeqNames; map trimmedNames; for (int i = 0; i < (fastaFilePos.size()-1); i++) { ifstream in; util.openInputFile(fasta[0], in); in.seekg(fastaFilePos[i]); string name = ""; if (delim == '>') { Sequence temp(in); name = temp.getName(); }else { string line = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpace(line); name = pieces[0]; name = name.substr(1); util.checkName(name); } fixName(name, nameType, offByOneTrimLength); firstSeqNames[name] = i; in.close(); } map copy; if (qual.size() != 0) { copy = firstSeqNames; } //look for match in reverse file ifstream in2; util.openInputFile(fasta[1], in2); string input; while(!in2.eof()){ input = util.getline(in2); gobble(in2); if (input.length() != 0) { if(input[0] == delim){ //this is a name line vector pieces = util.splitWhiteSpace(input); string name = pieces[0]; name = name.substr(1); util.checkName(name); fixName(name, nameType, offByOneTrimLength); map::iterator it = firstSeqNames.find(name); if (it != firstSeqNames.end()) { //this is the start of a new chunk double pos = in2.tellg(); qfileFilePos.push_back(pos - input.length() - 1); 
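// the offset just pushed is the current position minus the header line and its newline, i.e. the start
// of this record in the reverse file, so this reverse chunk lines up with the forward chunk that begins
// with the same sequence name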
firstSeqNames.erase(it); } } } if ((firstSeqNames.size() == 0)) { break; } } in2.close(); //get last file position of reverse fasta[1] FILE * pFile; double size; //get num bytes in file fasta[1] = util.getFullPathName(fasta[1]); pFile = fopen (fasta[1].c_str(),"rb"); if (pFile==nullptr) perror ("Error opening file"); else{ fseek (pFile, 0, SEEK_END); size=ftell (pFile); fclose (pFile); } qfileFilePos.push_back(size); if ((firstSeqNames.size() != 0)){ for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { if (delim == '>') { m->mothurOut(it->first + " is in your forward fasta file and not in your reverse file, please remove it using the remove.seqs command before proceeding.\n"); }else { m->mothurOut(it->first + " is in your forward fastq file and not in your reverse file, please remove it using the remove.seqs command before proceeding.\n"); } } m->setControl_pressed(true); return processors; } //fill lines with paired forward and reverse fasta lines for (int i = 0; i < (fastaFilePos.size()-1); i++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: forward " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); } lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)])); if (m->getDebug()) { m->mothurOut("[DEBUG]: reverse " + toString(i) +'\t' + toString(qfileFilePos[i]) + '\t' + toString(qfileFilePos[i+1]) + '\n'); } lines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); } qfileFilePos.clear(); if (qual.size() != 0) { firstSeqNames = copy; if (qual[0] != "NONE") { //seach for filePos of each first name in the qfile and save in qfileFilePos ifstream inQual; util.openInputFile(qual[0], inQual); string input; while(!inQual.eof()){ input = util.getline(inQual); gobble(inQual); if (input.length() != 0) { if(input[0] == delim){ //this is a sequence name line vector pieces = util.splitWhiteSpace(input); string name = pieces[0]; name = name.substr(1); util.checkName(name); fixName(name, nameType, offByOneTrimLength); map::iterator it = firstSeqNames.find(name); if(it != firstSeqNames.end()) { //this is the start of a new chunk double pos = inQual.tellg(); qfileFilePos.push_back(pos - input.length() - 1); firstSeqNames.erase(it); } } } if ((firstSeqNames.size() == 0)) { break; } } inQual.close(); //get last file position of reverse qual[0] FILE * pFile; double size; //get num bytes in file qual[0] = util.getFullPathName(qual[0]); pFile = fopen (qual[0].c_str(),"rb"); if (pFile==nullptr) perror ("Error opening file"); else{ fseek (pFile, 0, SEEK_END); size=ftell (pFile); fclose (pFile); } qfileFilePos.push_back(size); if ((firstSeqNames.size() != 0)){ for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { if (delim == '>') { m->mothurOut(it->first + " is in your forward fasta file and reverse fasta file, but not your forward qfile, please remove it using the remove.seqs command before proceeding.\n"); }else { m->mothurOut(it->first + " is in your forward fastq file and reverse fastq file, but not your forward index, please remove it using the remove.seqs command before proceeding.\n"); } } m->setControl_pressed(true); return processors; } } firstSeqNames = copy; if (qual[1] != "NONE") { ifstream inQual2; util.openInputFile(qual[1], inQual2); while(!inQual2.eof()){ input = util.getline(inQual2); gobble(inQual2); if (input.length() != 0) { if(input[0] == delim){ //this is a sequence name line vector pieces = util.splitWhiteSpace(input); string name = pieces[0]; name = name.substr(1); 
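// substr(1) drops the leading '>' or '@' from the header token; checkName()/fixName() then normalize
// it (via the nameType/offByOneTrimLength logic) so names from the paired files compare equal even
// when their endings differ slightly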
util.checkName(name); fixName(name, nameType, offByOneTrimLength); map::iterator it = firstSeqNames.find(name); if(it != firstSeqNames.end()) { //this is the start of a new chunk double pos = inQual2.tellg(); temp.push_back(pos - input.length() - 1); firstSeqNames.erase(it); } } } if ((firstSeqNames.size() == 0)) { break; } } inQual2.close(); //get last file position of reverse qual[1] FILE * pFile2; //get num bytes in file qual[1] = util.getFullPathName(qual[1]); pFile2 = fopen (qual[1].c_str(),"rb"); if (pFile2==nullptr) perror ("Error opening file"); else{ fseek (pFile2, 0, SEEK_END); size=ftell (pFile2); fclose (pFile2); } temp.push_back(size); if ((firstSeqNames.size() != 0)){ for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { if (delim == '>') { m->mothurOut(it->first + " is in your forward fasta file, reverse fasta file, and forward qfile but not your reverse qfile, please remove it using the remove.seqs command before proceeding.\n"); }else { if (qual[0] != "NONE") { m->mothurOut(it->first + " is in your forward fastq file, reverse fastq file, and forward index but not your reverse index, please remove it using the remove.seqs command before proceeding.\n"); }else { m->mothurOut(it->first + " is in your forward fastq file, reverse fastq file, but not your reverse index, please remove it using the remove.seqs command before proceeding.\n"); } } } m->setControl_pressed(true); return processors; } } if (qual[0] == "NONE") { qfileFilePos = temp; } //fill with duds, if both were NONE then qual.size() == 0 if (qual[1] == "NONE") { temp = qfileFilePos; } //fill with duds, if both were NONE then qual.size() == 0 //fill lines with paired forward and reverse fasta lines for (int i = 0; i < (fastaFilePos.size()-1); i++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: forward " + toString(i) +'\t' + toString(qfileFilePos[i]) + '\t' + toString(qfileFilePos[i+1]) + '\n'); } qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); if (m->getDebug()) { m->mothurOut("[DEBUG]: reverse " + toString(i) +'\t' + toString(temp[i]) + '\t' + toString(temp[i+1]) + '\n'); } qLines.push_back(linePair(temp[i], temp[(i+1)])); } }else { qLines = lines; } //files with duds return processors; #else long long numFastaSeqs = 0; fastaFilePos = util.setFilePosFasta(fasta[0], numFastaSeqs, delim); //forward if (numFastaSeqs < processors) { processors = numFastaSeqs; } long long numRFastaSeqs = 0; qfileFilePos = util.setFilePosFasta(fasta[1], numRFastaSeqs, delim); //reverse if (numFastaSeqs != numRFastaSeqs) { if (delim == '>') { m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your forward fasta file, but " + toString(numRFastaSeqs) + " sequences in your reverse fasta file. Please use the list.seqs and get.seqs commands to make the files match before proceeding. list.seqs(fasta=yourForward.fasta-yourReverse.fasta);get.seqs(fasta=yourForward.fasta, accnos=current);get.seqs(fasta=yourReverse.fasta, accnos=current);\n"); m->setControl_pressed(true); return processors; }else { m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your forward fastq file, but " + toString(numRFastaSeqs) + " sequences in your reverse fastq file. Please use the list.seqs and get.seqs commands to make the files match before proceeding. 
list.seqs(fastq=yourForward.fastq-yourReverse.fastq);get.seqs(fastq=yourForward.fastq, accnos=current);get.seqs(fastq=yourReverse.fastq, accnos=current);\n"); m->setControl_pressed(true); return processors; } } //figure out how many sequences you have to process unsigned long long numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { unsigned long long startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor)); //forward lines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); //reverse } if (qual.size() != 0) { long long numFQualSeqs = 0; long long numRQualSeqs = 0; fastaFilePos.clear(); qfileFilePos.clear(); if (qual[0] != "NONE") { fastaFilePos = util.setFilePosFasta(qual[0], numFQualSeqs, delim); } //forward index or qual file if (qual[1] != "NONE") { qfileFilePos = util.setFilePosFasta(qual[1], numRQualSeqs, delim); }//reverse index or qual file if (qual[0] == "NONE") { fastaFilePos = qfileFilePos; numFQualSeqs = numRQualSeqs; } //fill with duds, if both were NONE then qual.size() == 0 if (qual[1] == "NONE") { qfileFilePos = fastaFilePos; numRQualSeqs = numFQualSeqs; } //fill with duds, if both were NONE then qual.size() == 0 if ((numFQualSeqs != numRQualSeqs) || (numFQualSeqs != numFastaSeqs)){ if (delim == '>') { m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your forward fasta file, " + toString(numRFastaSeqs) + " sequences in your reverse fasta file, " + toString(numFQualSeqs) + " sequences in your forward qual file, " + toString(numRQualSeqs) + " sequences in your reverse qual file. Please use the list.seqs and get.seqs commands to make the files match before proceeding.\n"); m->setControl_pressed(true); return processors; }else { if (qual[0] != "NONE") { m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your forward fastq file, " + toString(numRFastaSeqs) + " sequences in your reverse fastq file and " + toString(numRQualSeqs) + " sequences in your reverse index file. Please use the list.seqs and get.seqs commands to make the files match before proceeding.\n"); m->setControl_pressed(true); return processors; }else if (qual[1] != "NONE") { m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your forward fastq file, " + toString(numRFastaSeqs) + " sequences in your reverse fastq file and " + toString(numFQualSeqs) + " sequences in your forward index file. Please use the list.seqs and get.seqs commands to make the files match before proceeding.\n"); m->setControl_pressed(true); return processors; }else { m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your forward fastq file, " + toString(numRFastaSeqs) + " sequences in your reverse fastq file, " + toString(numFQualSeqs) + " sequences in your forward index file, " + toString(numRQualSeqs) + " sequences in your reverse index file. 
Please use the list.seqs and get.seqs commands to make the files match before proceeding.\n"); m->setControl_pressed(true); return processors; } } } //figure out how many sequences you have to process unsigned long long numSeqsPerProcessor = numFQualSeqs / processors; for (int i = 0; i < processors; i++) { unsigned long long startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFQualSeqs - i * numSeqsPerProcessor; } qLines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor)); //forward qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); //reverse } }else { qLines = lines; } //files with duds if(qual.size() == 0) { qLines = lines; } //files with duds return 1; #endif } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "setLines"); exit(1); } } //*************************************************************************************************************** //lines can be 2, 3, or 4 columns // forward.fastq reverse.fastq -> 2 column // groupName forward.fastq reverse.fastq -> 3 column // forward.fastq reverse.fastq forward.index.fastq reverse.index.fastq -> 4 column // forward.fastq reverse.fastq none reverse.index.fastq -> 4 column // forward.fastq reverse.fastq forward.index.fastq none -> 4 column vector< vector > MakeContigsCommand::readFileNames(string filename, map& file2Group){ try { FileFile dataFile(filename, "contigs"); vector< vector > files = dataFile.getFiles(); gz = dataFile.isGZ(); file2Group = dataFile.getFile2Group(); createFileGroup = dataFile.isColumnWithGroupNames(); if (dataFile.containsIndexFiles() && (oligosfile == "")) { m->mothurOut("[ERROR]: You need to provide an oligos file if you are going to use an index file.\n"); m->setControl_pressed(true); } if (files.size() == 0) { m->setControl_pressed(true); } return files; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "readFileNames"); exit(1); } } //*************************************************************************************************************** //illumina data requires paired forward and reverse data //BARCODE atgcatgc atgcatgc groupName //PRIMER atgcatgc atgcatgc groupName //PRIMER atgcatgc atgcatgc bool MakeContigsCommand::getOligos(map& pairedPrimers, map& rpairedPrimers, map& revpairedPrimers, map& pairedBarcodes, map& rpairedBarcodes, map& revpairedBarcodes, vector& barcodeNames, vector& primerNames){ try { if (m->getDebug()) { m->mothurOut("[DEBUG]: oligosfile = " + oligosfile + "\n"); } bool allBlank = false; Oligos oligos; oligos.read(oligosfile, false); if (m->getControl_pressed()) { return false; } //error in reading oligos if (oligos.hasPairedBarcodes() || oligos.hasPairedPrimers()) { pairedPrimers = oligos.getPairedPrimers(); rpairedPrimers = oligos.getReorientedPairedPrimers(); revpairedPrimers = oligos.getReversedPairedPrimers(); primerNames = oligos.getPrimerNames(); pairedBarcodes = oligos.getPairedBarcodes(); rpairedBarcodes = oligos.getReorientedPairedBarcodes(); revpairedBarcodes = oligos.getReversedPairedBarcodes(); barcodeNames = oligos.getBarcodeNames(); if (m->getDebug()) { map::iterator it; m->mothurOut("\n[DEBUG]: paired primers - \n"); for (it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) { m->mothurOut("[DEBUG]: " + primerNames[it->first] + "\t" + it->second.forward + "\t" + it->second.reverse + "\n"); } m->mothurOut("\n[DEBUG]: paired reoriented primers - \n"); for (it = rpairedPrimers.begin(); it != rpairedPrimers.end(); it++) { m->mothurOut("[DEBUG]: " + 
primerNames[it->first] + "\t" + it->second.forward + "\t" + it->second.reverse + "\n"); } m->mothurOut("\n[DEBUG]: paired reversed primers - \n"); for (it = revpairedPrimers.begin(); it != revpairedPrimers.end(); it++) { m->mothurOut("[DEBUG]: " + primerNames[it->first] + "\t" + it->second.forward + "\t" + it->second.reverse + "\n"); } m->mothurOut("\n[DEBUG]: paired barcodes - \n"); for (it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) { m->mothurOut("[DEBUG]: " + barcodeNames[it->first] + "\t" + it->second.forward + "\t" + it->second.reverse + "\n"); } m->mothurOut("\n[DEBUG]: paired reoriented barcodes - \n"); for (it = rpairedBarcodes.begin(); it != rpairedBarcodes.end(); it++) { m->mothurOut("[DEBUG]: " + barcodeNames[it->first] + "\t" + it->second.forward + "\t" + it->second.reverse + "\n"); } m->mothurOut("\n[DEBUG]: paired reversed barcodes - \n"); for (it = revpairedBarcodes.begin(); it != revpairedBarcodes.end(); it++) { m->mothurOut("[DEBUG]: " + barcodeNames[it->first] + "\t" + it->second.forward + "\t" + it->second.reverse + "\n"); } } }else { m->mothurOut("[ERROR]: make.contigs requires paired barcodes and primers. You can set one end to NONE if you are using an index file.\n"); m->setControl_pressed(true); } if (m->getControl_pressed()) { return false; } int numLinkers = oligos.getLinkers().size(); int numSpacers = oligos.getSpacers().size(); if (numLinkers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n"); } if (numSpacers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n"); } vector groupNames = oligos.getGroupNames(); if (groupNames.size() == 0) { allFiles = false; allBlank = true; } if (allBlank) { m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile.\n"); allFiles = false; return false; } return true; } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "getOligos"); exit(1); } } //********************************************************************************************************************** void MakeContigsCommand::debugFunction() { try{ //allows you to run the oligos and index file independantly to check for barcode issues. make.contigs(findex=yourIndexFile, bdiffs=1, oligos=yourOligosFile, checkorient=t). 
just used for user support map pairedPrimers, pairedBarcodes, reorientedPairedBarcodes, reorientedPairedPrimers, reversedPairedBarcodes, reverseedPairedPrimers; vector barcodeNames, primerNames; if(oligosfile != "") { createOligosGroup = getOligos(pairedPrimers, reorientedPairedPrimers, reverseedPairedPrimers, pairedBarcodes, reorientedPairedBarcodes, reversedPairedBarcodes, barcodeNames, primerNames); } int numPrimers = pairedPrimers.size(); TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes, true); int numBarcodes = pairedBarcodes.size(); TrimOligos* rtrimOligos = nullptr; if (reorient) { rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, reorientedPairedPrimers, reorientedPairedBarcodes, true); numBarcodes = reorientedPairedBarcodes.size(); numPrimers = reorientedPairedPrimers.size(); } ifstream in; util.openInputFile(findexfile, in); while (!in.eof()) { if (m->getControl_pressed()) { break; } bool ignore = false; FastqRead index(in, ignore, format); gobble(in); int success = 1; string trashCode = ""; string commentString = ""; int currentSeqsDiffs = 0; int barcodeIndex = 0; Sequence fSeq, rSeq; QualityScores* fQual = nullptr; QualityScores* rQual = nullptr; QualityScores* savedFQual = nullptr; QualityScores* savedRQual = nullptr; Sequence findexBarcode("findex", index.getSeq()); Sequence rindexBarcode("rindex", "NONE"); Sequence savedFindex("findex", index.getSeq()); Sequence savedRIndex("rindex", "NONE"); if(numBarcodes != 0){ vector results; results = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex); success = results[0] + results[2]; commentString += "fbdiffs=" + toString(results[0]) + "(" + trimOligos.getCodeValue(results[1], bdiffs) + "), rbdiffs=" + toString(results[2]) + "(" + trimOligos.getCodeValue(results[3], bdiffs) + ") "; if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } if (reorient && (trashCode != "")) { //if you failed and want to check the reverse int thisSuccess = 0; string thisTrashCode = ""; string thiscommentString = ""; int thisCurrentSeqsDiffs = 0; int thisBarcodeIndex = 0; if(numBarcodes != 0){ vector results; results = rtrimOligos->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, thisBarcodeIndex); thisSuccess = results[0] + results[2]; thiscommentString += "fbdiffs=" + toString(results[0]) + "(" + rtrimOligos->getCodeValue(results[1], bdiffs) + "), rbdiffs=" + toString(results[2]) + "(" + rtrimOligos->getCodeValue(results[3], bdiffs) + ") "; if(thisSuccess > bdiffs) { thisTrashCode += 'b'; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if (thisTrashCode == "") { trashCode = thisTrashCode; success = thisSuccess; currentSeqsDiffs = thisCurrentSeqsDiffs; commentString = thiscommentString; barcodeIndex = thisBarcodeIndex; }else { trashCode += "(" + thisTrashCode + ")"; } } if (trashCode == "") { string thisGroup = ""; if(numBarcodes != 0){ thisGroup = barcodeNames[barcodeIndex]; } int pos = thisGroup.find("ignore"); if (pos == string::npos) { if (thisGroup != "") { groupMap[index.getName()] = thisGroup; map::iterator it = groupCounts.find(thisGroup); if (it == groupCounts.end()) { groupCounts[thisGroup] = 1; } else { groupCounts[it->first] ++; } } } } cout << index.getName() << '\t' << commentString << endl; } in.close(); int total = 0; if (groupCounts.size() != 0) { m->mothurOut("\nGroup count: \n"); } for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { total += it->second; m->mothurOut(it->first + "\t" + toString(it->second) + 
"\n"); } if (total != 0) { m->mothurOut("\nTotal of all groups is " + toString(total) + "\n"); } exit(1); } catch(exception& e) { m->errorOut(e, "MakeContigsCommand", "debugFunction"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/makecontigscommand.h000066400000000000000000000106371424121717000222450ustar00rootroot00000000000000#ifndef Mothur_makecontigscommand_h #define Mothur_makecontigscommand_h // // makecontigscommand.h // Mothur // // Created by Sarah Westcott on 5/15/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "command.hpp" #include "sequence.hpp" #include "qualityscores.h" #include "alignment.hpp" #include "gotohoverlap.hpp" #include "needlemanoverlap.hpp" #include "trimoligos.h" #include "oligos.h" #include "fastqread.h" #include "kmeralign.h" #include "splitgroupscommand.h" #include "filefile.hpp" # define PROBABILITY(score) (pow(10.0, (-(double)(score)) / 10.0)) # define PHREDMAX 46 # define PHREDCLAMP(x) ((x) > PHREDMAX ? PHREDMAX : ((x) < 0 ? 0 : (x))) struct pairFastqRead { FastqRead forward; FastqRead reverse; FastqRead findex; FastqRead rindex; pairFastqRead()=default; pairFastqRead(FastqRead f, FastqRead r) : forward(f), reverse(r){}; pairFastqRead(FastqRead f, FastqRead r, FastqRead fi, FastqRead ri) : forward(f), reverse(r), findex(fi), rindex(ri) {}; ~pairFastqRead() = default;; }; /**************************************************************************************************/ class MakeContigsCommand : public Command { public: MakeContigsCommand(string); ~MakeContigsCommand(){} vector setParameters(); string getCommandName() { return "make.contigs"; } string getCommandCategory() { return "Sequence Processing"; } //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Make.contigs"; } string getDescription() { return "description"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: #define perfectMatch 2 #define poundMatch 1 #define offByOne 3 char delim; bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk, reorient, gz, makeQualFile, screenSequences; string ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format, inputDir; float match, misMatch, gapOpen, gapExtend, maxee; int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq, kmerSize, nameType, offByOneTrimLength, maxAmbig, maxHomoP, maxLength; vector outputNames; set badNames; map groupCounts; map groupMap; unsigned long long processMultipleFileOption(string& outFastaFile, string&); unsigned long long processSingleFileOption(string& outFastaFile, string& outScrapFastaFile, string& outQualFile, string& outScrapQualFile, string& outMisMatchFile, string group); unsigned long long createProcesses(vector, vector, string, string, string, string, string, vector >, vector >, string, map& pairedPrimers, map& rpairedPrimers, map&, map& pairedBarcodes, map& rpairedBarcodes,map&, vector& barcodeNames, vector& primerNames); unsigned long long createProcessesGroups(vector< vector >, string compositeFastaFile, string compositeScrapFastaFile, string compositeQualFile, string compositeScrapQualFile, string compositeMisMatchFile, 
map& file2Groups); int createCountFile(string outputGroupFile, string resultFastafile); vector< vector > readFileNames(string, map&); bool getOligos(map& pairedPrimers, map& rpairedPrimers, map&, map& pairedBarcodes, map& rpairedBarcodes, map&, vector& barcodeNames, vector& primerNames); int setLines(vector, vector, vector& fastaFilePos, vector& qfileFilePos, char delim); //the delim let you know whether this is fasta and qual, or fastq and index. linePair entries will always be in sets of two. One for the forward and one for hte reverse. (fastaFilePos[0] - ffasta, fastaFilePos[1] - rfasta) - processor1 //bool testGZReadable(vector&, vector&, bool&); void debugFunction(); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/makefastqcommand.cpp000077500000000000000000000164771424121717000222630ustar00rootroot00000000000000/* * makefastqcommand.cpp * mothur * * Created by westcott on 2/14/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "makefastqcommand.h" #include "sequence.hpp" #include "qualityscores.h" #include "fastqread.h" //********************************************************************************************************************** vector MakeFastQCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fastq",false,true,true); parameters.push_back(pfasta); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","fastq",false,true,true); parameters.push_back(pqfile); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; //initialize outputTypes vector tempOutNames; outputTypes["fastq"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MakeFastQCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MakeFastQCommand::getHelpString(){ try { string helpString = ""; helpString += "The make.fastq command reads a fasta and quality file and creates a fastq file.\n"; helpString += "The make.fastq command parameters are fasta, qfile and format. 
fasta and qfile are required.\n"; helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n"; helpString += "The make.fastq command should be in the following format: make.fastq(qfile=yourQualityFile, fasta=yourFasta).\n"; helpString += "Example make.fastq(fasta=amazon.fasta, qfile=amazon.qual).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "MakeFastQCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MakeFastQCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fastq") { pattern = "[filename],fastq"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MakeFastQCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MakeFastQCommand::MakeFastQCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; fastafile = ""; } else if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else { current->setFastaFile(fastafile); } qualfile = validParameter.validFile(parameters, "qfile"); if (qualfile == "not open") { abort = true; qualfile = ""; } else if (qualfile == "not found") { qualfile = current->getQualFile(); if (qualfile != "") { m->mothurOut("Using " + qualfile + " as input file for the qfile parameter.\n"); } else { m->mothurOut("You have no current qualfile and the qfile parameter is required.\n"); abort = true; } }else { current->setQualFile(qualfile); } if (outputdir == ""){ outputdir = util.hasPath(fastafile); } format = validParameter.valid(parameters, "format"); if (format == "not found"){ format = "illumina1.8+"; } if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) { m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting." 
); m->mothurOutEndLine(); abort=true; } } } catch(exception& e) { m->errorOut(e, "MakeFastQCommand", "MakeFastQCommand"); exit(1); } } //********************************************************************************************************************** int MakeFastQCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outputFile = getOutputFileName("fastq",variables); outputNames.push_back(outputFile); outputTypes["fastq"].push_back(outputFile); ofstream out; util.openOutputFile(outputFile, out); ifstream qFile; util.openInputFile(qualfile, qFile); ifstream fFile; util.openInputFile(fastafile, fFile); while (!fFile.eof() && !qFile.eof()) { if (m->getControl_pressed()) { break; } Sequence currSeq(fFile); gobble(fFile); QualityScores currQual(qFile); gobble(qFile); FastqRead fread(currSeq, currQual, format); fread.printFastq(out); } fFile.close(); qFile.close(); out.close(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MakeFastQCommand", "execute"); exit(1); } } //********************************************************************************************************************** string MakeFastQCommand::convertQual(vector qual) { try { string qualScores; for (int i = 0; i < qual.size(); i++) { int controlChar = int('!'); if (format == "illumina") { controlChar = int('@'); } int temp = qual[i] + controlChar; char qualChar = (char) temp; qualScores += qualChar; } return qualScores; } catch(exception& e) { m->errorOut(e, "MakeFastQCommand", "convertQual"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/makefastqcommand.h000077500000000000000000000016331424121717000217140ustar00rootroot00000000000000#ifndef MAKEFASTQCOMMAND_H #define MAKEFASTQCOMMAND_H /* * makefastqcommand.h * mothur * * Created by westcott on 2/14/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" class MakeFastQCommand : public Command { public: MakeFastQCommand(string); ~MakeFastQCommand(){} vector setParameters(); string getCommandName() { return "make.fastq"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Make.fastq"; } string getDescription() { return "creates a fastq file from a fasta and quality file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string fastafile, qualfile, format; bool abort; vector outputNames; string convertQual(vector); }; #endif mothur-1.48.0/source/commands/makefilecommand.cpp000077500000000000000000000453071424121717000220560ustar00rootroot00000000000000// // makefilecommand.cpp // Mothur // // Created by Sarah Westcott on 6/24/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
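//
//  Overview (a descriptive note added to this header, not part of the original file):
//  make.file scans the directory given by inputdir for *.fastq or *.gz files, pairs
//  forward/reverse reads by filename (two files whose root names differ by a single
//  character are treated as a pair, as implemented in execute() below), and writes a
//  "<prefix>.files" file with one sample per line (numcols=3 adds the derived group
//  name: groupName forwardFastq reverseFastq). Illustrative invocation taken from the
//  help text below; the directory name is a placeholder:
//
//    mothur > make.file(inputdir=fastqFiles, type=fastq, prefix=stability)
//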
// #include "makefilecommand.h" //********************************************************************************************************************** vector MakeFileCommand::setParameters(){ try { CommandParameter ptype("type", "Multiple", "fastq-gz", "fastq", "", "", "","",false,false); parameters.push_back(ptype); CommandParameter pnumcols("numcols", "Multiple", "2-3", "3", "", "", "","",false,false, true); parameters.push_back(pnumcols); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pprefix("prefix", "String", "", "", "", "", "","",false,false); parameters.push_back(pprefix); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter pdelim("delim", "String", "", "_", "", "", "","",false,false); parameters.push_back(pdelim); abort = false; calledHelp = false; vector tempOutNames; outputTypes["file"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MakeFileCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MakeFileCommand::getHelpString(){ try { string helpString = ""; helpString += "The make.file command takes a input directory and creates a file file containing the fastq or gz files in the directory.\n"; helpString += "The make.file command parameters are inputdir, numcols, type and prefix. inputdir is required.\n"; helpString += "May create more than one file. Mothur will attempt to match paired files. \n"; helpString += "The type parameter allows you to set the type of files to look for. Options are fastq or gz. Default=fastq. \n"; helpString += "The numcols parameter allows you to set number of columns you mothur to make in the file. Default=3, meaning groupName forwardFastq reverseFastq. The groupName is made from the beginning part of the forwardFastq file. Everything up to the first '_' or if no '_' is found then the root of the forwardFastq filename.\n"; helpString += "The prefix parameter allows you to enter your own prefix for the output filename. Default=stability."; helpString += "The delim parameter allow you to enter the character you would like to use to create the sample name. Default='_'. For example, M6D7_S163_L001_R2_001.fastq.gz would produce the sample name M6D7. Set delim=* to indicate you want mothur to create unique names for each file pair. (no pooling)\n"; helpString += "The make.file command should be in the following format: \n"; helpString += "make.file(inputdir=yourInputDirectory). 
\n"; helpString += "Example make.file(inputdir=fastqFiles)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MakeFileCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MakeFileCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "file") { pattern = "[filename],[tag],files-[filename],files"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MakeFileCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MakeFileCommand::MakeFileCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; inputDir = validParameter.validPath(parameters, "inputdir"); if (inputDir == "not found"){ inputDir = ""; m->mothurOut("[ERROR]: The inputdir parameter is required, aborting.\n"); abort = true; } else { if (util.dirCheckExists(inputDir)) {} // all set else { abort = true; } } if (outputdir == ""){ outputdir = inputDir; } //if the user changes the input directory command factory will send this info to us in the output parameter typeFile = validParameter.valid(parameters, "type"); if (typeFile == "not found"){ typeFile = "fastq"; } if ((typeFile != "fastq") && (typeFile != "gz")) { m->mothurOut(typeFile + " is not a valid type. Options are fastq or gz. I will use fastq.\n"); typeFile = "fastq"; } string temp = validParameter.valid(parameters, "numcols"); if(temp == "not found"){ temp = "3"; } if ((temp != "2") && (temp != "3")) { m->mothurOut(temp + " is not a valid numcols. Options are 2 or 3. 
I will use 3.\n"); temp = "3"; } util.mothurConvert(temp, numCols); prefix = validParameter.valid(parameters, "prefix"); if (prefix == "not found") { prefix = "stability"; } delim = validParameter.valid(parameters, "delim"); if (delim == "not found") { delim = "_"; } } } catch(exception& e) { m->errorOut(e, "MakeFileCommand", "MakeFileCommand"); exit(1); } } //********************************************************************************************************************** int MakeFileCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //find all .fastq files string tempFile = inputDir + "fileList.temp"; fillAccnosFile(tempFile); //read in list of files vector fastqFiles; util.readAccnos(tempFile, fastqFiles, "no error"); util.mothurRemove(tempFile); if (m->getDebug()) { m->mothurOut("[DEBUG]: Found " + toString(fastqFiles.size()) + " files of type " + typeFile + ".\n"); for (int i = 0; i < fastqFiles.size(); i++) { m->mothurOut("[DEBUG]: " + toString(i) + " = " + fastqFiles[i] + "\n");} } if (fastqFiles.size() == 0) { m->mothurOut("[WARNING]: Unable to find any " + typeFile + " files in your directory.\n"); } else { //sort into alpha order to put pairs togther if they exist sort(fastqFiles.begin(), fastqFiles.end()); vector< vector > paired; vector singles; set groups; string lastFile = ""; for (int i = 0; i < fastqFiles.size()-1; i++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: File " + toString(i) + " = " + fastqFiles[i] + ".\n"); } if (m->getControl_pressed()) { break; } string simpleName1 = util.getRootName(util.getSimpleName(fastqFiles[i])); string simpleName2 = util.getRootName(util.getSimpleName(fastqFiles[i+1])); //possible pair if (simpleName1.length() == simpleName2.length()) { int numDiffs = 0; for (int j = 0; j < simpleName1.length(); j++) { if (numDiffs > 1) { break; } else if (simpleName1[j] != simpleName2[j]) { numDiffs++; } } if (numDiffs > 1) { singles.push_back(util.getSimpleName(fastqFiles[i])); lastFile = fastqFiles[i]; } else { //only one diff = paired files vector temp; temp.push_back(util.getSimpleName(fastqFiles[i])); temp.push_back(util.getSimpleName(fastqFiles[i+1])); lastFile = fastqFiles[i+1]; if (m->getDebug()) { m->mothurOut("[DEBUG]: Pairing " + fastqFiles[i] + " with " + fastqFiles[i+1] + ".\n"); } paired.push_back(temp); i++; } }else{ if (m->getDebug()) { m->mothurOut("[DEBUG]: Adding single " + fastqFiles[i] + ".\n"); } singles.push_back(util.getSimpleName(fastqFiles[i])); lastFile = fastqFiles[i]; } } if (lastFile != fastqFiles[fastqFiles.size()-1]) { if (m->getDebug()) { m->mothurOut("[DEBUG]: Adding single " + fastqFiles[fastqFiles.size()-1] + ".\n"); } singles.push_back(util.getSimpleName(fastqFiles[fastqFiles.size()-1])); } if (singles.size() != 0) { map variables; variables["[filename]"] = outputdir + prefix + "."; if (paired.size() != 0) { variables["[tag]"] = "single"; } string filename = getOutputFileName("file",variables); ofstream out; util.openOutputFile(filename, out); for (int i = 0; i < singles.size(); i++) { out << singles[i] << endl; } out.close(); if (util.isBlank(filename)) { util.mothurRemove(filename); } else { outputNames.push_back(filename); outputTypes["file"].push_back(filename); m->mothurOut("\n[WARNNG]: mothur found unpaired files in your input directory. 
Outputting list of filenames to " + filename + " for your review.\n\n"); } } //generates unique group names if (numCols == 3) { paired = findGroupNames(paired); } if (paired.size() != 0) { map variables; variables["[filename]"] = outputdir + prefix + "."; string filename = getOutputFileName("file",variables); ofstream out; util.openOutputFile(filename, out); outputNames.push_back(filename); outputTypes["file"].push_back(filename); current->setFileFile(filename); for (int i = 0; i < paired.size(); i++) { for (int j = 0; j < paired[i].size(); j++) { out << paired[i][j] << '\t'; } out << endl; } out.close(); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MakeFileCommand", "execute"); exit(1); } } //********************************************************************************************************************** //groupName defaults to "noGroup"+toString(i); vector< vector > MakeFileCommand::findGroupNames(vector< vector > paired){ try { vector< vector > results; results.resize(paired.size()); if (delim == "*") { //remove any "words" in filenames that is the same in all filenames separated by delim(_ .) //MI.M00833_0261.001.FLD0207.TRIN-META_16S_R2.fastq //MI.M00833_0261.001.FLD0223.ERIFF-META_16S_R2.fastq //would become... //FLD0207.TRIN-META //FLD0223.ERIFF-META //split all forward names into pieces vector > words; words.resize(paired.size()); map > posToWord; for (int i = 0; i < paired.size(); i++) { if (m->getControl_pressed()) { break; } string filename = util.getRootName(util.getSimpleName(paired[i][0])); int pos = 0; string individual = ""; for(int j=0;j >::iterator it = posToWord.find(pos); if (it != posToWord.end()) { posToWord[pos].insert(individual); } else { set temp; temp.insert(individual); posToWord[pos] = temp; } individual = ""; pos++; } else{ individual += filename[j]; } } if (!util.allSpaces(individual)) { words[i].push_back(individual); map >::iterator it = posToWord.find(pos); if (it != posToWord.end()) { posToWord[pos].insert(individual); } else { set temp; temp.insert(individual); posToWord[pos] = temp; } } } //remove duplicate pieces set goodIndexes; for (map >::iterator it = posToWord.begin(); it != posToWord.end(); it++) { set w = it->second;; if (w.size() != 1) { goodIndexes.insert(it->first); } } set groups; for (int i = 0; i < words.size(); i++) { //assemble groupNames string groupName = ""; for (int j = 0; j < words[i].size(); j++) { //include word if (goodIndexes.count(j) != 0) { groupName += words[i][j] + "_"; } } if (groupName != "") { groupName = groupName.substr(0, groupName.length()-1); } //is this name unique if (groups.count(groupName) == 0) { util.checkName(groupName); groups.insert(groupName); } else { groupName = "Group_"+ toString(i); util.checkName(groupName); groups.insert(groupName); } results[i].push_back(groupName); results[i].push_back(paired[i][0]); results[i].push_back(paired[i][1]); } }else { //separate by the user selected deliminator. 
default='_' set groups; for (int i = 0; i < paired.size(); i++) { string groupName = "Group_" + toString(i); string filename = util.getSimpleName(paired[i][0]); int pos = filename.find(delim); if (pos != string::npos) { groupName = filename.substr(0, pos); } if (groups.count(groupName) == 0) { util.checkName(groupName); groups.insert(groupName); } else { //look for another delim string tempFilename = filename.substr(pos+1); //grab rest of name pos = tempFilename.find(delim); if (pos != string::npos) { groupName += "_" + tempFilename.substr(0, pos); if (groups.count(groupName) != 0) { groupName += "_"+ toString(i); } //already have this name } else { groupName += "_"+ toString(i); } util.checkName(groupName); groups.insert(groupName); } results[i].push_back(groupName); results[i].push_back(paired[i][0]); results[i].push_back(paired[i][1]); } } return results; } catch(exception& e) { m->errorOut(e, "MakeFileCommand", "findGroupName"); exit(1); } } //********************************************************************************************************************** int MakeFileCommand::fillAccnosFile(string tempFile){ try { string findCommand = ""; string tempOut = tempFile; tempFile = "\"" + tempFile + "\""; string wrappedInput = "\"" + inputDir + "\""; #if defined NON_WINDOWS findCommand = "ls " + wrappedInput + "*." + typeFile + " > " + tempFile; //findCommand = "find \"" + inputDir.substr(0, inputDir.length()-1) + "\" -maxdepth 1 -name \"*." + typeFile + "\" > " + tempFile; if (m->getDebug()) { m->mothurOut(findCommand + "\n"); } system(findCommand.c_str()); #else //use ls command findCommand = "dir /B " + wrappedInput + "*." + typeFile + " > " + tempFile; //findCommand = "dir /B \"" + inputDir.substr(0, inputDir.length()-1) + "\\*.\"" + typeFile + " > " + tempFile + "\""; if (m->getDebug()) { m->mothurOut(findCommand + "\n"); } system(findCommand.c_str()); tempOut += ".temp"; tempFile = tempFile.substr(1, tempFile.length()-2); //remove "" ofstream out; util.openOutputFile(tempOut, out); ifstream in; util.openInputFile(tempFile, in); string junk, filename; while (!in.eof()) { if (m->getControl_pressed()) { break; } in >> filename; gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: Found file " + filename + ".\n"); } //ignore hidden files if (filename[0] != '.') { out << filename << endl; } } in.close(); out.close(); util.mothurRemove(tempFile); util.renameFile(tempOut, tempFile); #endif return 0; } catch(exception& e) { m->errorOut(e, "MakeFileCommand", "fillAccnosFile"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/makefilecommand.h000077500000000000000000000021331424121717000215110ustar00rootroot00000000000000// // makefilecommand.h // Mothur // // Created by Sarah Westcott on 6/24/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
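//
//  Note on sample naming (added commentary; see findGroupNames() in makefilecommand.cpp):
//  with the default delim='_' the group name is the part of the filename before the
//  first delimiter, e.g. M6D7_S163_L001_R2_001.fastq.gz yields the sample name "M6D7";
//  with delim='*' mothur instead drops any filename "words" that are identical across
//  all files and keeps the remainder, so each file pair gets a unique, non-pooled name.
//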
// #ifndef __Mothur__makefilecommand__ #define __Mothur__makefilecommand__ #include "command.hpp" class MakeFileCommand : public Command { public: MakeFileCommand(string); ~MakeFileCommand(){} vector setParameters(); string getCommandName() { return "make.file"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Make.file"; } string getDescription() { return "creates a file file containing fastq filenames"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string inputDir, typeFile, prefix, delim; vector outputNames; int numCols; bool abort; vector< vector > findGroupNames(vector< vector > paired); int fillAccnosFile(string tempFile); }; #endif /* defined(__Mothur__makefilecommand__) */ mothur-1.48.0/source/commands/makegroupcommand.cpp000066400000000000000000000221021424121717000222540ustar00rootroot00000000000000/* * makegroupcommand.cpp * Mothur * * Created by westcott on 5/7/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "makegroupcommand.h" #include "sequence.hpp" #include "counttable.h" //********************************************************************************************************************** vector MakeGroupCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","group",false,true,true); parameters.push_back(pfasta); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pgroups); CommandParameter poutput("output", "String", "", "", "", "", "","",false,false); parameters.push_back(poutput); CommandParameter pformat("format", "Multiple", "count-group", "count", "", "", "","",false,false,true); parameters.push_back(pformat); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MakeGroupCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MakeGroupCommand::getHelpString(){ try { string helpString = ""; helpString += "The make.group (also called make.count) command reads a fasta file or series of fasta files and creates a group file or count file.\n"; helpString += "The make.group command parameters are fasta, groups, format and output. Fasta and groups are required.\n"; helpString += "The output parameter allows you to specify the name of group file or count file created. \n"; helpString += "The format parameter allows you to specify whether the outputtted file is a group file or count file. Default=count. \n"; helpString += "The make.group command should be in the following format: \n"; helpString += "make.group(fasta=yourFastaFiles, groups=yourGroups). 
\n"; helpString += "Example make.group(fasta=seqs1.fasta-seq2.fasta-seqs3.fasta, groups=A-B-C)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MakeGroupCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MakeGroupCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "group") { pattern = "[filename],groups"; } else if (type == "count") { pattern = "[filename],count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MakeGroupCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MakeGroupCommand::MakeGroupCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validPath(parameters, "inputdir"); if (inputDir == "not found"){ inputDir = ""; } fastaFileNames = validParameter.validFiles(parameters, "fasta"); if (fastaFileNames.size() != 0) { if (fastaFileNames[0] == "not open") { abort = true; } else { current->setFastaFile(fastaFileNames[0]); } } //make sure there is at least one valid file left if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files.\n"); abort = true; } output = validParameter.validPath(parameters, "output"); if (output == "not found") { output = ""; } format = validParameter.valid(parameters, "format"); if (format == "not found"){ format = "count"; } if ((format != "count") && (format != "group")) { m->mothurOut("\n[WARNING]: invalid format option: choices are count or group, using count.\n"); format="count"; } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { m->mothurOut("groups is a required parameter for the make.group command.\n"); abort = true; } else { util.splitAtDash(groups, groupsNames); } if (groupsNames.size() != fastaFileNames.size()) { m->mothurOut("You do not have the same number of valid fastfile files as groups. 
This could be because we could not open a fastafile.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "MakeGroupCommand", "MakeGroupCommand"); exit(1); } } //********************************************************************************************************************** int MakeGroupCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } util.checkGroupNames(groupsNames); map seqGroup; map groupCounts; for (int i = 0; i < fastaFileNames.size(); i++) { if (m->getControl_pressed()) { break; } m->mothurOutJustToScreen("\nAssigning sequences from file " + fastaFileNames[i] + " to group " + groupsNames[i] + ":\t"); ifstream in; util.openInputFile(fastaFileNames[i], in); long long count = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { seqGroup[seq.getName()] = groupsNames[i]; count++; } } in.close(); m->mothurOutJustToScreen(toString(count) + " sequences assigned to group " + groupsNames[i] + "\n"); groupCounts[groupsNames[i]] = count; } if (m->getControl_pressed()) { return 0; } //if user provided output filename, then use it string outputFileName = util.getFullPathName(output); //if no output filename given, create one if (output == "") { map variables; if (outputdir == "") { outputdir = util.hasPath(fastaFileNames[0]); } variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastaFileNames[0])); if (fastaFileNames.size() > 1) { variables["[filename]"] = outputdir + "merge."; } outputFileName = getOutputFileName(format,variables); } outputNames.push_back(outputFileName); outputTypes[format].push_back(outputFileName); if (format == "count") { CountTable ct; ct.createTable(seqGroup); ct.printCompressedTable(outputFileName); }else{ ofstream out; util.openOutputFile(outputFileName, out); for (map::iterator it = seqGroup.begin(); it != seqGroup.end(); it++) { out << it->first << '\t' << it->second << endl; } out.close(); } long long total = 0; if (groupCounts.size() != 0) { m->mothurOut("\nGroup count: \n"); } for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { total += it->second; m->mothurOut(it->first + "\t" + toString(it->second) + "\n"); } if (total != 0) { m->mothurOut("\nTotal of all groups is " + toString(total) + "\n"); } m->mothurOut("\nOutput File Names: " + outputFileName + "\n\n"); //set group file as new current groupfile string currentName = ""; itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "MakeGroupCommand", "execute"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/makegroupcommand.h000077500000000000000000000015671424121717000217400ustar00rootroot00000000000000#ifndef MAKEGROUPCOMMAND_H #define MAKEGROUPCOMMAND_H /* * makegroupcommand.h * Mothur * * Created by westcott on 5/7/10. * Copyright 2010 Schloss Lab. All rights reserved. 
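 *
 *  Usage sketch (drawn from the help text in makegroupcommand.cpp; filenames and group
 *  names are placeholders): each sequence name in a fasta file is assigned to the group
 *  listed in the same position of the groups parameter, e.g.
 *
 *    mothur > make.group(fasta=seqs1.fasta-seq2.fasta-seqs3.fasta, groups=A-B-C, format=count)
 *
 *  With format=count (the default) the assignments are written as a count_table; with
 *  format=group a two-column "sequenceName group" file is produced instead.
 *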
* */ #include "command.hpp" class MakeGroupCommand : public Command { public: MakeGroupCommand(string); ~MakeGroupCommand(){} vector setParameters(); string getCommandName() { return "make.group"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Make.group"; } string getDescription() { return "creates a group file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string fastaFileName, groups, output, format; vector fastaFileNames; vector groupsNames, outputNames; bool abort; }; #endif mothur-1.48.0/source/commands/makelefsecommand.cpp000077500000000000000000000514531424121717000222340ustar00rootroot00000000000000// // makelefse.cpp // Mothur // // Created by SarahsWork on 6/3/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "makelefsecommand.h" #include "designmap.h" //********************************************************************************************************************** vector MakeLefseCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(prelabund); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false,false); parameters.push_back(pconstaxonomy); CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false, true); parameters.push_back(pdesign); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pscale("scale", "Multiple", "totalgroup-totalotu-averagegroup-averageotu", "totalgroup", "", "", "","",false,false); parameters.push_back(pscale); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["lefse"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MakeLefseCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MakeLefseCommand::getHelpString(){ try { string helpString = ""; helpString += "The make.lefse command allows you to create a lefse formatted input file from mothur's output files.\n"; helpString += "The make.lefse command parameters are: shared, relabund, constaxonomy, design, scale, groups and label. 
The shared or relabund are required.\n"; helpString += "The shared parameter is used to input your shared file, http://www.wiki.mothur.org/wiki/Shared_file.\n"; helpString += "The relabund parameter is used to input your relabund file, http://www.wiki.mothur.org/wiki/Relabund_file.\n"; helpString += "The design parameter is used to input your design file, http://www.wiki.mothur.org/wiki/Design_File.\n"; helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance. ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n"; helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n"; helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n"; helpString += "The make.lefse command should be in the following format: make.lefse(shared=yourSharedFile)\n"; helpString += "make.lefse(shared=final.an.shared)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MakeLefseCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MakeLefseCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "lefse") { pattern = "[filename],[distance],lefse"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MakeLefseCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MakeLefseCommand::MakeLefseCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { abort = true; } else if (designfile == "not found") { designfile = ""; } else { current->setDesignFile(designfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { current->setRelAbundFile(relabundfile); } constaxonomyfile = validParameter.validFile(parameters, "constaxonomy"); if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; } else if (constaxonomyfile == "not found") { constaxonomyfile = ""; } label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I 
will use the first label in your inputfile.\n"); label=""; } string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } if ((relabundfile == "") && (sharedfile == "")) { //is there are current file available for either of these? //give priority to shared, then relabund sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a shared or relabund.\n"); abort = true; } } } if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file.\n"); abort = true; } scale = validParameter.valid(parameters, "scale"); if (scale == "not found") { scale = "totalgroup"; } if ((scale != "totalgroup") && (scale != "totalotu") && (scale != "averagegroup") && (scale != "averageotu")) { m->mothurOut(scale + " is not a valid scaling option for the get.relabund command. Choices are totalgroup, totalotu, averagegroup, averageotu.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand"); exit(1); } } //********************************************************************************************************************** int MakeLefseCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } map consTax; if (constaxonomyfile != "") { util.readConsTax(constaxonomyfile, consTax); } if (m->getControl_pressed()) { return 0; } if (sharedfile != "") { inputFile = sharedfile; SharedRAbundFloatVectors* lookup = getSharedRelabund(); runRelabund(consTax, lookup); }else { inputFile = relabundfile; SharedRAbundFloatVectors* lookup = getRelabund(); runRelabund(consTax, lookup); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MakeLefseCommand", "execute"); exit(1); } } //********************************************************************************************************************** int MakeLefseCommand::runRelabund(map& consTax, SharedRAbundFloatVectors*& lookup){ try { if (outputdir == "") { outputdir = util.hasPath(inputFile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFile)); variables["[distance]"] = lookup->getLabel(); string outputFile = getOutputFileName("lefse",variables); outputNames.push_back(outputFile); outputTypes["lefse"].push_back(outputFile); ofstream out; util.openOutputFile(outputFile, out); DesignMap* designMap = nullptr; vector namesOfGroups = lookup->getNamesGroups(); if (designfile != "") { designMap = new DesignMap(designfile); if (m->getControl_pressed()) { out.close(); delete designMap; return 0; } vector categories = designMap->getNamesOfCategories(); if (categories.size() > 3) { m->mothurOut("\n[NOTE]: LEfSe input files allow for a class, subclass and subject. 
More than 3 categories can cause formatting errors.\n\n"); } for (int j = 0; j < categories.size(); j++) { out << categories[j]; for (int i = 0; i < namesOfGroups.size()-1; i++) { if (m->getControl_pressed()) { out.close(); delete designMap; return 0; } string value = designMap->get(namesOfGroups[i], categories[j]); if (value == "not found") { m->mothurOut("[ERROR]: " + namesOfGroups[i] + " is not in your design file, please correct.\n"); m->setControl_pressed(true); }else { out << '\t' << value; } } string value = designMap->get(namesOfGroups[namesOfGroups.size()-1], categories[j]); if (value == "not found") { m->mothurOut("[ERROR]: " + namesOfGroups[namesOfGroups.size()-1] + " is not in your design file, please correct.\n"); m->setControl_pressed(true); }else { out << '\t' << value; } out << endl; } } out << "group"; for (int i = 0; i < namesOfGroups.size(); i++) { out << '\t' << namesOfGroups[i]; } out << endl; for (int i = 0; i < lookup->getNumBins(); i++) { //process each otu if (m->getControl_pressed()) { break; } string nameOfOtu = lookup->getOTUName(i); if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy int simpleLabel; util.mothurConvert(util.getSimpleLabel(nameOfOtu), simpleLabel); map::iterator it = consTax.find(simpleLabel); if (it != consTax.end()) { nameOfOtu = it->second.taxonomy; //add sanity check abundances here?? string fixedName = ""; //remove confidences and change ; to | util.removeConfidences(nameOfOtu); for (int j = 0; j < nameOfOtu.length(); j++) { if (nameOfOtu[j] == ';') { fixedName += '|'; } else { fixedName += nameOfOtu[j]; } } nameOfOtu = fixedName + lookup->getOTUName(i) + "|"; }else { m->mothurOut("[ERROR]: can't find " + nameOfOtu + " in constaxonomy file. Do the distances match, did you forget to use the label parameter?\n"); m->setControl_pressed(true); } } //print name out << nameOfOtu; //print out relabunds for each otu vector abunds = lookup->getOTU(i); for (int j = 0; j < abunds.size(); j++) { out << '\t' << abunds[j]; } out << endl; } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "MakeLefseCommand", "execute"); exit(1); } } //********************************************************************************************************************** SharedRAbundFloatVectors* MakeLefseCommand::getSharedRelabund(){ try { InputData input(sharedfile, "sharedfile", Groups); SharedRAbundVectors* templookup = input.getSharedRAbundVectors(); Groups = templookup->getNamesGroups(); string lastLabel = templookup->getLabel(); if (label == "") { label = lastLabel; } else { //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((templookup != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { delete templookup; return nullptr; } if(labels.count(templookup->getLabel()) == 1){ processedLabels.insert(templookup->getLabel()); userLabels.erase(templookup->getLabel()); break; } if ((util.anyLabelsToProcess(templookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = templookup->getLabel(); delete templookup; templookup = input.getSharedRAbundVectors(lastLabel); processedLabels.insert(templookup->getLabel()); userLabels.erase(templookup->getLabel()); //restore real lastlabel to save below templookup->setLabels(saveLabel); break; } lastLabel = templookup->getLabel(); //get next line to process //prevent memory leak delete templookup; templookup = input.getSharedRAbundVectors(); } if (m->getControl_pressed()) { delete templookup; return nullptr; } //output error messages about any remaining user labels set::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete templookup; templookup = input.getSharedRAbundVectors(lastLabel); } } vector data = templookup->getSharedRAbundVectors(); vector otuNames = templookup->getOTUNames(); delete templookup; SharedRAbundFloatVectors* lookup = new SharedRAbundFloatVectors(); //convert to relabund for (int i = 0; i < data.size(); i++) { SharedRAbundFloatVector* rel = new SharedRAbundFloatVector(); rel->setGroup(data[i]->getGroup()); rel->setLabel(data[i]->getLabel()); for (int j = 0; j < data[i]->getNumBins(); j++) { if (m->getControl_pressed()) { for (int k = 0; k < data.size(); k++) { delete data[k]; } return lookup; } int abund = data[i]->get(j); float relabund = 0.0; if (scale == "totalgroup") { relabund = abund / (float) data[i]->getNumSeqs(); }else if (scale == "totalotu") { //calc the total in this otu int totalOtu = 0; for (int l = 0; l < data.size(); l++) { totalOtu += data[l]->get(j); } relabund = abund / (float) totalOtu; }else if (scale == "averagegroup") { relabund = abund / (float) (data[i]->getNumSeqs() / (float) data[i]->getNumBins()); }else if (scale == "averageotu") { //calc the total in this otu int totalOtu = 0; for (int l = 0; l < data.size(); l++) { totalOtu += data[l]->get(j); } float averageOtu = totalOtu / (float) data.size(); relabund = abund / (float) averageOtu; }else{ m->mothurOut(scale + " is not a valid scaling option.\n"); m->setControl_pressed(true); } rel->push_back(relabund); } lookup->push_back(rel); } for (int k = 0; k < data.size(); k++) { delete data[k]; } data.clear(); lookup->setOTUNames(otuNames); lookup->eliminateZeroOTUS(); return lookup; } catch(exception& e) { m->errorOut(e, "MakeLefseCommand", "getSharedRelabund"); exit(1); } } //********************************************************************************************************************** SharedRAbundFloatVectors* MakeLefseCommand::getRelabund(){ try { InputData input(relabundfile, "relabund", Groups); SharedRAbundFloatVectors* lookupFloat = input.getSharedRAbundFloatVectors(); string lastLabel = lookupFloat->getLabel(); 
Groups = lookupFloat->getNamesGroups(); if (label == "") { label = lastLabel; return lookupFloat; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { return lookupFloat; } if(labels.count(lookupFloat->getLabel()) == 1){ processedLabels.insert(lookupFloat->getLabel()); userLabels.erase(lookupFloat->getLabel()); break; } if ((util.anyLabelsToProcess(lookupFloat->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookupFloat->getLabel(); delete lookupFloat; lookupFloat = input.getSharedRAbundFloatVectors(lastLabel); processedLabels.insert(lookupFloat->getLabel()); userLabels.erase(lookupFloat->getLabel()); //restore real lastlabel to save below lookupFloat->setLabels(saveLabel); break; } lastLabel = lookupFloat->getLabel(); //get next line to process //prevent memory leak delete lookupFloat; lookupFloat = input.getSharedRAbundFloatVectors(); } if (m->getControl_pressed()) { return lookupFloat; } //output error messages about any remaining user labels set::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete lookupFloat; lookupFloat = input.getSharedRAbundFloatVectors(lastLabel); } return lookupFloat; } catch(exception& e) { m->errorOut(e, "MakeLefseCommand", "getRelabund"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/makelefsecommand.h000077500000000000000000000027731424121717000217020ustar00rootroot00000000000000// // makelefse.h // Mothur // // Created by SarahsWork on 6/3/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
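//
//  Output sketch (added commentary based on runRelabund() in makelefsecommand.cpp): the
//  lefse file begins with one row per design-file category (LEfSe expects at most a
//  class, subclass and subject row), then a "group" row naming the samples, then one row
//  per OTU giving its relative abundance in each sample. When a constaxonomy file is
//  supplied, OTU labels are replaced by their consensus taxonomy with ';' changed to '|'.
//  Example invocation from the help text:
//
//    mothur > make.lefse(shared=final.an.shared)
//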
// #ifndef __Mothur__makelefse__ #define __Mothur__makelefse__ #include "mothurout.h" #include "command.hpp" #include "inputdata.h" #include "phylosummary.h" /**************************************************************************************************/ class MakeLefseCommand : public Command { public: MakeLefseCommand(string); ~MakeLefseCommand(){} vector setParameters(); string getCommandName() { return "make.lefse"; } string getCommandCategory() { return "General"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload http://www.mothur.org/wiki/Make.lefse"; } string getDescription() { return "creates LEfSe input file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines, otulabel, hasGroupInfo; vector outputNames, Groups; string sharedfile, designfile, constaxonomyfile, relabundfile, scale, label, inputFile; int runRelabund(map&, SharedRAbundFloatVectors*&); SharedRAbundFloatVectors* getRelabund(); SharedRAbundFloatVectors* getSharedRelabund(); }; /**************************************************************************************************/ #endif /* defined(__Mothur__makelefse__) */ mothur-1.48.0/source/commands/makelookupcommand.cpp000077500000000000000000000773301424121717000224510ustar00rootroot00000000000000// // makelookupcommand.cpp // Mothur // // Created by SarahsWork on 5/14/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "makelookupcommand.h" //********************************************************************************************************************** vector MakeLookupCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","lookup",false,true,true); parameters.push_back(pflow); CommandParameter perrors("error", "InputTypes", "", "", "none", "none", "none","none",false,true,true); parameters.push_back(perrors); CommandParameter pbarcode("barcode", "String", "", "AACCGTGTC", "", "", "","",false,false); parameters.push_back(pbarcode); CommandParameter pkey("key", "String", "", "TCAG", "", "", "","",false,false); parameters.push_back(pkey); CommandParameter pthreshold("threshold", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pthreshold); CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["lookup"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MakeLookupCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MakeLookupCommand::getHelpString(){ try { string helpString = ""; helpString += "The make.lookup command allows you to create custom lookup files for use with 
shhh.flows.\n"; helpString += "The make.lookup command parameters are: reference, flow, error, barcode, key, threshold and order.\n"; helpString += "The reference file needs to be in the same direction as the flow data and it must start with the forward primer sequence. It is required.\n"; helpString += "The flow parameter is used to provide the flow data. It is required.\n"; helpString += "The error parameter is used to provide the error summary. It is required.\n"; helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n"; helpString += "The key parameter is used to provide the key sequence. Default=TCAG.\n"; helpString += "The threshold parameter is ....Default=10000.\n"; helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"; helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n"; helpString += "new(...)\n"; return helpString; } catch(exception& e) { m->errorOut(e, 
"MakeLookupCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MakeLookupCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "lookup") { pattern = "[filename],lookup"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MakeLookupCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MakeLookupCommand::MakeLookupCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; errorFileName = validParameter.validFile(parameters, "error"); if (errorFileName == "not open") { errorFileName = ""; abort = true; } else if (errorFileName == "not found") { errorFileName = ""; m->mothurOut("[ERROR]: error parameter is required.\n"); abort = true; } flowFileName = validParameter.validFile(parameters, "flow"); if (flowFileName == "not open") { flowFileName = ""; abort = true; } else if (flowFileName == "not found") { flowFileName = ""; m->mothurOut("[ERROR]: flow parameter is required.\n"); abort = true; } else { current->setFlowFile(flowFileName); } refFastaFileName = validParameter.validFile(parameters, "reference"); if (refFastaFileName == "not open") { abort = true; } else if (refFastaFileName == "not found") { refFastaFileName = ""; m->mothurOut("[ERROR]: reference parameter is required.\n"); abort = true; } if (outputdir == ""){ outputdir = util.hasPath(flowFileName); } string temp = validParameter.valid(parameters, "threshold"); if (temp == "not found"){ temp = "10000"; } util.mothurConvert(temp, thresholdCount); barcodeSequence = validParameter.valid(parameters, "barcode"); if (barcodeSequence == "not found"){ barcodeSequence = "AACCGTGTC"; } keySequence = validParameter.valid(parameters, "key"); if (keySequence == "not found"){ keySequence = "TCAG"; } temp = validParameter.valid(parameters, "order"); if (temp == "not found"){ temp = "A"; } if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. 
A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; } else { if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; } else if(toupper(temp[0]) == 'B'){ flowOrder = 
"TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; } else if(toupper(temp[0]) == 'I'){ flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; } else { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. 
A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; } } } } catch(exception& e) { m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand"); exit(1); } } //********************************************************************************************************************** int MakeLookupCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); double gapOpening = 10; int maxHomoP = 101; vector > penaltyMatrix; penaltyMatrix.resize(maxHomoP); for(int i=0;i > refFlowgrams; while(!refFASTA.eof()){ if (m->getControl_pressed()) { refFASTA.close(); return 0; } Sequence seq(refFASTA); gobble(refFASTA); if (m->getDebug()) { m->mothurOut("[DEBUG]: seq = " + seq.getName() + ".\n"); } string fullSequence = keySequence + barcodeSequence + seq.getAligned(); // * concatenate the keySequence, barcodeSequence, and // referenceSequences refFlowgrams[seq.getName()] = convertSeqToFlow(fullSequence, flowOrder); // * translate concatenated sequences into flowgram } refFASTA.close(); vector > lookupTable; lookupTable.resize(1000); for(int i=0;i<1000;i++){ 
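// Sketch of the lookup-table layout, inferred from the accumulation code further below
// (the exact intensity-binning expression is not visible here, so the x100 scaling is an assumption):
// each of the 1000 rows appears to correspond to an observed flow-intensity bin (signal * 100, capped at 999)
// and each of the 11 columns to a reference homopolymer length (0-10). Counts collected per
// (intensity bin, true length) pair are later converted into negative log probabilities for the lookup file.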
lookupTable[i].resize(11, 0); } if (m->getDebug()) { m->mothurOut("[DEBUG]: here .\n"); } //Loop through each sequence in the flow file and the error summary file. ifstream flowFile; util.openInputFile(flowFileName, flowFile); int numFlows; flowFile >> numFlows; if (m->getDebug()) { m->mothurOut("[DEBUG]: numflows = " + toString(numFlows) + ".\n"); } ifstream errorFile; util.openInputFile(errorFileName, errorFile); util.getline(errorFile); //grab headers string errorQuery, flowQuery, referenceName, dummy; string chimera; float intensity; vector std; std.resize(11, 0); while(errorFile && flowFile){ if (m->getControl_pressed()) { errorFile.close(); flowFile.close(); return 0; } // * if it's chimeric, chuck it errorFile >> errorQuery >> referenceName; for(int i=2;i<40;i++){ errorFile >> dummy; } errorFile >> chimera; if(chimera == "2"){ util.getline(flowFile); } else{ flowFile >> flowQuery >> dummy; if(flowQuery != errorQuery){ m->mothurOut("[ERROR]: " + flowQuery + " != " + errorQuery + "\n"); } map >::iterator it = refFlowgrams.find(referenceName); // * compare sequence to its closest reference if (it == refFlowgrams.end()) { m->mothurOut("[WARNING]: missing reference flow " + referenceName + ", ignoring flow " + flowQuery + ".\n"); util.getline(flowFile); gobble(flowFile); }else { vector refFlow = it->second; vector flowgram; flowgram.resize(numFlows); if (m->getDebug()) { m->mothurOut("[DEBUG]: flowQuery = " + flowQuery + ".\t" + "refName " + referenceName+ ".\n"); } for(int i=0;i> intensity; flowgram[i] = intensity;// (int)round(100 * intensity); } gobble(flowFile); if (m->getDebug()) { m->mothurOut("[DEBUG]: before align.\n"); } alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder); if (m->getDebug()) { m->mothurOut("[DEBUG]: after align.\n"); } if (m->getControl_pressed()) { errorFile.close(); flowFile.close(); return 0; } for(int i=0;i 1000){count = 999;} if(abs(flowgram[i]-refFlow[i])<=0.50){ lookupTable[count][int(refFlow[i])]++; // * build table std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count); } } } } gobble(errorFile); gobble(flowFile); } errorFile.close(); flowFile.close(); //get probabilities vector counts; counts.resize(11, 0); int totalCount = 0; for(int i=0;i<1000;i++){ for(int j=0;j<11;j++){ counts[j] += lookupTable[i][j]; totalCount += lookupTable[i][j]; } } int N = 11; for(int i=0;i<11;i++){ if(counts[i] < thresholdCount){ N = i; break; } //bring back std[i] = sqrt(std[i]/(double)(counts[i])); //bring back } regress(std, N); //bring back if (m->getControl_pressed()) { return 0; } double minProbability = 0.1 / (double)totalCount; //calculate the negative log probabilities of each intensity given the actual homopolymer length; impute with a guassian when counts are too low double sqrtTwoPi = 2.50662827463;//pow(2.0 * 3.14159, 0.5); for(int i=0;i<1000;i++){ if (m->getControl_pressed()) { return 0; } for(int j=0;j minProbability){ lookupTable[i][j] = -log(normalProbability); } else{ lookupTable[i][j] = -log(minProbability); } } } //calculate the probability of each homopolymer length vector negLogHomoProb; negLogHomoProb.resize(11, 0.00); //bring back for(int i=0;igetControl_pressed()) { return 0; } //output data table. 
column one is the probability of each homopolymer length map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(flowFileName)); string outputFile = getOutputFileName("lookup",variables); outputNames.push_back(outputFile); outputTypes["lookup"].push_back(outputFile); ofstream lookupFile; util.openOutputFile(outputFile, lookupFile); lookupFile.precision(8); for(int j=0;j<11;j++){ // lookupFile << counts[j]; lookupFile << showpoint << negLogHomoProb[j]; //bring back for(int i=0;i<1000;i++){ lookupFile << '\t' << lookupTable[i][j]; } lookupFile << endl; } lookupFile.close(); m->mothurOut("\nData for homopolymer lengths of " + toString(N) + " and longer were imputed for this analysis\n\n"); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MakeLookupCommand", "execute"); exit(1); } } //****************************************************************************************************************************** vector MakeLookupCommand::convertSeqToFlow(string sequence, string order){ try { int seqLength = (int)sequence.length(); int numFlows = (int)order.length(); vector flowgram; int orderIndex = 0; int sequenceIndex = 0; while(orderIndex < numFlows && sequenceIndex < seqLength){ if (m->getControl_pressed()) { return flowgram; } int homopolymerLength = 1; char base = sequence[sequenceIndex]; while(base == sequence[sequenceIndex+1] && sequenceIndex < seqLength){ homopolymerLength++; sequenceIndex++; } sequenceIndex++; for(int i=orderIndex; ierrorOut(e, "MakeLookupCommand", "convertSeqToFlow"); exit(1); } } //****************************************************************************************************************************** int MakeLookupCommand::alignFlowGrams(vector& flowgram, vector& refFlow, double gapOpening, vector > penaltyMatrix, string flowOrder){ try { int numQueryFlows = (int)flowgram.size(); int numRefFlows = (int)refFlow.size(); vector > scoreMatrix; scoreMatrix.resize(numQueryFlows+1); vector > directMatrix; directMatrix.resize(numQueryFlows+1); for(int i=0;i<=numQueryFlows;i++){ if (m->getControl_pressed()) { return 0; } scoreMatrix[i].resize(numRefFlows+1, 0.00); directMatrix[i].resize(numRefFlows+1, 'x'); scoreMatrix[i][0] = i * gapOpening; directMatrix[i][0] = 'u'; } for(int i=0;i<=numRefFlows;i++){ scoreMatrix[0][i] = i * gapOpening; directMatrix[0][i] = 'l'; } for(int i=1;i<=numQueryFlows;i++){ for(int j=1;j<=numRefFlows;j++){ if (m->getControl_pressed()) { return 0; } double diagonal = 1000000000; if(flowOrder[i%flowOrder.length()] == flowOrder[j%flowOrder.length()]){ diagonal = scoreMatrix[i-1][j-1] + penaltyMatrix[round(flowgram[i-1])][refFlow[j-1]]; } double up = scoreMatrix[i-1][j] + gapOpening; double left = scoreMatrix[i][j-1] + gapOpening; double minScore = diagonal; char direction = 'd'; if(left < diagonal && left < up){ minScore = left; direction = 'l'; } else if(up < diagonal && up < left){ minScore = up; direction = 'u'; } scoreMatrix[i][j] = minScore; directMatrix[i][j] = direction; } } int minRowIndex = numQueryFlows; double minRowScore = scoreMatrix[numQueryFlows][numRefFlows]; for(int i=0;igetControl_pressed()) { return 0; } if(scoreMatrix[i][numRefFlows] < minRowScore){ minRowScore = scoreMatrix[i][numRefFlows]; minRowIndex = i; } } int 
minColumnIndex = numRefFlows; double minColumnScore = scoreMatrix[numQueryFlows][numRefFlows]; for(int i=0;igetControl_pressed()) { return 0; } if(scoreMatrix[numQueryFlows][i] < minColumnScore){ minColumnScore = scoreMatrix[numQueryFlows][i]; minColumnIndex = i; } } int i=minRowIndex; int j= minColumnIndex; vector newFlowgram; vector newRefFlowgram; while(i > 0 && j > 0){ if (m->getControl_pressed()) { return 0; } if(directMatrix[i][j] == 'd'){ newFlowgram.push_back(flowgram[i-1]); newRefFlowgram.push_back(refFlow[j-1]); i--; j--; } else if(directMatrix[i][j] == 'l'){ newFlowgram.push_back(0); newRefFlowgram.push_back(refFlow[j-1]); j--; } else if(directMatrix[i][j] == 'u'){ newFlowgram.push_back(flowgram[i-1]); newRefFlowgram.push_back(0); i--; } } flowgram = newFlowgram; refFlow = newRefFlowgram; return 0; } catch(exception& e) { m->errorOut(e, "MakeLookupCommand", "alignFlowGrams"); exit(1); } } //****************************************************************************************************************************** int MakeLookupCommand::regress(vector& data, int N){ try { //fit data for larger values of N double xMean = 0; double yMean = 0; for(int i=1;igetControl_pressed()) { return 0; } xMean += i; yMean += data[i]; } xMean /= (N-1); yMean /= (N-1); double numerator = 0; double denomenator = 0; for(int i=1;igetControl_pressed()) { return 0; } numerator += (i-xMean)*(data[i] - yMean); denomenator += (i-xMean) * (i-xMean); } double slope = numerator / denomenator; double intercept = yMean - slope * xMean; for(int i=N;i<11;i++){ data[i] = intercept + i * slope; } return 0; } catch(exception& e) { m->errorOut(e, "MakeLookupCommand", "regress"); exit(1); } } //****************************************************************************************************************************** //********************************************************************************************************************** mothur-1.48.0/source/commands/makelookupcommand.h000077500000000000000000000033661424121717000221140ustar00rootroot00000000000000// // makelookupcommand.h // Mothur // // Created by SarahsWork on 5/14/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_makelookupcommand_h #define Mothur_makelookupcommand_h #include "command.hpp" #include "sequence.hpp" /**************************************************************************************************/ class MakeLookupCommand : public Command { public: MakeLookupCommand(string); ~MakeLookupCommand(){} vector setParameters(); string getCommandName() { return "make.lookup"; } string getCommandCategory() { return "Sequence Processing"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "Quince, C., A. Lanzén, T. P. Curtis, R. J. Davenport, N. Hall, I. M. Head, L. F. Read, and W. T. Sloan. 2009. Accurate determination of microbial diversity from 454 pyrosequencing data. Nat Methods 6:639-41. 
http://www.mothur.org/wiki/Make.lookup"; } string getDescription() { return "Creates a lookup file for use with shhh.flows using user-supplied mock community data and flow grams"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; string flowFileName, errorFileName, flowOrder, refFastaFileName, barcodeSequence, keySequence; vector outputNames; int thresholdCount; vector convertSeqToFlow(string sequence, string order); int alignFlowGrams(vector& flowgram, vector& refFlow, double gapOpening, vector > penaltyMatrix, string flowOrder); int regress(vector& data, int N); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/makesharedcommand.cpp000077500000000000000000001542331424121717000224040ustar00rootroot00000000000000/* * sharedcommand.cpp * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "makesharedcommand.h" #include "counttable.h" //******************************************************************************************************************** //sorts lowest to highest inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){ return (left->getGroup() < right->getGroup()); } //********************************************************************************************************************** vector SharedCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none","shared",false,false); parameters.push_back(pshared); CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none","shared",false,false); parameters.push_back(pbiom); CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup","shared",false,false,true); parameters.push_back(plist); CommandParameter pcount("count", "InputTypes", "", "", "none", "GroupCount", "none","",false,false); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "ListGroup","",false,false,true); parameters.push_back(pgroup); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","group",false,false); parameters.push_back(pgroups); CommandParameter pzero("keepzeroes", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pzero); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["tshared"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["map"] = tempOutNames; outputTypes["list"] = tempOutNames; abort = false; calledHelp = false; pickedGroups=false; allLines = true; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SharedCommand::getHelpString(){ try { 
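// Illustrative invocations consistent with the help text assembled below
// (the file names are hypothetical placeholders, not files shipped with mothur):
//   make.shared(list=final.opti_mcc.list, count=final.count_table, label=0.03)
//   make.shared(biom=final.biom)
//   make.shared(count=final.count_table)   // builds an ASV-level list and shared file from the count table alone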
string helpString = ""; helpString += "The make.shared command reads a list and group / count file or a biom file, or a shared file to convert or simply a count file and creates a shared file.\n"; helpString += "The make.shared command parameters are list, group, biom, groups, count, shared and label. list and group or count are required unless a current file is available or you provide a biom file or you are converting a shared file.\n"; helpString += "The count parameter allows you to provide a count file containing the group info for the list file. When the count file is provided without the list file, mothur will create a list and shared file for you.\n"; helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n"; helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SharedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "shared") { pattern = "[filename],shared-[filename],[distance],shared"; } else if (type == "tshared") { pattern = "[filename],tshared-[filename],[distance],tshared"; } else if (type == "group") { pattern = "[filename],[group],groups"; } else if (type == "list") { pattern = "[filename],[distance],list"; } else if (type == "map") { pattern = "[filename],map"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SharedCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SharedCommand::SharedCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } biomfile = validParameter.validFile(parameters, "biom"); if (biomfile == "not open") { biomfile = ""; abort = true; } else if (biomfile == "not found") { biomfile = ""; } else { current->setBiomFile(biomfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } ordergroupfile = validParameter.validFile(parameters, "ordergroup"); if (ordergroupfile == "not open") { abort = true; } else if (ordergroupfile == "not found") { ordergroupfile = ""; } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { 
groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); CountTable temp; if (!temp.testGroups(countfile)) { m->mothurOut("\n[WARNING]: Your count file does not have group info, all reads will be assigned to mothurGroup.\n"); temp.readTable(countfile, false, false); //dont read groups map seqs = temp.getNameMap(); CountTable newCountTable; newCountTable.addGroup("mothurGroup"); for (map::iterator it = seqs.begin(); it != seqs.end(); it++) { vector counts; counts.push_back(it->second); newCountTable.push_back(it->first, counts); } string newCountfileName = util.getRootName(countfile) + "mothurGroup" + util.getExtension(countfile); newCountTable.printTable(newCountfileName); current->setCountFile(newCountfileName); countfile = newCountfileName; outputNames.push_back(newCountfileName); } } if ((biomfile == "") && (listfile == "") && (countfile == "") && (sharedfile == "")) { //you must provide at least one of the following //is there are current file available for either of these? //give priority to list, then biom, then count listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { biomfile = current->getBiomFile(); if (biomfile != "") { m->mothurOut("Using " + biomfile + " as input file for the biom parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("[ERROR]: No valid current files. You must provide a list, biom, shared or count file before you can use the make.shared command.\n"); abort = true; } } } } } else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom.\n"); abort = true; } if (listfile != "") { if ((groupfile == "") && (countfile == "")) { groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You need to provide a groupfile or countfile if you are going to use the list format.\n"); abort = true; } } } } string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { pickedGroups=true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } string temp = validParameter.valid(parameters, "keepzeroes"); if (temp == "not found"){ temp = "f"; } keepZeroes = util.isTrue(temp); if ((listfile == "") && (biomfile == "") && (countfile != "")) { //building a shared file from a count file, require label if (labels.size() == 0) { labels.insert("ASV"); } } } } catch(exception& e) { m->errorOut(e, "SharedCommand", "SharedCommand"); exit(1); } } //********************************************************************************************************************** int SharedCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (listfile != "") { createSharedFromListGroup(); } else if (biomfile != "") { createSharedFromBiom(); } else if (sharedfile != "") { convertSharedFormat(); } else if ((listfile == "") && (countfile != "")) { createSharedFromCount(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } } string currentName = ""; itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "execute"); exit(1); } } //********************************************************************************************************************** string SharedCommand::findFormat() { try { ifstream in; util.openInputFile(sharedfile, in); vector headers; util.getline(in, headers); if (headers.size() > 4) { return "shared"; } else { if (headers.size() == 4) { //check to make sure this isn't a shared file with 1 OTU if (headers[3] == "abundance") { return "tshared"; } }else { m->mothurOut("[ERROR]: cannot determine format of shared file. 
Expected 4 or more columns, found " + toString(headers.size()) + "columns, please correct.\n"); m->setControl_pressed(true); } } return "shared"; } catch(exception& e) { m->errorOut(e, "SharedCommand", "findFormat"); exit(1); } } //********************************************************************************************************************** void SharedCommand::convertSharedFormat() { try { //getting output filename map variables; if (outputdir == "") { outputdir += util.hasPath(sharedfile); } variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); string tag = findFormat(); if (m->getControl_pressed()) { return; } string sharedFilename = ""; if (tag == "shared") { //converting shared to tshared tag = "tshared"; sharedFilename = getOutputFileName(tag,variables); ofstream out; util.openOutputFile(sharedFilename, out); InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); bool printHeaders = true; while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } lookup->printTidy(out, printHeaders, keepZeroes); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); }else { //tshared - converting tshared to shared tag = "shared"; sharedFilename = getOutputFileName(tag,variables); ofstream out; util.openOutputFile(sharedFilename, out); ifstream inScan; util.openInputFile(sharedfile, inScan); util.getline(inScan); //read headers string label, group, otuName; int abundance; set labels; set groups; set otuNames; while (!inScan.eof()) { if (m->getControl_pressed()) { break; } inScan >> label >> group >> otuName >> abundance; gobble(inScan); labels.insert(label); groups.insert(group); otuNames.insert(otuName); } inScan.close(); vector oNames = util.mothurConvert(otuNames); vector gNames = util.mothurConvert(groups); sort(gNames.begin(), gNames.end()); int numGroups = gNames.size(); int numOTUs = oNames.size(); map groupNameToIndex; //groupName -> index in otuAbunds for (int i = 0; i < numGroups; i++) { groupNameToIndex[gNames[i]] = i; } map > > sharedVectors; map > >::iterator itDistance; map >::iterator itOTU; map::iterator itSample; for (set::iterator it = labels.begin(); it != labels.end(); it++) { //for each distance map > otus; //otuName -> abunds for (int i = 0; i < numOTUs; i++) { //for each OTU, set all otus to 0 vector emptyOTU; emptyOTU.resize(numGroups, 0); otus[oNames[i]] = emptyOTU; //add empty otu } sharedVectors[*it] = otus; //add empty vector } ifstream in; util.openInputFile(sharedfile, in); util.getline(in); //read headers while (!in.eof()) { if (m->getControl_pressed()) { break; } in >> label >> group >> otuName >> abundance; gobble(in); itDistance = sharedVectors.find(label); if (itDistance != sharedVectors.end()) { //we have this label before - ie 0.03 or 0.05 itOTU = (itDistance->second).find(otuName); if (itOTU != (itDistance->second).end()) { //we have this otuName before - ie OTU0001 or OTU0234 itSample = groupNameToIndex.find(group); if (itSample != groupNameToIndex.end()) { //we have this sample before - ie FD01 or FD03 (itOTU->second)[itSample->second] = abundance; }else { m->mothurOut("[ERROR]: Cannot find sample " + group + ", skipping.\n"); } }else { m->mothurOut("[ERROR]: Cannot find otu " + otuName + ", skipping.\n"); } }else { m->mothurOut("[ERROR]: Cannot find label " + label + 
", skipping.\n"); } } in.close(); bool printHeaders = true; //create sharedRabundVectors for (itDistance = sharedVectors.begin(); itDistance != sharedVectors.end(); itDistance++) { //for each distance //create empty shared vector with samples SharedRAbundVectors* shared = new SharedRAbundVectors(); for (itSample = groupNameToIndex.begin(); itSample != groupNameToIndex.end(); itSample++) { SharedRAbundVector* thisSample = new SharedRAbundVector(); thisSample->setGroup(itSample->first); shared->push_back(thisSample); } shared->setLabels(itDistance->first); //set distance for shared vector m->mothurOut(itDistance->first+"\n"); for (itOTU = (itDistance->second).begin(); itOTU != (itDistance->second).end(); itOTU++) { //for each OTU shared->push_back(itOTU->second, itOTU->first); //add otus abundance } shared->eliminateZeroOTUS(); shared->print(out, printHeaders); delete shared; } out.close(); } outputNames.push_back(sharedFilename); outputTypes[tag].push_back(sharedFilename); } catch(exception& e) { m->errorOut(e, "SharedCommand", "convertSharedFormat"); exit(1); } } //********************************************************************************************************************** int SharedCommand::createSharedFromCount() { try { //getting output filename if (outputdir == "") { outputdir += util.hasPath(countfile); } string label = "ASV"; if (labels.size() != 0) { label = *labels.begin(); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); variables["[distance]"] = "asv"; string listFilename = getOutputFileName("list",variables); outputNames.push_back(listFilename); outputTypes["list"].push_back(listFilename); ofstream outlist; util.openOutputFile(listFilename, outlist); CountTable ct; ct.readTable(countfile, true, false); map counts = ct.getNameMap(); ListVector list = ct.getListVector(); list.setLabel(label); list.print(outlist, counts); outlist.close(); listfile = listFilename; createSharedFromListGroup(); return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "createSharedFromCount"); exit(1); } } //********************************************************************************************************************** int SharedCommand::createSharedFromBiom() { try { //getting output filename string filename = biomfile; if (outputdir == "") { outputdir += util.hasPath(filename); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(filename)); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out; util.openOutputFile(filename, out); /*{ "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique", "format": "Biological Observation Matrix 0.9.1", "format_url": "http://biom-format.org", "type": "OTU table", "generated_by": "mothur1.24.0", "date": "Tue Apr 17 13:12:07 2012", */ ifstream in; util.openInputFile(biomfile, in); string matrixFormat = ""; int numRows = 0; int numCols = 0; int shapeNumRows = 0; int shapeNumCols = 0; vector otuNames; vector groupNames; map fileLines; vector names; int countOpenBrace = 0; int countClosedBrace = 0; int openParen = -1; //account for opening brace int closeParen = 0; bool ignoreCommas = false; bool atComma = false; string line = ""; string matrixElementType = ""; while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1" if (m->getControl_pressed()) { break; } char 
c = in.get(); gobble(in); if (c == '[') { countOpenBrace++; } else if (c == ']') { countClosedBrace++; } else if (c == '{') { openParen++; } else if (c == '}') { closeParen++; } else if ((!ignoreCommas) && (c == ',')) { atComma = true; } if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; } else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; } if (atComma && !ignoreCommas) { if (fileLines.size() == 0) { //clip first { line = line.substr(1); } string tag = getTag(line); fileLines[tag] = line; line = ""; atComma = false; ignoreCommas = false; }else { line += c; } } if (line != "") { line = line.substr(0, line.length()-1); string tag = getTag(line); fileLines[tag] = line; } in.close(); string biomType; map::iterator it; it = fileLines.find("type"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); } else { string thisLine = it->second; biomType = getTag(thisLine); // if ((biomType != "OTU table") && (biomType != "OTUtable") && (biomType != "Taxon table") && (biomType != "Taxontable")) { m->mothurOut("[ERROR]: " + biomType + " is not a valid biom type for mothur. Only types allowed are OTU table and Taxon table.\n"); m->setControl_pressed(true); } } if (m->getControl_pressed()) { out.close(); util.mothurRemove(filename); return 0; } it = fileLines.find("matrix_type"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); } else { string thisLine = it->second; matrixFormat = getTag(thisLine); if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->setControl_pressed(true); } } if (m->getControl_pressed()) { out.close(); util.mothurRemove(filename); return 0; } it = fileLines.find("matrix_element_type"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); } else { string thisLine = it->second; matrixElementType = getTag(thisLine); if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. 
Types allowed are int and float.\n"); m->setControl_pressed(true); } if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); } } if (m->getControl_pressed()) { out.close(); util.mothurRemove(filename); return 0; } it = fileLines.find("rows"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); } else { string thisLine = it->second; if ((biomType == "Taxon table") || (biomType == "Taxontable")) { string mapFilename = getOutputFileName("map",variables); outputNames.push_back(mapFilename); outputTypes["map"].push_back(mapFilename); ofstream outMap; util.openOutputFile(mapFilename, outMap); vector taxonomies = readRows(thisLine, numRows); string snumBins = toString(numRows); for (int i = 0; i < numRows; i++) { //if there is a bin label use it otherwise make one string binLabel = "OTU"; string sbinNumber = toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; otuNames.push_back(binLabel); outMap << otuNames[i] << '\t' << taxonomies[i] << endl; } outMap.close(); }else{ otuNames = readRows(thisLine, numRows); } } if (m->getControl_pressed()) { out.close(); util.mothurRemove(filename); return 0; } it = fileLines.find("columns"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); } else { string thisLine = it->second; //read sample names groupNames = readRows(thisLine, numCols); //if users selected groups, then remove the groups not wanted. if (Groups.size() == 0) { Groups = groupNames; } else { groupNames = Groups; } //set fileroot fileroot = outputdir + util.getRootName(util.getSimpleName(biomfile)); } if (m->getControl_pressed()) { out.close(); util.mothurRemove(filename); return 0; } it = fileLines.find("shape"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); } else { string thisLine = it->second; getDims(thisLine, shapeNumRows, shapeNumCols); //check shape if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->setControl_pressed(true); } if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->setControl_pressed(true); } } if (m->getControl_pressed()) { out.close(); util.mothurRemove(filename); return 0; } bool printHeaders = true; it = fileLines.find("data"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); } else { string thisLine = it->second; //read data SharedRAbundVectors* lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size()); lookup->setOTUNames(otuNames); lookup->eliminateZeroOTUS(); m->mothurOutEndLine(); m->mothurOut(lookup->getLabel()+"\n"); printSharedData(lookup, out, printHeaders); } if (m->getControl_pressed()) { util.mothurRemove(filename); return 0; } return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "createSharedFromBiom"); exit(1); } } //********************************************************************************************************************** SharedRAbundVectors* SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector& groupNames, 
int numOTUs) { try { SharedRAbundVectors* lookup = new SharedRAbundVectors(); //creates new sharedRAbunds for (int i = 0; i < groupNames.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0 temp->setGroup(groupNames[i]); lookup->push_back(temp); } lookup->setLabels("userLabel"); bool dataStart = false; bool inBrackets = false; string num = ""; vector nums; int otuCount = 0; for (int i = 0; i < line.length(); i++) { if (m->getControl_pressed()) { return lookup; } //look for opening [ to indicate data is starting if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } } else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data if (dataStart) { if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } } else if ((line[i] == ']') && (inBrackets)) { inBrackets = false; int temp; float temp2; if (matrixElementType == "float") { util.mothurConvert(num, temp2); temp = floor(temp2); } else { util.mothurConvert(num, temp); } nums.push_back(temp); num = ""; //save info to vectors if (matrixFormat == "dense") { //sanity check if (nums.size() != lookup->getNumGroups()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->setControl_pressed(true); } //set abundances for this otu //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU for (int j = 0; j < groupNames.size(); j++) { lookup->set(otuCount, nums[j], groupNames[j]); } otuCount++; }else { //sanity check if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->setControl_pressed(true); } //nums contains [otuNum, sampleNum, abundance] lookup->set(nums[0], nums[2], groupNames[nums[1]]); } nums.clear(); } if (inBrackets) { if (line[i] == ',') { int temp; float temp2; if (matrixElementType == "float") { util.mothurConvert(num, temp2); temp = floor(temp2); } else { util.mothurConvert(num, temp); } nums.push_back(temp); num = ""; }else { if (!isspace(line[i])) { num += line[i]; } } } } } if (pickedGroups) { lookup->eliminateZeroOTUS(); } return lookup; } catch(exception& e) { m->errorOut(e, "SharedCommand", "readData"); exit(1); } } //********************************************************************************************************************** int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) { try { //get shape bool inBar = false; string num = ""; for (int i = 0; i < line.length(); i++) { //you want to ignore any ; until you reach the next ' if ((line[i] == '[') && (!inBar)) { inBar = true; i++; if (!(i < line.length())) { break; } } else if ((line[i] == ']') && (inBar)) { inBar= false; util.mothurConvert(num, shapeNumCols); break; } if (inBar) { if (line[i] == ',') { util.mothurConvert(num, shapeNumRows); num = ""; }else { if (!isspace(line[i])) { num += line[i]; } } } } return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "getDims"); exit(1); } } //********************************************************************************************************************** vector SharedCommand::readRows(string line, int& numRows) { try { /*"rows":[ {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}}, {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", 
"Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}}, ... "rows":[{"id": "k__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae", "metadata": null}, {"id": "k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae", "metadata": null} .... make look like above ],*/ vector names; int countOpenBrace = 0; int countClosedBrace = 0; int openParen = 0; int closeParen = 0; string nextRow = ""; bool end = false; for (int i = 0; i < line.length(); i++) { if (m->getControl_pressed()) { return names; } if (line[i] == '[') { countOpenBrace++; } else if (line[i] == ']') { countClosedBrace++; } else if (line[i] == '{') { openParen++; } else if (line[i] == '}') { closeParen++; } else if (openParen != 0) { nextRow += line[i]; } //you are reading the row info //you have reached the end of the rows info if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; } if ((openParen == closeParen) && (closeParen != 0)) { //process row numRows++; vector items; util.splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway string part = items[0]; items.clear(); util.splitAtChar(part, items, ':'); //split part we want containing the ids string name = items[1]; //remove "" if needed int pos = name.find("\""); if (pos != string::npos) { string newName = ""; for (int k = 0; k < name.length(); k++) { if (name[k] != '\"') { newName += name[k]; } } name = newName; } names.push_back(name); nextRow = ""; openParen = 0; closeParen = 0; } } return names; } catch(exception& e) { m->errorOut(e, "SharedCommand", "readRows"); exit(1); } } //********************************************************************************************************************** //designed for things like "type": "OTU table", returns type string SharedCommand::getTag(string& line) { try { bool inQuotes = false; string tag = ""; char c = '\"'; for (int i = 0; i < line.length(); i++) { //you want to ignore any ; until you reach the next ' if ((line[i] == c) && (!inQuotes)) { inQuotes = true; } else if ((line[i] == c) && (inQuotes)) { inQuotes= false; line = line.substr(i+1); return tag; } if (inQuotes) { if (line[i] != c) { tag += line[i]; } } } return tag; } catch(exception& e) { m->errorOut(e, "SharedCommand", "getInfo"); exit(1); } } //********************************************************************************************************************** int SharedCommand::createSharedFromListGroup() { try { GroupMap* groupMap = nullptr; CountTable* countTable = nullptr; pickedGroups = false; if (groupfile != "") { groupMap = new GroupMap(groupfile); int groupError = groupMap->readMap(); if (groupError == 1) { delete groupMap; return 0; } vector allGroups = groupMap->getNamesOfGroups(); if (Groups.size() == 0) { Groups = allGroups; } else { pickedGroups = true; } }else{ countTable = new CountTable(); countTable->readTable(countfile, true, false); vector allGroups = countTable->getNamesOfGroups(); if (Groups.size() == 0) { Groups = allGroups; } else { pickedGroups = true; } } int numGroups = Groups.size(); if (m->getControl_pressed()) { return 0; } ofstream out; string filename = ""; if (!pickedGroups) { string filename = listfile; if (outputdir == "") { outputdir += util.hasPath(filename); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(filename)); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); 
outputTypes["shared"].push_back(filename); util.openOutputFile(filename, out); } //set fileroot fileroot = outputdir + util.getRootName(util.getSimpleName(listfile)); map variables; variables["[filename]"] = fileroot; string errorOff = "no error"; InputData input(listfile, "shared", Groups); SharedListVector* SharedList = input.getSharedListVector(); string lastLabel = SharedList->getLabel(); SharedRAbundVectors* lookup; if (m->getControl_pressed()) { delete SharedList; if (groupMap != nullptr) { delete groupMap; } if (countTable != nullptr) { delete countTable; } out.close(); if (!pickedGroups) { util.mothurRemove(filename); } return 0; } //sanity check vector namesSeqs; int numGroupNames = 0; if (current->getGroupMode() == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); } else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); } int error = ListGroupSameSeqs(namesSeqs, SharedList); if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct.\n"); m->setControl_pressed(true); out.close(); if (!pickedGroups) { util.mothurRemove(filename); } //remove blank shared file you made //delete memory delete SharedList; if (groupMap != nullptr) { delete groupMap; } if (countTable != nullptr) { delete countTable; } return 0; } if (error == 1) { m->setControl_pressed(true); } //if user has specified groups make new groupfile for them if ((pickedGroups) && (current->getGroupMode() == "group")) { //make new group file string groups = ""; if (numGroups < 4) { for (int i = 0; i < numGroups-1; i++) { groups += Groups[i] + "."; } groups+=Groups[numGroups-1]; }else { groups = "merge"; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); variables["[group]"] = groups; string newGroupFile = getOutputFileName("group",variables); outputTypes["group"].push_back(newGroupFile); outputNames.push_back(newGroupFile); ofstream outGroups; util.openOutputFile(newGroupFile, outGroups); vector names = groupMap->getNamesSeqs(); string groupName; for (int i = 0; i < names.size(); i++) { groupName = groupMap->getGroup(names[i]); if (isValidGroup(groupName, Groups)) { outGroups << names[i] << '\t' << groupName << endl; } } outGroups.close(); } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set processedLabels; set userLabels = labels; bool printHeaders = true; while((SharedList != nullptr) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->getControl_pressed()) { delete SharedList; if (groupMap != nullptr) { delete groupMap; } if (countTable != nullptr) { delete countTable; } if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){ lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup->getLabel()+"\n"); if (m->getControl_pressed()) { delete SharedList; if (groupMap != nullptr) { delete groupMap; } if (countTable != nullptr) { delete countTable; } delete lookup; if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } //if picked groups must split the shared file by label if (pickedGroups) { string filename = listfile; if (outputdir == "") { outputdir += util.hasPath(filename); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(filename)); variables["[distance]"] = lookup->getLabel(); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out2; util.openOutputFile(filename, out2); lookup->eliminateZeroOTUS(); printSharedData(lookup, out2, printHeaders); out2.close(); }else { printSharedData(lookup, out, printHeaders); //prints info to the .shared file } delete lookup; processedLabels.insert(SharedList->getLabel()); userLabels.erase(SharedList->getLabel()); } if ((util.anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = SharedList->getLabel(); delete SharedList; SharedList = input.getSharedListVector(lastLabel); //get new list vector to process lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup->getLabel()+"\n"); if (m->getControl_pressed()) { delete SharedList; if (groupMap != nullptr) { delete groupMap; } if (countTable != nullptr) { delete countTable; } delete lookup; if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } //if picked groups must split the shared file by label if (pickedGroups) { string filename = listfile; if (outputdir == "") { outputdir += util.hasPath(filename); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(filename)); variables["[distance]"] = lookup->getLabel(); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out2; util.openOutputFile(filename, out2); lookup->eliminateZeroOTUS(); printSharedData(lookup, out2, printHeaders); out2.close(); }else { printSharedData(lookup, out, printHeaders); //prints info to the .shared file } delete lookup; processedLabels.insert(SharedList->getLabel()); userLabels.erase(SharedList->getLabel()); //restore real lastlabel to save below SharedList->setLabel(saveLabel); } lastLabel = SharedList->getLabel(); delete SharedList; SharedList = input.getSharedListVector(); //get new list vector to process } //output error messages about any remaining user labels set::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { if (processedLabels.count(lastLabel) != 1) { needToRun = true; } } //run last label if you need to if (needToRun ) { if (SharedList != nullptr) { delete SharedList; } SharedList = input.getSharedListVector(lastLabel); //get new list vector to process lookup = 
SharedList->getSharedRAbundVector(); m->mothurOut(lookup->getLabel()+"\n"); if (m->getControl_pressed()) { if (groupMap != nullptr) { delete groupMap; } if (countTable != nullptr) { delete countTable; } if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } //if picked groups must split the shared file by label if (pickedGroups) { string filename = listfile; if (outputdir == "") { outputdir += util.hasPath(filename); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(filename)); variables["[distance]"] = lookup->getLabel(); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out2; util.openOutputFile(filename, out2); lookup->eliminateZeroOTUS(); printSharedData(lookup, out2, printHeaders); out2.close(); }else { printSharedData(lookup, out, printHeaders); //prints info to the .shared file } delete lookup; delete SharedList; } if (!pickedGroups) { out.close(); } if (groupMap != nullptr) { delete groupMap; } if (countTable != nullptr) { delete countTable; } if (m->getControl_pressed()) { if (!pickedGroups) { util.mothurRemove(filename); } return 0; } return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "createSharedFromListGroup"); exit(1); } } //********************************************************************************************************************** void SharedCommand::printSharedData(SharedRAbundVectors*& thislookup, ofstream& out, bool& printHeaders) { try { if (order.size() == 0) { //user has not specified an order so do aplabetically thislookup->print(out, printHeaders); }else{ //create a map from groupName to each sharedrabund map myMap; map::iterator myIt; vector data = thislookup->getSharedRAbundVectors(); for (int i = 0; i < data.size(); i++) { myMap[data[i]->getGroup()] = data[i]; } vector Groups; //loop through ordered list and print the rabund for (int i = 0; i < order.size(); i++) { myIt = myMap.find(order[i]); if(myIt != myMap.end()) { //we found it out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t'; (myIt->second)->print(out); Groups.push_back((myIt->second)->getGroup()); }else{ m->mothurOut("Can't find shared info for " + order[i] + ", skipping.\n"); } } for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); } } catch(exception& e) { m->errorOut(e, "SharedCommand", "printSharedData"); exit(1); } } //********************************************************************************************************************** int SharedCommand::ListGroupSameSeqs(vector& groupMapsSeqs, SharedListVector* SharedList) { try { int error = 0; set groupNamesSeqs; for(int i = 0; i < groupMapsSeqs.size(); i++) { groupNamesSeqs.insert(groupMapsSeqs[i]); } //go through list and if group returns "not found" output it for (int i = 0; i < SharedList->getNumBins(); i++) { if (m->getControl_pressed()) { return 0; } string names = SharedList->get(i); vector listNames; util.splitAtComma(names, listNames); for (int j = 0; j < listNames.size(); j++) { int num = groupNamesSeqs.count(listNames[j]); if (num == 0) { error = 1; if (groupfile != "") { m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct.\n"); } else{ m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your count file. 
Please correct.\n"); } }else { groupNamesSeqs.erase(listNames[j]); } } } for (set::iterator itGroupSet = groupNamesSeqs.begin(); itGroupSet != groupNamesSeqs.end(); itGroupSet++) { error = 1; m->mothurOut("[ERROR]: " + (*itGroupSet) + " is in your groupfile and not your listfile. Please correct.\n"); } return error; } catch(exception& e) { m->errorOut(e, "SharedCommand", "ListGroupSameSeqs"); exit(1); } } //********************************************************************************************************************** SharedCommand::~SharedCommand(){ //delete list; } //********************************************************************************************************************** int SharedCommand::readOrderFile() { try { //remove old names order.clear(); ifstream in; util.openInputFile(ordergroupfile, in); string thisGroup; while(!in.eof()){ in >> thisGroup; gobble(in); order.push_back(thisGroup); if (m->getControl_pressed()) { order.clear(); break; } } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "readOrderFile"); exit(1); } } //********************************************************************************************************************** bool SharedCommand::isValidGroup(string groupname, vector groups) { try { for (int i = 0; i < groups.size(); i++) { if (groupname == groups[i]) { return true; } } return false; } catch(exception& e) { m->errorOut(e, "SharedCommand", "isValidGroup"); exit(1); } } /************************************************************/ mothur-1.48.0/source/commands/makesharedcommand.h000077500000000000000000000045301424121717000220430ustar00rootroot00000000000000#ifndef SHAREDCOMMAND_H #define SHAREDCOMMAND_H /* * sharedcommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "sharedlistvector.h" #include "inputdata.h" //********************************************************************************************************************** struct tidy { string otu; string group; int abund; tidy() : group(""), otu(""), abund(0) {} tidy(string o, string g, int a) : otu(o), group(g), abund(a) {} }; //********************************************************************************************************************** /* The shared() command: The shared command can only be executed after a successful read.shared command. The shared command parses a .list file and separates it into groups. It outputs a .shared file containing the OTU information for each group. There are no shared command parameters. The shared command should be in the following format: shared(). 
*/ class SharedCommand : public Command { public: SharedCommand(string); ~SharedCommand(); vector setParameters(); string getCommandName() { return "make.shared"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Make.shared"; } string getDescription() { return "make a shared file from a list and group file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: void printSharedData(SharedRAbundVectors*&, ofstream&, bool&); int readOrderFile(); bool isValidGroup(string, vector); int ListGroupSameSeqs(vector&, SharedListVector*); int createSharedFromListGroup(); int createSharedFromBiom(); int createSharedFromCount(); void convertSharedFormat(); string findFormat(); string getTag(string&); vector readRows(string, int&); int getDims(string, int&, int&); SharedRAbundVectors* readData(string, string, string, vector&, int); vector Groups, outputNames, order; set labels; string fileroot, listfile, groupfile, biomfile, ordergroupfile, countfile, sharedfile; bool firsttime, pickedGroups, abort, allLines, keepZeroes; }; #endif mothur-1.48.0/source/commands/mantelcommand.cpp000077500000000000000000000212341424121717000215520ustar00rootroot00000000000000/* * mantelcommand.cpp * mothur * * Created by westcott on 2/9/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "mantelcommand.h" #include "readphylipvector.h" //********************************************************************************************************************** vector MantelCommand::setParameters(){ try { CommandParameter pphylip1("phylip1", "InputTypes", "", "", "none", "none", "none","mantel",false,true,true); parameters.push_back(pphylip1); CommandParameter pphylip2("phylip2", "InputTypes", "", "", "none", "none", "none","mantel",false,true,true); parameters.push_back(pphylip2); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "","",false,false); parameters.push_back(pmethod); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["mantel"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MantelCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MantelCommand::getHelpString(){ try { string helpString = ""; helpString += "Sokal, R. R., & Rohlf, F. J. (1995). Biometry, 3rd edn. New York: Freeman.\n"; helpString += "The mantel command reads two distance matrices and calculates the mantel correlation coefficient.\n"; helpString += "The mantel command parameters are phylip1, phylip2, iters and method. The phylip1 and phylip2 parameters are required. Matrices must be the same size and contain the same names.\n"; helpString += "The method parameter allows you to select what method you would like to use. 
Options are pearson, spearman and kendall. Default=pearson.\n"; helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n"; helpString += "The mantel command should be in the following format: mantel(phylip1=veg.dist, phylip2=env.dist).\n"; helpString += "The mantel command outputs a .mantel file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MantelCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MantelCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "mantel") { pattern = "[filename],mantel"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MantelCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MantelCommand::MantelCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; phylipfile1 = validParameter.validFile(parameters, "phylip1"); if (phylipfile1 == "not open") { phylipfile1 = ""; abort = true; } else if (phylipfile1 == "not found") { phylipfile1 = ""; m->mothurOut("phylip1 is a required parameter for the mantel command.\n"); abort = true; } phylipfile2 = validParameter.validFile(parameters, "phylip2"); if (phylipfile2 == "not open") { phylipfile2 = ""; abort = true; } else if (phylipfile2 == "not found") { phylipfile2 = ""; m->mothurOut("phylip2 is a required parameter for the mantel command.\n"); abort = true; } if (outputdir == ""){ outputdir = util.hasPath(phylipfile1); } method = validParameter.valid(parameters, "method"); if (method == "not found"){ method = "pearson"; } string temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. 
Valid methods are pearson, spearman, and kendall.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "MantelCommand", "MantelCommand"); exit(1); } } //********************************************************************************************************************** int MantelCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } /***************************************************/ // reading distance files // /***************************************************/ //read phylip1 ReadPhylipVector readMatrix(phylipfile1); vector< vector > matrix1; vector names1 = readMatrix.read(matrix1); if (m->getControl_pressed()) { return 0; } //read phylip2 ReadPhylipVector readMatrix2(phylipfile2); vector< vector > matrix2; vector names2 = readMatrix2.read(matrix2); if (m->getControl_pressed()) { return 0; } //make sure matrix2 and matrix1 are in the same order if (names1 == names2) { //then everything is in same order and same size }else if (names1.size() != names2.size()) { //wrong size no need to order, abort m->mothurOut("[ERROR]: distance matrices are not the same size, aborting.\n"); m->setControl_pressed(true); }else { //sizes are the same, but either the names are different or they are in different order m->mothurOut("[WARNING]: Names do not match between distance files. Comparing based on order in files.\n"); } if (m->getControl_pressed()) { return 0; } /***************************************************/ // calculating mantel and signifigance // /***************************************************/ //calc mantel coefficient LinearAlgebra linear; double mantel = 0.0; if (method == "pearson") { mantel = linear.calcPearson(matrix1, matrix2); } else if (method == "spearman") { mantel = linear.calcSpearman(matrix1, matrix2); } else if (method == "kendall") { mantel = linear.calcKendall(matrix1, matrix2); } //calc signifigance int count = 0; for (int i = 0; i < iters; i++) { if (m->getControl_pressed()) { return 0; } //randomize matrix2 vector< vector > matrix2Copy = matrix2; util.mothurRandomShuffle(matrix2Copy); //calc random mantel double randomMantel = 0.0; if (method == "pearson") { randomMantel = linear.calcPearson(matrix1, matrix2Copy); } else if (method == "spearman") { randomMantel = linear.calcSpearman(matrix1, matrix2Copy); } else if (method == "kendall") { randomMantel = linear.calcKendall(matrix1, matrix2Copy); } if (randomMantel >= mantel) { count++; } } double pValue = count / (float) iters; if (m->getControl_pressed()) { return 0; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(phylipfile1)); string outputFile = getOutputFileName("mantel",variables); outputNames.push_back(outputFile); outputTypes["mantel"].push_back(outputFile); ofstream out; util.openOutputFile(outputFile, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); out << "Mantel\tpValue" << endl; out << mantel << '\t' << pValue << endl; out.close(); m->mothurOut("\nmantel = " + toString(mantel) + "\tpValue = " + toString(pValue) + "\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MantelCommand", "execute"); exit(1); } } //********************************************************************************************************************** 
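// ---------------------------------------------------------------------------
// Editor's note (not part of the mothur sources): a minimal, self-contained
// sketch of the permutation test performed in MantelCommand::execute() above.
// It assumes a plain Pearson correlation computed cell-by-cell over the two
// distance matrices and, like the command, builds the null distribution by
// shuffling the rows of the second matrix. The names Matrix, pearson and
// mantelPValue are illustrative only and do not exist in mothur.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <random>
#include <vector>

using Matrix = std::vector<std::vector<double>>;

// Pearson correlation of the two matrices, cell by cell.
static double pearson(const Matrix& a, const Matrix& b) {
    double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0; int n = 0;
    for (size_t i = 0; i < a.size(); i++) {
        for (size_t j = 0; j < a[i].size(); j++) {
            double x = a[i][j], y = b[i][j];
            sx += x; sy += y; sxx += x * x; syy += y * y; sxy += x * y; n++;
        }
    }
    double num = n * sxy - sx * sy;
    double den = std::sqrt((n * sxx - sx * sx) * (n * syy - sy * sy));
    return (den == 0.0) ? 0.0 : num / den;
}

// Observed statistic plus a one-sided p-value from 'iters' row shuffles of b.
static double mantelPValue(const Matrix& a, const Matrix& b, int iters, double& observed) {
    observed = pearson(a, b);
    std::mt19937 rng(std::random_device{}());
    int count = 0;
    for (int i = 0; i < iters; i++) {
        Matrix shuffled = b;                                 // fresh copy each iteration, as in execute()
        std::shuffle(shuffled.begin(), shuffled.end(), rng); // randomize the second matrix
        if (pearson(a, shuffled) >= observed) { count++; }   // count permutations at least as extreme
    }
    return count / static_cast<double>(iters);
}

int main() {
    Matrix veg = {{0, 1, 2}, {1, 0, 3}, {2, 3, 0}};          // toy 3x3 distance matrices
    Matrix env = {{0, 2, 3}, {2, 0, 4}, {3, 4, 0}};
    double observed = 0.0;
    double p = mantelPValue(veg, env, 1000, observed);
    std::cout << "Mantel\tpValue\n" << observed << '\t' << p << '\n';
    return 0;
}
// ---------------------------------------------------------------------------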
mothur-1.48.0/source/commands/mantelcommand.h000077500000000000000000000020701424121717000212140ustar00rootroot00000000000000#ifndef MANTELCOMMAND_H #define MANTELCOMMAND_H /* * mantelcommand.h * mothur * * Created by westcott on 2/9/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "linearalgebra.h" class MantelCommand : public Command { public: MantelCommand(string); ~MantelCommand(){} vector setParameters(); string getCommandName() { return "mantel"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. \nhttp://www.mothur.org/wiki/Mantel"; } string getDescription() { return "Mantel’s test for correlation between matrices"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string phylipfile1, phylipfile2, method; bool abort; int iters; vector outputNames; }; #endif mothur-1.48.0/source/commands/mergecountcommand.cpp000077500000000000000000000217021424121717000224420ustar00rootroot00000000000000// // mergecountcommand.cpp // Mothur // // Created by Sarah Westcott on 8/3/16. // Copyright © 2016 Schloss Lab. All rights reserved. // #include "mergecountcommand.hpp" #include "counttable.h" //********************************************************************************************************************** vector MergeCountCommand::setParameters(){ try { CommandParameter pcount("count", "InputTypes", "", "", "", "", "","count",false,false,true); parameters.push_back(pcount); CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true); parameters.push_back(poutput); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MergeCountCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MergeCountCommand::getHelpString(){ try { string helpString = ""; helpString += "The merge.count command takes a list of count files separated by dashes and merges them into one file."; helpString += "The merge.count command parameters are count and output."; helpString += "Example merge.count(count=final.count_table-new.count_table, output=complete.count_table)."; return helpString; } catch(exception& e) { m->errorOut(e, "MergeCountCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** MergeCountCommand::MergeCountCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser 
parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; string inputDir = validParameter.validPath(parameters, "inputdir"); if (inputDir == "not found"){ inputDir = ""; } string fileList = validParameter.validPath(parameters, "count"); if(fileList == "not found") { m->mothurOut("[ERROR]: you must enter two or more count file names\n"); abort=true; } else{ util.splitAtDash(fileList, fileNames); } numInputFiles = fileNames.size(); ifstream testFile; if(numInputFiles == 0){ m->mothurOut("you must enter two or more file names and you entered " + toString(fileNames.size()) + " file names\n"); abort=true; } else{ for(int i=0;i file; file["file"] = fileNames[i]; fileNames[i] = validParameter.validFile(file, "file"); if(fileNames[i] == "not found"){ abort = true; } } } outputFileName = validParameter.validPath(parameters, "output"); if (outputFileName == "not found") { m->mothurOut("you must enter an output file name\n"); abort=true; } else if (outputdir != "") { outputFileName = outputdir + util.getSimpleName(outputFileName); } } } catch(exception& e) { m->errorOut(e, "MergeCountCommand", "MergeCountCommand"); exit(1); } } //********************************************************************************************************************** int MergeCountCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } util.mothurRemove(outputFileName); //read headers from each file to confirm all contain groupinfo or all do not //Also collect all group names bool allContainGroups = true; bool allNoGroups = true; set allGroups; for(int i = 0; i < numInputFiles; i++) { if (m->getControl_pressed()) { return 0; } vector thisTablesGroups; CountTable table; bool hasGroups = table.testGroups(fileNames[i], thisTablesGroups); if (hasGroups) { allNoGroups = false; for (int j = 0; j < thisTablesGroups.size(); j++) { allGroups.insert(thisTablesGroups[j]); } }else { allContainGroups = false; } } int numGroups = allGroups.size(); //check to make sure all files are one type - quit if not if (!allContainGroups && !allNoGroups) { m->mothurOut("[ERROR]: your have countfiles that contains group information and count files that do not. These cannot be combined without loss of information, please correct.\n"); m->setControl_pressed(true); return 0; } if (m->getControl_pressed()) { return 0; } //Create Blank Table - (set&, map&, set&); //seqNames, seqName->group, groupNames set seqNames; map seqGroup; set g; CountTable completeTable; completeTable.createTable(seqNames, seqGroup, g); //append first one to get headers map groupIndex; if (allNoGroups) { util.appendBinaryFiles(fileNames[0], outputFileName); } else { //create groupMap to save time setting abundance vector int count = 0; for (set::iterator it = allGroups.begin(); it != allGroups.end(); it++) { completeTable.addGroup(*it); groupIndex[*it] = count; count++; } } //for each file for(int i = 0; i < numInputFiles; i++) { if (m->getControl_pressed()) { break; } if (allContainGroups) { CountTable table; table.readTable(fileNames[i], true, false); vector groups = table.getNamesOfGroups(); vector seqs = table.getNamesOfSeqs(); for (int j = 0; j < seqs.size(); j++) { if (m->getControl_pressed()) { break; } vector abunds = table.getGroupCounts(seqs[j]); vector newAbunds; newAbunds.resize(numGroups, 0); for (int k = 0; k < abunds.size(); k++) { if (abunds[k] != 0) { //we need to set abundance in vector with all groups //groups and abunds are in matching order. 
we know all groups are in groupIndex from above. int newIndex = groupIndex[groups[k]]; newAbunds[newIndex] = abunds[k]; } } completeTable.push_back(seqs[j], newAbunds, true); } } else { util.appendFilesWithoutHeaders(fileNames[i], outputFileName); } //No group info so simple append } if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return 0; } //print new table if (allContainGroups) { completeTable.printTable(outputFileName); } if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return 0; } //update current count file current->setCountFile(outputFileName); m->mothurOutEndLine(); m->mothurOut("Output File Names: \n"); m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["merge"].push_back(outputFileName); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MergeCountCommand", "execute"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/mergecountcommand.hpp000077500000000000000000000021471424121717000224510ustar00rootroot00000000000000// // mergecountcommand.hpp // Mothur // // Created by Sarah Westcott on 8/3/16. // Copyright © 2016 Schloss Lab. All rights reserved. // #ifndef mergecountcommand_hpp #define mergecountcommand_hpp #include "command.hpp" class MergeCountCommand : public Command { #ifdef UNIT_TEST //friend class TestMergeCountCommand; #endif public: MergeCountCommand(string); ~MergeCountCommand() = default; vector setParameters(); string getCommandName() { return "merge.count"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string) { return ""; } string getCitation() { return "http://www.mothur.org/wiki/Merge.count"; } string getDescription() { return "reads count files and combines them into a single count file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; string inputDir, countfile, output, outputFileName; vector outputNames, fileNames; int numInputFiles; }; #endif /* mergecountcommand_hpp */ mothur-1.48.0/source/commands/mergefilecommand.cpp000077500000000000000000000314001424121717000222250ustar00rootroot00000000000000/* * mergefilecommand.cpp * Mothur * * Created by Pat Schloss on 6/14/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. 
* */ #include "mergefilecommand.h" //********************************************************************************************************************** vector MergeFileCommand::setParameters(){ try { CommandParameter pinput("input", "String", "", "", "", "", "","",false,true,true); parameters.push_back(pinput); CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true); parameters.push_back(poutput); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter ptaxonomy("taxonomy", "", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptaxonomy); CommandParameter pfasta("fasta", "", "", "", "none", "none", "none","taxonomy",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); abort = false; calledHelp = false; appendMode = true; vector tempOutNames; outputTypes["merge"] = tempOutNames; outputTypes["fasta"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MergeFileCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MergeFileCommand::getHelpString(){ try { string helpString = ""; helpString += "The merge.file command takes a list of files separated by dashes and appends them into one file. Altternatively, the merge file command can combine the data of several files. 
For example, you can combine a fasta, taxonomy and name or count field to achieve outputs like: GQY1XT001C44N8 3677 Bacteria;Bacteroidetes;Bacteroidia;Bacteroidales;Porphyromonadaceae;Porphyromonadaceae_unclassified; C-G--T-T--GA-A-A-C-T-G-G--CG-T-T-C--T-T-G-A-G-T-G-G-GC-GA-G-A-A-G-T-A--TG-C-GG-A-ATG-C-G-T-G-GT-GT-A-G-CGGT-G-AAA--..."; helpString += "The merge.file command parameters are input and output or fasta, taxonomy, name and count."; helpString += "Example merge.file(input=small.fasta-large.fasta, output=all.fasta)."; helpString += "Example merge.file(fasta=final.fasta, name=final.names, taxonomy=final.taxonomy)."; return helpString; } catch(exception& e) { m->errorOut(e, "MergeFileCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MergeFileCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],merged,[extension]"; } else if (type == "merge") { pattern = ""; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MergeFileCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MergeFileCommand::MergeFileCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; string inputDir = validParameter.validPath(parameters, "inputdir"); if (inputDir == "not found"){ inputDir = ""; } string fileList = validParameter.validPath(parameters, "input"); if(fileList == "not found") { appendMode = false; fileList = ""; } else{ util.splitAtDash(fileList, fileNames); } outputFileName = validParameter.validPath(parameters, "output"); if (outputFileName == "not found") { appendMode = false; outputFileName = ""; } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); appendMode = false; } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); appendMode = false; } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not open") { taxfile = ""; abort = true; } else if (taxfile == "not found") { taxfile = ""; } else { current->setTaxonomyFile(taxfile); appendMode = false; } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); appendMode = false; } if (!appendMode) { //if you are not appending, fasta is required as well as at least one of taxonomy, name or count if (fastafile == "") { //look for current fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current 
fastafile and the fasta parameter is required.\n"); abort = true; } } if ((namefile == "") && (countfile == "") && (taxfile == "")) { taxfile = current->getTaxonomyFile(); if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current taxonomy, name or count files. At least one is required. \n"); abort = true; } } } } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } }else { numInputFiles = fileNames.size(); ifstream testFile; if(numInputFiles == 0){ m->mothurOut("you must enter two or more file names and you entered " + toString(fileNames.size()) + " file names\n"); abort=true; } else{ for(int i=0;igetLocations())) { } else { fileNames.erase(fileNames.begin()+i); i--; } //erase from file list path = util.hasPath(fileNames[i]); if (path != "") { if (outputdir == "") { outputdir = path; } } } if (outputdir != "") { outputFileName = outputdir + util.getSimpleName(outputFileName); } } } } } catch(exception& e) { m->errorOut(e, "MergeFileCommand", "MergeFileCommand"); exit(1); } } //********************************************************************************************************************** int MergeFileCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (appendMode) { util.mothurRemove(outputFileName); for(int i=0;igetControl_pressed()) { util.mothurRemove(outputFileName); return 0; } //set taxonomy file as new current taxonomyfile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(outputFileName); m->mothurOutEndLine(); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MergeFileCommand", "execute"); exit(1); } } //********************************************************************************************************************** string MergeFileCommand::mergeFileData(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[extension]"] = util.getExtension(fastafile); outputFileName = getOutputFileName("fasta", variables); ofstream out; util.openOutputFile(outputFileName, out); //extract seq counts from name or count file map nameMap; map::iterator itCount; bool useNameMap = true; if (countfile != "") { CountTable ct; ct.readTable(countfile, false, false); nameMap = ct.getNameMap(); }else if (namefile != "") { nameMap = util.readNames(namefile); }else { useNameMap = false; } map taxMap; map::iterator itTax; bool useTax = false; if (taxfile != "") { util.readTax(taxfile, taxMap, false); useTax = true; } ifstream in; util.openInputFile(fastafile, in); while(!in.eof()){ if (m->getControl_pressed()) { break; } Sequence currSeq(in); gobble(in); string comment = " "; if (useNameMap) { itCount = nameMap.find(currSeq.getName()); if (itCount != nameMap.end()) { comment += 
toString(itCount->second) + " "; nameMap.erase(itCount); }else { m->mothurOut("[ERROR]: Missing count data for " + currSeq.getName() + ", please correct.\n"); m->setControl_pressed(true); } } if (useTax) { itTax = taxMap.find(currSeq.getName()); if (itTax != taxMap.end()) { comment += itTax->second; taxMap.erase(itTax); }else { m->mothurOut("[ERROR]: Missing taxonomy for " + currSeq.getName() + ", please correct.\n"); m->setControl_pressed(true); } } currSeq.setComment(comment); currSeq.printSequence(out); } in.close(); out.close(); return outputFileName; } catch(exception& e) { m->errorOut(e, "MergeFileCommand", "mergeFileData"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/mergefilecommand.h000077500000000000000000000020041424121717000216700ustar00rootroot00000000000000#ifndef MERGEFILECOMMAND_H #define MERGEFILECOMMAND_H /* * mergefilecommand.h * Mothur * * Created by Pat Schloss on 6/14/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "mothur.h" #include "command.hpp" #include "sequence.hpp" #include "counttable.h" class MergeFileCommand : public Command { public: MergeFileCommand(string); ~MergeFileCommand(){} vector setParameters(); string getCommandName() { return "merge.files"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Merge.files"; } string getDescription() { return "appends files creating one file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector fileNames, outputNames; string outputFileName, fastafile, namefile, countfile, taxfile; int numInputFiles; bool abort; bool appendMode; string mergeFileData(); }; #endif mothur-1.48.0/source/commands/mergegroupscommand.cpp000077500000000000000000000617511424121717000226410ustar00rootroot00000000000000/* * mergegroupscommand.cpp * mothur * * Created by westcott on 1/24/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "mergegroupscommand.h" #include "counttable.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector MergeGroupsCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none","shared",false,false,true); parameters.push_back(pshared); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "none","group",false,false,true); parameters.push_back(pgroup); CommandParameter pcount("count", "InputTypes", "", "", "CountGroup", "sharedGroup", "countfasta","count",false,false,true); parameters.push_back(pcount); CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "countfasta","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pmethod("method", "Multiple", "sum-average-median", "sum", "", "", "","",false,false, true); parameters.push_back(pmethod); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MergeGroupsCommand::getHelpString(){ try { string helpString = ""; helpString += "The merge.groups command input files are shared, group, count, fasta and a design file. It reads the design file and merges the groups in the other files accordingly.\n"; helpString += "The design parameter allows you to assign your groups to sets. It is required. \n"; helpString += "The fasta parameter allows you to provide a fasta file associated with your count file. This is used if you are using the median method, so that sequences that are entirely removed from the counttable will also be removed from the fasta file. \n"; helpString += "The groups parameter allows you to specify which of the groups in your shared or group file you would like included. The group names are separated by dashes. By default all groups are selected.\n"; helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; helpString += "The groups parameter allows you to select groups you would like, and are also separated by dashes.\n"; helpString += "The method parameter allows you to select method you would like to use to merge the groups. Options are sum, average and median. 
Default=sum.\n"; helpString += "The merge.groups command should be in the following format: merge.groups(design=yourDesignFile, shared=yourSharedFile).\n"; helpString += "Example merge.groups(design=temp.design, groups=A-B-C, shared=temp.shared).\n"; helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MergeGroupsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "shared") { pattern = "[filename],merge,[extension]"; } else if (type == "group") { pattern = "[filename],merge,[extension]"; } else if (type == "count") { pattern = "[filename],merge,[extension]"; } else if (type == "fasta") { pattern = "[filename],merge,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MergeGroupsCommand::MergeGroupsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { abort = true; } else if (designfile == "not found") { //if there is a current shared file, use it designfile = current->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter.\n"); } else { m->mothurOut("You have no current designfile and the design parameter is required.\n"); abort = true; } }else { current->setDesignFile(designfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; sharedfile = ""; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; groupfile = ""; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; countfile = ""; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = "all"; } util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } method = validParameter.valid(parameters, "method"); if(method == "not found"){ method = "sum"; } if ((method != "sum") && (method != "average") && (method != "median")) { m->mothurOut(method + " is not a valid method. Options are sum, average and median. I will use sum.\n"); method = "sum"; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if ((sharedfile == "") && (groupfile == "") && (countfile == "")) { //give priority to group, then shared groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required.\n"); abort = true; } } } } if ((countfile == "") && (fastafile != "")) { m->mothurOut("[ERROR]: You may only use the fasta file with the count file, quitting.\n"); abort=true; } else if ((countfile != "") && (method == "average")) { m->mothurOut("You may not use the average method with the count file. 
I will use the sum method.\n"); method = "sum"; } else if ((countfile != "") && (method == "median") && (fastafile == "")) { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: Fasta file is required with the median method and a count file so that sequences removed from your count table can also be removed from your fasta file to avoid downstream file mismatches, quitting.\n"); abort=true; } } } } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "MergeGroupsCommand"); exit(1); } } //********************************************************************************************************************** int MergeGroupsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } designMap = new DesignMap(designfile); if (m->getControl_pressed()) { delete designMap; return 0; } if (method != "sum") { string defaultClass = designMap->getDefaultClass(); vector treatments = designMap->getCategory(defaultClass); set numGroupsPerTreatment; for (int i = 0; i < treatments.size(); i++) { if (m->getControl_pressed()) { break; } map > checkTreatments; vector temp; temp.push_back(treatments[i]); checkTreatments[defaultClass] = temp; numGroupsPerTreatment.insert(designMap->getNumUnique(checkTreatments)); } if (numGroupsPerTreatment.size() > 1) { m->mothurOut("[ERROR]: The median and average methods require you to have the same number of sequences in each treatment, quitting.\n"); delete designMap; return 0; } } if (groupfile != "") { processGroupFile(designMap); } if (sharedfile != "") { processSharedFile(designMap); } if (countfile != "") { processCountFile(designMap); } //reset groups parameter delete designMap; if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0;} //set shared file as new current sharedfile string currentName = ""; itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "execute"); exit(1); } } //********************************************************************************************************************** int MergeGroupsCommand::process(SharedRAbundVectors*& thisLookUp, ofstream& out, bool& printHeaders){ try { vector setNames = designMap->getCategory(); //create sharedRabundVectors vector data = thisLookUp->getSharedRAbundVectors(); //create SharedRAbundVectors for the merged groups. 
Fill with blank rabundFloatVectors SharedRAbundVectors* merged; merged = new SharedRAbundVectors(); for (int i = 0; i < setNames.size(); i++) { SharedRAbundVector* myLookup = new SharedRAbundVector(thisLookUp->getNumBins()); myLookup->setLabel(thisLookUp->getLabel()); myLookup->setGroup(setNames[i]); merged->push_back(myLookup); } //for each OTU for (int j = 0; j < data[0]->getNumBins(); j++) { if (m->getControl_pressed()) { break; } map > otusGroupAbunds; map >::iterator itAbunds; //for each sample for (int i = 0; i < data.size(); i++) { string grouping = designMap->get(data[i]->getGroup()); //what set to your belong to if (grouping == "not found") { m->mothurOut("[ERROR]: " + data[i]->getGroup() + " is not in your design file. Ignoring!\n"); grouping = "NOTFOUND"; } else { //Add this OTUs values to sets abunds itAbunds = otusGroupAbunds.find(grouping); if (itAbunds == otusGroupAbunds.end()) { //new group vector temp; temp.push_back(data[i]->get(j)); otusGroupAbunds[grouping] = temp; }else { (itAbunds->second).push_back(data[i]->get(j)); } } } //find results for this bin. Set merged value for this bin in the results for (itAbunds = otusGroupAbunds.begin(); itAbunds != otusGroupAbunds.end(); itAbunds++) { int abund = mergeAbund(itAbunds->second); merged->set(j, abund, itAbunds->first); } } //free memory for (int i = 0; i < data.size(); i++) { delete data[i]; } if (m->getControl_pressed()) { delete merged; return 0; } merged->eliminateZeroOTUS(); // remove any zero OTUs created by median option. //print new file merged->print(out, printHeaders); delete merged; return 0; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "process"); exit(1); } } //********************************************************************************************************************** int MergeGroupsCommand::processSharedFile(DesignMap*& designMap){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile)); variables["[extension]"] = util.getExtension(sharedfile); string outputFileName = getOutputFileName("shared", variables); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); bool printHeaders = true; while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } process(lookup, out, printHeaders); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "processSharedFile"); exit(1); } } //********************************************************************************************************************** int MergeGroupsCommand::processGroupFile(DesignMap*& designMap){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[extension]"] = util.getExtension(groupfile); string outputFileName = getOutputFileName("group", variables); outputTypes["group"].push_back(outputFileName); 
outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); //read groupfile GroupMap groupMap(groupfile); groupMap.readMap(); vector nameGroups = groupMap.getNamesOfGroups(); if (Groups.size() == 0) { Groups = nameGroups; } vector namesOfSeqs = groupMap.getNamesSeqs(); bool error = false; for (int i = 0; i < namesOfSeqs.size(); i++) { if (m->getControl_pressed()) { break; } string thisGroup = groupMap.getGroup(namesOfSeqs[i]); //are you in a group the user wants if (util.inUsersGroups(thisGroup, Groups)) { string thisGrouping = designMap->get(thisGroup); if (thisGrouping == "not found") { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is from group " + thisGroup + " which is not in your design file, please correct.\n"); error = true; } else { out << namesOfSeqs[i] << '\t' << thisGrouping << endl; } } } if (error) { m->setControl_pressed(true); } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "processGroupFile"); exit(1); } } //********************************************************************************************************************** int MergeGroupsCommand::processCountFile(DesignMap*& designMap){ try { CountTable countTable; if (!countTable.testGroups(countfile)) { m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n"); m->setControl_pressed(true); return 0; } //read countTable countTable.readTable(countfile, true, false); //fill Groups - checks for "all" and for any typo groups vector nameGroups = countTable.getNamesOfGroups(); if (Groups.size() == 0) { Groups = nameGroups; } vector dnamesGroups = designMap->getNamesGroups(); //sanity check bool error = false; if (nameGroups.size() == dnamesGroups.size()) { //at least there are the same number //is every group in counttable also in designmap for (int i = 0; i < nameGroups.size(); i++) { if (m->getControl_pressed()) { break; } if (!util.inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; } } } if (error) { m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n"); m->setControl_pressed(true); return 0; } //user selected groups - remove some groups from table if (Groups.size() != nameGroups.size()) { for (int i = 0; i < nameGroups.size(); i++) { if (!util.inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); } } } //ask again in case order changed nameGroups = countTable.getNamesOfGroups(); int numGroups = nameGroups.size(); //create new table CountTable newTable; vector treatments = designMap->getCategory(); map > clearedMap; for (int i = 0; i < treatments.size(); i++) { newTable.addGroup(treatments[i]); vector temp; clearedMap[treatments[i]] = temp; } treatments = newTable.getNamesOfGroups(); set namesToRemove; vector namesOfSeqs = countTable.getNamesOfSeqs(); for (int i = 0; i < namesOfSeqs.size(); i++) { if (m->getControl_pressed()) { break; } vector thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]); map > thisSeqsMap = clearedMap; for (int j = 0; j < numGroups; j++) { thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]); } //create new counts for seq for new table vector newCounts; int totalAbund = 0; for (int j = 0; j < treatments.size(); j++){ int abund = mergeAbund(thisSeqsMap[treatments[j]]); newCounts.push_back(abund); //order matters, add in count for each treatment in new table. 
totalAbund += abund; } //add seq to new table if(totalAbund == 0) { namesToRemove.insert(namesOfSeqs[i]); }else { newTable.push_back(namesOfSeqs[i], newCounts); } } if (error) { m->setControl_pressed(true); return 0; } //remove sequences zeroed out by median method if (namesToRemove.size() != 0) { //print names ofstream out; string accnosFile = "accnosFile.temp"; util.openOutputFile(accnosFile, out); //output to .accnos file for (set::iterator it = namesToRemove.begin(); it != namesToRemove.end(); it++) { if (m->getControl_pressed()) { out.close(); util.mothurRemove(accnosFile); return 0; } out << *it << endl; } out.close(); //run remove.seqs string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); util.mothurRemove(accnosFile); } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string outputFileName = getOutputFileName("count", variables); outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); newTable.printTable(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "processCountFile"); exit(1); } } //********************************************************************************************************************** int MergeGroupsCommand::mergeAbund(vector values){ try { int abund = 0; if (method == "sum") { abund = util.sum(values); }else if (method == "average") { abund = util.average(values); }else if (method == "median") { abund = util.median(values); }else { m->mothurOut("[ERROR]: Invalid method. \n"); m->setControl_pressed(true); return 0; } return abund; } catch(exception& e) { m->errorOut(e, "MergeGroupsCommand", "mergeAbund"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/mergegroupscommand.h000077500000000000000000000026471424121717000223050ustar00rootroot00000000000000#ifndef MERGEGROUPSCOMMAND_H #define MERGEGROUPSCOMMAND_H /* * mergegroupscommand.h * mothur * * Created by westcott on 1/24/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" #include "inputdata.h" #include "designmap.h" class MergeGroupsCommand : public Command { #ifdef UNIT_TEST friend class TestMergeGroupsCommand; #endif public: MergeGroupsCommand(string); ~MergeGroupsCommand() = default; vector setParameters(); string getCommandName() { return "merge.groups"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Merge.groups"; } string getDescription() { return "reads shared file and a design file and merges the groups in the shared file that are in the same grouping in the design file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: DesignMap* designMap; bool abort, allLines, pickedGroups; set labels; //holds labels to be used string groups, label, inputDir, designfile, sharedfile, groupfile, countfile, method, fastafile; vector Groups, outputNames; int process(SharedRAbundVectors*&, ofstream&, bool&); int processSharedFile(DesignMap*&); int processGroupFile(DesignMap*&); int processCountFile(DesignMap*&); int mergeAbund(vector); }; #endif mothur-1.48.0/source/commands/mergeotuscommand.cpp000066400000000000000000000655731424121717000223170ustar00rootroot00000000000000// // mergeotuscommand.cpp // Mothur // // Created by Sarah Westcott on 12/10/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #include "mergeotuscommand.hpp" //********************************************************************************************************************** vector MergeOTUsCommand::setParameters(){ try { CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","constaxonomy",false,true, true); parameters.push_back(pconstaxonomy); CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","shared",false,true,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "none", "none", "none","relabund",false,true,true); parameters.push_back(prelabund); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(plist); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter ptaxlevel("taxlevel", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(ptaxlevel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["relabund"] = tempOutNames; outputTypes["constaxonomy"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MergeOTUsCommand::getHelpString(){ try { string helpString = ""; helpString += "The merge.otus command parameters are shared, list, relabund, constaxonomy, taxlevel and label. 
constaxonomy is a required, unless you have a valid current file.\n"; helpString += "The taxlevel parameter allows you to specify the taxonomy level you would like to use when merging. Default=maxlevel.\n"; helpString += "Example merge.otus(shared=yourSharedFile, constaxonomy=yourConsTaxonomyFile).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MergeOTUsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "shared") { pattern = "[filename],merge,[extension]"; } else if (type == "list") { pattern = "[filename],merge,[extension]"; } else if (type == "relabund") { pattern = "[filename],merge,[extension]"; } else if (type == "constaxonomy") { pattern = "[filename],[label],merge,cons.taxonomy"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MergeOTUsCommand::MergeOTUsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not found") { sharedfile = ""; } else if (sharedfile == "not open") { sharedfile = ""; abort = true; } else { current->setSharedFile(sharedfile); } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not found") { listfile = ""; } else if (listfile == "not open") { listfile = ""; abort = true; } else { current->setListFile(listfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not found") { relabundfile = ""; } else if (relabundfile == "not open") { relabundfile = ""; abort = true; } else { current->setRelAbundFile(relabundfile); } constaxfile = validParameter.validFile(parameters, "constaxonomy"); //required if (constaxfile == "not found") { constaxfile = current->getConsTaxonomyFile(); if (constaxfile != "") { m->mothurOut("Using " + constaxfile + " as input file for the constaxonomy parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current constaxonomy file and the constaxonomy parameter is required.\n"); abort = true; } } else if (constaxfile == "not open") { constaxfile = ""; abort = true; } else { current->setConsTaxonomyFile(constaxfile); } if ((relabundfile == "") && (listfile == "") && (sharedfile == "")) { //no files to merge provided, look for currents //is there are current file available for any of these? 
//give priority to shared, then list, then relabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { m->mothurOut("Using " + relabundfile + " as input file for the rabund parameter.\n"); } else { m->mothurOut("[ERROR]: No valid current files. You must provide a list, relabund or shared file.\n"); abort = true; } } } } if (outputdir == ""){ outputdir += util.hasPath(constaxfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } string temp = validParameter.valid(parameters, "taxlevel"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, taxLevelCutoff); //-1 means use max level } } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "MergeOTUsCommand"); exit(1); } } //********************************************************************************************************************** MergeOTUsCommand::~MergeOTUsCommand(){} //********************************************************************************************************************** int MergeOTUsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //read in consensus taxonomy vector conTaxes = util.readConsTax(constaxfile); PhyloTree tree; //add consensus taxonomy to phylo tree for (size_t i = 0; i < conTaxes.size(); i++) { tree.addSeqToTree(conTaxes[i].name, conTaxes[i].taxonomy); } //build tree tree.assignHeirarchyIDs(0); //get max level of phylotree int maxlevel = tree.getMaxLevel(); //is the taxlevel parameter valid - not greater than the max level in the file. If greater reduce to maxlevel. if (taxLevelCutoff == -1) { //default, use maxlevel taxLevelCutoff = maxlevel; }else if ( taxLevelCutoff > maxlevel) { //invalid taxlevel, use maxlevel m->mothurOut("[WARNING]: The taxlevel selected is larger than the maxlevel in your constaxonomy file, disregarding. 
Using the max level of " + toString(maxlevel) + " for the taxlevel parameter.\n"); taxLevelCutoff = maxlevel; } for (size_t i = 0; i < conTaxes.size(); i++) { string otuTax = conTaxes[i].taxonomy; if (taxLevelCutoff != maxlevel) { otuTax = util.trimTax(otuTax, taxLevelCutoff); } otuLabel2ConsTax[conTaxes[i].name] = otuTax; if (listfile != "") { otuLabel2ConsSize[conTaxes[i].name] = conTaxes[i].abundance; } } conTaxes.clear(); //extract tree nodes at taxlevel vector thisLevelsNodes = tree.getNodes(taxLevelCutoff); //merge otus at each node at this level if (listfile != "") { mergeListOTUs(thisLevelsNodes); } else if (sharedfile != "") { mergeSharedOTUs(thisLevelsNodes); } else if (relabundfile != "") { mergeRelabundOTUs(thisLevelsNodes); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } itTypes = outputTypes.find("relabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRelAbundFile(currentName); } } //set constaxonomy file as new current constaxonomyfile itTypes = outputTypes.find("constaxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setConsTaxonomyFile(currentName); } } m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "execute"); exit(1); } } //********************************************************************************************************************** int MergeOTUsCommand::mergeSharedOTUs(vector& nodes){ try { string numNodes = toString(nodes.size()); string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile)); variables["[extension]"] = util.getExtension(sharedfile); string outputFileName = getOutputFileName("shared", variables); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); bool printHeaders = true; while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } process(lookup, out, printHeaders, nodes); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "mergeSharedOTUs"); exit(1); } } //********************************************************************************************************************** int MergeOTUsCommand::process(SharedRAbundVectors*& thisLookUp, ofstream& out, bool& printHeaders, vector& nodes){ try { 
vector groups = thisLookUp->getNamesGroups(); //create SharedRAbundVectors for the merged groups. Fill with blank rabundFloatVectors SharedRAbundVectors* merged; merged = new SharedRAbundVectors(); for (int i = 0; i < groups.size(); i++) { SharedRAbundVector* myLookup = new SharedRAbundVector(); myLookup->setLabel(thisLookUp->getLabel()); myLookup->setGroup(groups[i]); merged->push_back(myLookup); } //translate otuNames to bin numbers map otuName2BinNumber; map::iterator it; for (int j = 0; j < thisLookUp->getNumBins(); j++) { if (m->getControl_pressed()) { break; } otuName2BinNumber[thisLookUp->getOTUName(j)] = j; } if (m->getControl_pressed()) { delete merged; return 0; } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(constaxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxfile)); variables["[label]"] = thisLookUp->getLabel(); string outputFileName = getOutputFileName("constaxonomy", variables); outputTypes["constaxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream outCons; util.openOutputFile(outputFileName, outCons); outCons << "OTU\tSize\tTaxonomy\n"; for (int i = 0; i < nodes.size(); i++) { if (m->getControl_pressed()) { break; } vector otuNames = nodes[i].accessions; //otus to merge string newOTUName = otuNames[0]; vector abunds; abunds.resize(groups.size(), 0); for (int j = 0; j < otuNames.size(); j++) { it = otuName2BinNumber.find(otuNames[j]); //do we have this otu in the shared file if (it != otuName2BinNumber.end()) { //we found it vector thisOtusAbunds = thisLookUp->getOTU(it->second); for (int k = 0; k < thisOtusAbunds.size(); k++) { abunds[k] += thisOtusAbunds[k]; } //add this otus abunds to merged otu abunds }else { m->mothurOut("[ERROR]: missing otu " + otuNames[j] + " from shared file, cannot continue.\n"); m->setControl_pressed(true); break; } } merged->push_back(abunds, newOTUName); //merge consensus taxonomy results int sumOtu = util.sum(abunds); outCons << newOTUName << '\t' << sumOtu << '\t' << otuLabel2ConsTax[newOTUName] << endl; } if (m->getControl_pressed()) { delete merged; return 0; } merged->eliminateZeroOTUS(); // remove any zero OTUs created by median option. 
//print new file merged->print(out, printHeaders); delete merged; outCons.close(); return 0; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "process"); exit(1); } } //********************************************************************************************************************** int MergeOTUsCommand::mergeListOTUs(vector& nodes){ try { string numNodes = toString(nodes.size()); string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); string outputFileName = getOutputFileName("list", variables); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); InputData input(listfile, "list", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); bool printHeaders = true; while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } process(list, out, printHeaders, nodes); delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "mergeListOTUs"); exit(1); } } //********************************************************************************************************************** int MergeOTUsCommand::process(ListVector*& list, ofstream& out, bool& printHeaders, vector& nodes){ try { ListVector* merged; merged = new ListVector(); merged->setLabel(list->getLabel()); //translate otuNames to bin numbers map otuName2BinNumber; map::iterator it; for (int j = 0; j < list->getNumBins(); j++) { if (m->getControl_pressed()) { break; } otuName2BinNumber[list->getOTUName(j)] = j; } if (m->getControl_pressed()) { delete merged; return 0; } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(constaxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxfile)); variables["[label]"] = list->getLabel(); string outputFileName = getOutputFileName("constaxonomy", variables); outputTypes["constaxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream outCons; util.openOutputFile(outputFileName, outCons); outCons << "OTU\tSize\tTaxonomy\n"; for (int i = 0; i < nodes.size(); i++) { if (m->getControl_pressed()) { break; } vector otuNames = nodes[i].accessions; //otus to merge string newOTUName = otuNames[0]; string mergedNames = ""; it = otuName2BinNumber.find(newOTUName); //do we have this otu in the shared file int sizeOtu = 0; if (it != otuName2BinNumber.end()) { //we found it mergedNames = list->get(it->second); sizeOtu += otuLabel2ConsSize[newOTUName]; }else { m->mothurOut("[ERROR]: missing otu " + newOTUName + " from list file, cannot continue.\n"); m->setControl_pressed(true); } for (int j = 1; j < otuNames.size(); j++) { it = otuName2BinNumber.find(otuNames[j]); //do we have this otu in the shared file if (it != otuName2BinNumber.end()) { //we found it string bin = list->get(it->second); mergedNames += "," + bin; sizeOtu += otuLabel2ConsSize[otuNames[j]]; }else { m->mothurOut("[ERROR]: missing otu " + otuNames[j] + " from list file, cannot continue.\n"); m->setControl_pressed(true); break; } } int sumOtu = util.getNumNames(mergedNames); 
merged->push_back(mergedNames, sumOtu, newOTUName); outCons << newOTUName << '\t' << sizeOtu << '\t' << otuLabel2ConsTax[newOTUName] << endl; } if (m->getControl_pressed()) { delete merged; return 0; } //print new file merged->print(out, printHeaders); delete merged; outCons.close(); return 0; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "process"); exit(1); } } //********************************************************************************************************************** int MergeOTUsCommand::mergeRelabundOTUs(vector& nodes){ try { string numNodes = toString(nodes.size()); string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(relabundfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(relabundfile)); variables["[extension]"] = util.getExtension(relabundfile); string outputFileName = getOutputFileName("relabund", variables); outputTypes["relabund"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); InputData input(relabundfile, "relabund", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundFloatVectors* lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); bool printHeaders = true; while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } process(lookup, out, printHeaders, nodes); delete lookup; lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "mergeRelabundOTUs"); exit(1); } } //********************************************************************************************************************** int MergeOTUsCommand::process(SharedRAbundFloatVectors*& thisLookUp, ofstream& out, bool& printHeaders, vector& nodes){ try { vector groups = thisLookUp->getNamesGroups(); //create SharedRAbundVectors for the merged groups. 
Fill with blank rabundFloatVectors SharedRAbundFloatVectors* merged; merged = new SharedRAbundFloatVectors(); for (int i = 0; i < groups.size(); i++) { SharedRAbundFloatVector* myLookup = new SharedRAbundFloatVector(); myLookup->setLabel(thisLookUp->getLabel()); myLookup->setGroup(groups[i]); merged->push_back(myLookup); } //translate otuNames to bin numbers map otuName2BinNumber; map::iterator it; for (int j = 0; j < thisLookUp->getNumBins(); j++) { if (m->getControl_pressed()) { break; } otuName2BinNumber[thisLookUp->getOTUName(j)] = j; } if (m->getControl_pressed()) { delete merged; return 0; } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(constaxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxfile)); variables["[label]"] = thisLookUp->getLabel(); string outputFileName = getOutputFileName("constaxonomy", variables); outputTypes["constaxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream outCons; util.openOutputFile(outputFileName, outCons); outCons << "OTU\tSize\tTaxonomy\n"; for (int i = 0; i < nodes.size(); i++) { if (m->getControl_pressed()) { break; } vector otuNames = nodes[i].accessions; //otus to merge string newOTUName = otuNames[0]; vector abunds; abunds.resize(groups.size(), 0); for (int j = 0; j < otuNames.size(); j++) { it = otuName2BinNumber.find(otuNames[j]); //do we have this otu in the shared file if (it != otuName2BinNumber.end()) { //we found it vector thisOtusAbunds = thisLookUp->getOTU(it->second); for (int k = 0; k < thisOtusAbunds.size(); k++) { abunds[k] += thisOtusAbunds[k]; } //add this otus abunds to merged otu abunds }else { m->mothurOut("[ERROR]: missing otu " + otuNames[j] + " from relabund file, cannot continue.\n"); m->setControl_pressed(true); break; } } merged->push_back(abunds, newOTUName); //merge consensus taxonomy results float sumOtu = util.sum(abunds); outCons << newOTUName << '\t' << sumOtu << '\t' << otuLabel2ConsTax[newOTUName] << endl; } if (m->getControl_pressed()) { delete merged; return 0; } merged->eliminateZeroOTUS(); // remove any zero OTUs created by median option. //print new file merged->print(out, printHeaders); delete merged; outCons.close(); return 0; } catch(exception& e) { m->errorOut(e, "MergeOTUsCommand", "process"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/mergeotuscommand.hpp000066400000000000000000000031241424121717000223040ustar00rootroot00000000000000// // mergeotuscommand.hpp // Mothur // // Created by Sarah Westcott on 12/10/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
// #ifndef mergeotuscommand_hpp #define mergeotuscommand_hpp #include "command.hpp" #include "phylotree.h" #include "inputdata.h" class MergeOTUsCommand : public Command { public: MergeOTUsCommand(string); ~MergeOTUsCommand(); vector setParameters(); string getCommandName() { return "merge.otus"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Merge.otus"; } string getDescription() { return "combine otus based on consensus taxonomy"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines; string label, constaxfile, sharedfile, listfile, relabundfile; int taxLevelCutoff; vector Groups, outputNames; set labels; map otuLabel2ConsTax; map otuLabel2ConsSize; //for use with list file, since list file only contains uniques int mergeListOTUs(vector&); int mergeSharedOTUs(vector&); int mergeRelabundOTUs(vector&); int process(SharedRAbundVectors*&, ofstream&, bool&, vector& nodes); int process(ListVector*&, ofstream&, bool&, vector& nodes); int process(SharedRAbundFloatVectors*&, ofstream&, bool&, vector& nodes); }; #endif /* mergeotuscommand_hpp */ mothur-1.48.0/source/commands/mergesfffilecommand.cpp000077500000000000000000000410771424121717000227370ustar00rootroot00000000000000// // mergesfffilecommand.cpp // Mothur // // Created by Sarah Westcott on 1/31/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. // #include "mergesfffilecommand.h" #include "endiannessmacros.h" //******************************************************************************** MergeSfffilesCommand::~MergeSfffilesCommand(){ for (int i = 0; i < commonHeaders.size(); i++) { delete commonHeaders[i]; } commonHeaders.clear(); } //******************************************************************************** vector MergeSfffilesCommand::setParameters(){ try { CommandParameter psff("sff", "InputTypes", "", "", "sffFile", "sffFile", "none","sff",false,false); parameters.push_back(psff); CommandParameter pfile("file", "InputTypes", "", "", "sffFile", "sffFile", "none","sff",false,false); parameters.push_back(pfile); CommandParameter pkeytrim("keytrim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeytrim); CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true); parameters.push_back(poutput); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["sff"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MergeSfffilesCommand", "setParameters"); exit(1); } } //********************************************************************************** string MergeSfffilesCommand::getHelpString(){ try { string helpString = ""; helpString += "The merge.sfffiles command reads a sff file or a file containing a list of sff files and merges the individual files into a single sff file. \n"; helpString += "The merge.sfffiles command parameters are sff, file and output. sff or file is required. 
\n"; helpString += "The sff parameter allows you to enter the sff list of sff files separated by -'s.\n"; helpString += "The file parameter allows you to provide a file containing a list of sff files to merge. \n"; helpString += "The keytrim parameter allows you to mergesff files with different keysequence by trimming them to the first 4 characters. Provided the first 4 match. \n"; helpString += "The output parameter allows you to provide an output filename. \n"; helpString += "Example sffinfo(sff=mySffFile.sff-mySecond.sff).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MergeSfffilesCommand", "getHelpString"); exit(1); } } //******************************************************************************* string MergeSfffilesCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "sff") { pattern = "[filename],"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MergeSfffilesCommand", "getOutputPattern"); exit(1); } } //********************************************************************************** MergeSfffilesCommand::MergeSfffilesCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; string inputDir = validParameter.validPath(parameters, "inputdir"); if (inputDir == "not found"){ inputDir = ""; } sffFilename = validParameter.validPath(parameters, "sff"); if (sffFilename == "not found") { sffFilename = ""; } else { util.splitAtDash(sffFilename, filenames); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < filenames.size(); i++) { bool ignore = false; if (filenames[i] == "current") { filenames[i] = current->getSFFFile(); if (filenames[i] != "") { m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current.\n"); } else { m->mothurOut("You have no current sfffile, ignoring current.\n"); ignore=true; //erase from file list filenames.erase(filenames.begin()+i); i--; } } if (!ignore) { if (inputDir != "") { string path = util.hasPath(filenames[i]); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { filenames[i] = inputDir + filenames[i]; } } bool ableToOpen = util.checkLocations(filenames[i], current->getLocations()); if (!ableToOpen) { m->mothurOut("Unable to open " + filenames[i] + ". 
It will be disregarded.\n"); filenames.erase(filenames.begin()+i); //erase from file list i--; }else { current->setSFFFile(filenames[i]); } } } } file = validParameter.validFile(parameters, "file"); if (file == "not open") { abort = true; } else if (file == "not found") { file = ""; } if ((file == "") && (filenames.size() == 0)) { m->mothurOut("[ERROR]: no valid files.\n"); abort = true; } if ((file != "") && (filenames.size() != 0)) { //both are given m->mothurOut("[ERROR]: cannot use file option and sff option at the same time, choose one.\n"); abort = true; } outputFile = validParameter.validPath(parameters, "output"); if (outputFile == "not found") { m->mothurOut("you must enter an output file name\n"); abort=true; } if (outputdir != "") { outputFile = outputdir + util.getSimpleName(outputFile); } string temp = validParameter.valid(parameters, "keytrim"); if (temp == "not found") { temp = "F"; } keyTrim = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "MergeSfffilesCommand", "MergeSfffilesCommand"); exit(1); } } //***************************************************************************** int MergeSfffilesCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (file != "") { readFile(); if (outputdir == "") { outputdir = util.hasPath(file); } } ofstream out; map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(outputFile); } variables["[filename]"] = thisOutputDir + util.getSimpleName(outputFile); outputFile = getOutputFileName("sff",variables); util.openOutputFileBinary(outputFile, out); outputNames.push_back(outputFile); outputTypes["sff"].push_back(outputFile); outputFileHeader = outputFile + ".headers"; numTotalReads = 0; for (int s = 0; s < filenames.size(); s++) { if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } long start = time(nullptr); filenames[s] = util.getFullPathName(filenames[s]); m->mothurOut("\nMerging info from " + filenames[s] + " ..." 
); m->mothurOutEndLine(); int numReads = mergeSffInfo(filenames[s], out); m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to merge " + toString(numReads) + ".\n"); } out.close(); //create new common header and add to merged file adjustCommonHeader(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set sff file as new current sff file string currentName = ""; itTypes = outputTypes.find("sff"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSFFFile(currentName); } } //report output filenames m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MergeSfffilesCommand", "execute"); exit(1); } } //***************************************************************************** int MergeSfffilesCommand::mergeSffInfo(string input, ofstream& out){ try { currentFileName = input; ifstream in; util.openInputFileBinary(input, in); SffCommonHeader* header = new SffCommonHeader(); bool goodHeader = header->read(in); if (!goodHeader) { return 0; } commonHeaders.push_back(header); //save for adjustHeader sanity check //read through the sff file int count = 0; int numFlows = header->getNumFlows(); while (!in.eof()) { //read data SffRead* read = new SffRead(numFlows); bool okay = read->readSff(in); if (!okay) { break; } read->printSff(out); numTotalReads++; count++; delete read; //report progress if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } if (m->getControl_pressed()) { count = 0; break; } if (count >= header->getNumReads()) { break; } } //report progress if (!m->getControl_pressed()) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } } in.close(); return count; } catch(exception& e) { m->errorOut(e, "MergeSfffilesCommand", "mergeSffInfo"); exit(1); } } //**************************************************************************** void MergeSfffilesCommand::adjustCommonHeader(){ try { //sanity check bool okayMagic = true; bool okayVersion = true; bool okayHeader = true; bool okayKeyLength = true; bool okayNumFlows = true; bool okayformatCode = true; bool okayflowChar = true; bool okayKeySequence = true; if (commonHeaders.size() != 0) { unsigned int magicN = commonHeaders[0]->getMagicNumber(); string version = commonHeaders[0]->getVersion(); unsigned short headerLength = commonHeaders[0]->getHeaderLength(); unsigned short keyLength = commonHeaders[0]->getKeyLength(); unsigned short numFlows = commonHeaders[0]->getNumFlows(); int flowCode = commonHeaders[0]->getFlowgramFormat(); string flowChars = commonHeaders[0]->getFlows(); string keySeq = commonHeaders[0]->getKeySequence(); for (int i = 1; i < commonHeaders.size(); i++) { if (commonHeaders[i]->getMagicNumber() != magicN) { okayMagic = false; m->mothurOut("[ERROR]: merge issue with common headers. Magic numbers do not match. " + filenames[0] + " magic number is " + toString(magicN) + ", but " + filenames[i] + " magic number is " + toString(commonHeaders[i]->getMagicNumber()) + ".\n"); } if (commonHeaders[i]->getVersion() != version) { okayVersion = false; m->mothurOut("[ERROR]: merge issue with common headers. Versions do not match. 
" + filenames[0] + " version is " + version + ", but " + filenames[i] + " version is " + commonHeaders[i]->getVersion() + ".\n"); } if (commonHeaders[i]->getHeaderLength() != headerLength) { okayHeader = false; m->mothurOut("[ERROR]: merge issue with common headers. Header lengths do not match. " + filenames[0] + " header length is " + toString(headerLength) + ", but " + filenames[i] + " header length is " + toString(commonHeaders[i]->getHeaderLength()) + ".\n"); } if (commonHeaders[i]->getKeyLength() != keyLength) { okayKeyLength = false; m->mothurOut("[ERROR]: merge issue with common headers. Key Lengths do not match. " + filenames[0] + " Key length is " + toString(keyLength) + ", but " + filenames[i] + " key length is " + toString(commonHeaders[i]->getKeyLength()) + ".\n"); } if (commonHeaders[i]->getNumFlows() != numFlows) { okayNumFlows = false; m->mothurOut("[ERROR]: merge issue with common headers. Number of flows per read do not match. " + filenames[0] + " number of flows is " + toString(numFlows) + ", but " + filenames[i] + " number of flows is " + toString(commonHeaders[i]->getNumFlows()) + ".\n"); } if (commonHeaders[i]->getFlowgramFormat() != flowCode) { okayformatCode = false; m->mothurOut("[ERROR]: merge issue with common headers. Flow format codes do not match. " + filenames[0] + " Flow format code is " + toString(flowCode) + ", but " + filenames[i] + " flow format code is " + toString(commonHeaders[i]->getFlowgramFormat()) + ".\n"); } if (commonHeaders[i]->getFlows() != flowChars) { okayflowChar = false; m->mothurOut("[ERROR]: merge issue with common headers. Flow characters do not match. " + filenames[0] + " Flow characters are " + flowChars + ", but " + filenames[i] + " flow characters are " + commonHeaders[i]->getFlows() + ".\n"); } if (commonHeaders[i]->getKeySequence() != keySeq) { okayKeySequence = false; if (keyTrim) { m->mothurOut("[WARNING]: merge issue with common headers. Key sequences do not match. " + filenames[0] + " Key sequence is " + keySeq + ", but " + filenames[i] + " key sequence is " + commonHeaders[i]->getKeySequence() + ". We will attempt to trim them.\n"); }else { m->mothurOut("[ERROR]: merge issue with common headers. Key sequences do not match. " + filenames[0] + " Key sequence is " + keySeq + ", but " + filenames[i] + " key sequence is " + commonHeaders[i]->getKeySequence() + ".\n"); } } } }else { m->setControl_pressed(true); return; } //should never get here bool modify = false; if (!okayMagic || !okayVersion || !okayHeader || !okayKeyLength || !okayNumFlows || !okayformatCode || !okayflowChar) { m->setControl_pressed(true); return; } if (!okayKeySequence) { bool okayKeySequence2 = true; string keySeq = commonHeaders[0]->getKeySequence().substr(0,4); for (int i = 1; i < commonHeaders.size(); i++) { if ((commonHeaders[i]->getKeySequence().substr(0,4)) != keySeq) { okayKeySequence2 = false; } } if (okayKeySequence2 && keyTrim) { modify = true; m->mothurOut("We are able to trim the key sequences. 
Merged key seqeunce will be " + keySeq + ".\n"); } } ofstream out; util.openOutputFileBinaryAppend(outputFileHeader, out); commonHeaders[0]->printSampleCommonHeader(out, numTotalReads); out.close(); util.appendSFFFiles(outputFile, outputFileHeader); util.renameFile(outputFileHeader, outputFile); util.mothurRemove(outputFileHeader); } catch(exception& e) { m->errorOut(e, "MergeSfffilesCommand", "adjustCommonHeader"); exit(1); } } //************************************************************************************* void MergeSfffilesCommand::readFile(){ try { ifstream in; util.openInputFile(file, in); string filename; while(!in.eof()) { if (m->getControl_pressed()) { return; } in >> filename; gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: filename = " + filename + ".\n"); } bool ableToOpen = util.checkLocations(filename, current->getLocations()); if (!ableToOpen) { //can't find it m->mothurOut("[WARNING]: can't find " + filename + ", ignoring.\n"); }else{ filenames.push_back(filename); } } in.close(); } catch(exception& e) { m->errorOut(e, "MergeSfffilesCommand", "readFile"); exit(1); } } //****************************************************************************************** mothur-1.48.0/source/commands/mergesfffilecommand.h000077500000000000000000000025641424121717000224020ustar00rootroot00000000000000// // mergesfffilecommand.h // Mothur // // Created by Sarah Westcott on 1/31/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. // #ifndef Mothur_mergesfffilecommand_h #define Mothur_mergesfffilecommand_h #include "command.hpp" #include "sffheader.hpp" #include "sffread.hpp" /**********************************************************/ class MergeSfffilesCommand : public Command { public: MergeSfffilesCommand(string); ~MergeSfffilesCommand(); vector setParameters(); string getCommandName() { return "merge.sfffiles"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/merge.sfffiles"; } string getDescription() { return "merge individual sfffiles into a single .sff file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string sffFilename, file, currentFileName; vector filenames, outputNames; bool abort, keyTrim; int numTotalReads, allFilesnumFlowReads, allFileskeyLength; string outputFile, outputFileHeader; vector commonHeaders; //extract sff file functions int mergeSffInfo(string, ofstream&); void adjustCommonHeader(); void readFile(); }; /**********************************************************/ #endif mothur-1.48.0/source/commands/mergetaxsummarycommand.cpp000077500000000000000000000323271424121717000235310ustar00rootroot00000000000000// // mergetaxsummarycommand.cpp // Mothur // // Created by Sarah Westcott on 2/13/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
// #include "mergetaxsummarycommand.h" //********************************************************************************************************************** vector MergeTaxSummaryCommand::setParameters(){ try { CommandParameter pinput("input", "String", "", "", "", "", "","",false,true,true); parameters.push_back(pinput); CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true); parameters.push_back(poutput); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["taxsummary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MergeTaxSummaryCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MergeTaxSummaryCommand::getHelpString(){ try { string helpString = ""; helpString += "The merge.taxsummary command takes a list of tax.summary files separated by dashes and merges them into one file."; helpString += "The merge.taxsummary command parameters are input and output."; helpString += "Example merge.taxsummary(input=small.tax.summary-large.tax.summary, output=all.tax.summary)."; return helpString; } catch(exception& e) { m->errorOut(e, "MergeTaxSummaryCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** MergeTaxSummaryCommand::MergeTaxSummaryCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true; } else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; string inputDir = validParameter.validPath(parameters, "inputdir"); if (inputDir == "not found"){ inputDir = ""; } string fileList = validParameter.validPath(parameters, "input"); if(fileList == "not found") { m->mothurOut("you must enter two or more file names\n"); abort=true; } else{ util.splitAtDash(fileList, fileNames); } numInputFiles = fileNames.size(); ifstream testFile; if(numInputFiles == 0){ m->mothurOut("you must enter two or more file names and you entered " + toString(fileNames.size()) + " file names\n"); abort=true; } else{ for(int i=0;igetLocations()); if (!ableToOpen) { m->mothurOut("Unable to open " + fileNames[i] + ". 
It will be disregarded.\n"); //erase from file list fileNames.erase(fileNames.begin()+i); i--; } } } outputFileName = validParameter.validPath(parameters, "output"); if (outputFileName == "not found") { m->mothurOut("you must enter an output file name\n"); abort=true; } else if (outputdir != "") { outputFileName = outputdir + util.getSimpleName(outputFileName); } } } catch(exception& e) { m->errorOut(e, "MergeTaxSummaryCommand", "MergeTaxSummaryCommand"); exit(1); } } //********************************************************************************************************************** int MergeTaxSummaryCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } outputFileName = util.getFullPathName(outputFileName); util.mothurRemove(outputFileName); vector tree; tree.push_back(rawTaxNode("Root")); tree[0].rank = "0"; bool hasGroups = true; set groups; for (int i = 0; i < fileNames.size(); i++) { ifstream in; util.openInputFile(fileNames[i], in); string temp = util.getline(in); gobble(in); vector headers = util.splitWhiteSpace(temp); vector thisFilesGroups; if (headers.size() == 5) { hasGroups = false; } else { for (int j = 5; j < headers.size(); j++) { groups.insert(headers[j]); thisFilesGroups.push_back(headers[j]); } } int level, daugterLevels, total; float totalFloat; string rankId, tax; tax = ""; map levelToCurrentNode; levelToCurrentNode[0] = 0; while (!in.eof()) { if (m->getControl_pressed()) { return 0; } in >> level >> rankId; gobble(in); string rest = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpaceWithQuotes(rest); map groupCounts; int pcount = pieces.size()-1; if (thisFilesGroups.size() != 0) { for (int j = thisFilesGroups.size()-1; j >= 0; j--) { int tempNum; util.mothurConvert(pieces[pcount], tempNum); groupCounts[thisFilesGroups[j]] = tempNum; pcount--; } } //column 5 util.mothurConvert(pieces[pcount], totalFloat); pcount--; if ((totalFloat < 1) && (totalFloat > 0)) { m->mothurOut("[ERROR]: cannot merge tax.summary files with relative abundances.\n"); m->setControl_pressed(true); in.close(); return 0; }else { total = int(totalFloat); } //column 4 util.mothurConvert(pieces[pcount], daugterLevels); //assemble tax - this is done in case taxonomy contains spaces tax = ""; for (int k = 0; k < pcount; k++) { tax += pieces[k] + " "; } if (level == 0) {} else { map::iterator itParent = levelToCurrentNode.find(level-1); int parent = 0; if (itParent == levelToCurrentNode.end()) { m->mothurOut("[ERROR]: situation I didn't expect.\n"); } else { parent = itParent->second; } levelToCurrentNode[level] = addTaxToTree(tree, level, parent, tax, total, groupCounts); } } in.close(); } if (!hasGroups && (groups.size() != 0)) { groups.clear(); m->mothurOut("[WARNING]: not all files contain group breakdown, ignoring group counts.\n"); } ofstream out; util.openOutputFile(outputFileName, out); print(out, tree, groups); outputNames.push_back(outputFileName);outputTypes["taxsummary"].push_back(outputFileName); if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return 0; } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(outputFileName+"\n\n"); return 0; } catch(exception& e) { m->errorOut(e, "MergeTaxSummaryCommand", "execute"); exit(1); } } /**************************************************************************************************/ int MergeTaxSummaryCommand::addTaxToTree(vector& tree, int level, int currentNode, string taxon, int total, map groups){ try { map::iterator childPointer; childPointer = 
tree[currentNode].children.find(taxon); int nodeToIncrement = 0; if(childPointer != tree[currentNode].children.end()){ //if the node already exists, increment counts nodeToIncrement = childPointer->second; tree[nodeToIncrement].total += total; for (map::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) { map::iterator it = tree[nodeToIncrement].groupCount.find(itGroups->first); if (it == tree[nodeToIncrement].groupCount.end()) { tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second; } else { it->second += itGroups->second; } } } else{ //otherwise, create it tree.push_back(rawTaxNode(taxon)); tree[currentNode].children[taxon] = tree.size()-1; tree[tree.size()-1].parent = currentNode; nodeToIncrement = tree.size()-1; tree[nodeToIncrement].total = total; tree[nodeToIncrement].level = level; for (map::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) { tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second; } } return nodeToIncrement; } catch(exception& e) { m->errorOut(e, "MergeTaxSummaryCommand", "addSeqToTree"); exit(1); } } /**************************************************************************************************/ int MergeTaxSummaryCommand::assignRank(int index, vector& tree){ try { map::iterator it; int counter = 1; for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ if (m->getControl_pressed()) { return 0; } tree[it->second].rank = tree[index].rank + '.' + toString(counter); counter++; assignRank(it->second, tree); } return 0; } catch(exception& e) { m->errorOut(e, "MergeTaxSummaryCommand", "assignRank"); exit(1); } } /**************************************************************************************************/ int MergeTaxSummaryCommand::print(ofstream& out, vector& tree, set groups){ try { assignRank(0, tree); vector mGroups; //print labels out << "taxlevel\trankID\ttaxon\tdaughterlevels\ttotal"; for (set::iterator it = groups.begin(); it != groups.end(); it++) { out << '\t' << (*it) ; } out << endl; for (set::iterator it2 = groups.begin(); it2 != groups.end(); it2++) { tree[0].groupCount[*it2] = 0; } map::iterator it; for(it=tree[0].children.begin();it!=tree[0].children.end();it++){ tree[0].total += tree[it->second].total; for (set::iterator it2 = groups.begin(); it2 != groups.end(); it2++) { map:: iterator itGroups = tree[it->second].groupCount.find(*it2); if (itGroups != tree[it->second].groupCount.end()) { tree[0].groupCount[*it2] += itGroups->second; } } } //print root out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << tree[0].children.size() << "\t" << tree[0].total; for (set::iterator it = groups.begin(); it != groups.end(); it++) { map:: iterator itGroups = tree[0].groupCount.find(*it); int num = 0; if (itGroups != tree[0].groupCount.end()) { num = itGroups->second; } out << '\t' << num; } out << endl; //print rest print(0, out, tree, groups); return 0; } catch(exception& e) { m->errorOut(e, "MergeTaxSummaryCommand", "print"); exit(1); } } /**************************************************************************************************/ int MergeTaxSummaryCommand::print(int i, ofstream& out, vector& tree, set groups){ try { map::iterator it; for(it=tree[i].children.begin();it!=tree[i].children.end();it++){ //print root out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << tree[it->second].children.size() << "\t" << tree[it->second].total; for (set::iterator it2 = groups.begin(); it2 
!= groups.end(); it2++) { map:: iterator itGroups = tree[it->second].groupCount.find(*it2); int num = 0; if (itGroups != tree[it->second].groupCount.end()) { num = itGroups->second; } out << '\t' << num ; } out << endl; print(it->second, out, tree, groups); } return 0; } catch(exception& e) { m->errorOut(e, "MergeTaxSummaryCommand", "print"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/mergetaxsummarycommand.h000077500000000000000000000024431424121717000231720ustar00rootroot00000000000000// // mergetaxsummarycommand.h // Mothur // // Created by Sarah Westcott on 2/13/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_mergetaxsummarycommand_h #define Mothur_mergetaxsummarycommand_h #include "mothur.h" #include "command.hpp" #include "phylosummary.h" class MergeTaxSummaryCommand : public Command { public: MergeTaxSummaryCommand(string); ~MergeTaxSummaryCommand(){} vector setParameters(); string getCommandName() { return "merge.taxsummary"; } string getCommandCategory() { return "Phylotype Analysis"; } string getHelpString(); string getOutputPattern(string){ return ""; } string getCitation() { return "http://www.mothur.org/wiki/Merge.taxsummary"; } string getDescription() { return "merges tax summary files creating one file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector fileNames, outputNames; string outputFileName; int numInputFiles; bool abort; int addTaxToTree(vector&, int, int, string, int, map); int assignRank(int index, vector& tree); int print(ofstream& out, vector& tree, set groups); int print(int, ofstream& out, vector& tree, set groups); }; #endif mothur-1.48.0/source/commands/metastatscommand.cpp000077500000000000000000000617641424121717000223130ustar00rootroot00000000000000/* * metastatscommand.cpp * Mothur * * Created by westcott on 9/16/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "metastatscommand.h" //CommandParameter(string n, string t, string o, string d, string only, string atLeast, string linked, string opt, bool m, bool r, bool i) : name(n), type(t), options(o), optionsDefault(d), chooseOnlyOneGroup(only), chooseAtLeastOneGroup(atLeast), linkedGroup(linked), outputTypes(opt),multipleSelectionAllowed(m), required(r), important(i) {} //********************************************************************************************************************** vector MetaStatsCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "shared-clr", "none", "none","metastats",false,false,true); parameters.push_back(pshared); CommandParameter pclr("clr", "InputTypes", "", "", "shared-clr", "none", "none","metastats",false,false,true); parameters.push_back(pclr); CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["metastats"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MetaStatsCommand::getHelpString(){ try { string helpString = ""; helpString += "This command is based on the Metastats program, White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).\n"; helpString += "The metastats command outputs a .metastats file. \n"; helpString += "The metastats command parameters are shared, clr, iters, threshold, groups, label, design, sets and processors. The shared or clr and design parameters are required, unless you have valid current files.\n"; helpString += "The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n"; helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n"; helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. 
The set names are separated by dashes. THe default is all sets in the designfile.\n"; helpString += "The iters parameter allows you to set number of bootstrap permutations for estimating null distribution of t statistic. The default is 1000. \n"; helpString += "The threshold parameter allows you to set the significance level to reject null hypotheses (default 0.05).\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n"; helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The metastats command should be in the following format: metastats(design=yourDesignFile).\n"; helpString += "Example metastats(design=temp.design, groups=A-B-C).\n"; helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MetaStatsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "metastats") { pattern = "[filename],[distance],[group],metastats"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MetaStatsCommand::MetaStatsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); inputfile = sharedfile; format = "sharedfile"; } clrfile = validParameter.validFile(parameters, "clr"); if (clrfile == "not open") { abort = true; } else if (clrfile == "not found") { clrfile = ""; } else { current->setCLRFile(clrfile); inputfile = clrfile; format = "clrfile"; m->mothurOut("[NOTE]: When using a clr file mothur will run the fisher exact test with the floor of the values generated.\n"); } if ((sharedfile == "") && (clrfile == "")) { //is there are current file available for any of these? 
//give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { clrfile = current->getCLRFile(); if (clrfile != "") { inputfile = clrfile; format = "clrfile"; m->mothurOut("Using " + clrfile + " as input file for the clr parameter.\n"); m->mothurOut("[NOTE]: When using a clr file mothur will run the fisher exact test with the floor of the values generated.\n"); } else { m->mothurOut("No valid current files. You must provide a clrfile or shared file.\n"); abort = true; } } } //check for required parameters designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { abort = true; } else if (designfile == "not found") { //if there is a current design file, use it designfile = current->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter.\n"); } else { m->mothurOut("You have no current designfile and the design parameter is required.\n"); abort = true; } }else { current->setDesignFile(designfile); } if (outputdir == ""){ outputdir += util.hasPath(inputfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; pickedGroups = false; } else { pickedGroups = true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } sets = validParameter.valid(parameters, "sets"); if (sets == "not found") { sets = ""; } else { util.splitAtDash(sets, Sets); if (Sets.size() != 0) { if (Sets[0] != "all") { Groups.clear(); } } } string temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "threshold"); if (temp == "not found") { temp = "0.05"; } util.mothurConvert(temp, threshold); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); } } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "MetaStatsCommand"); exit(1); } } //********************************************************************************************************************** int MetaStatsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } DesignMap* designMap = new DesignMap(designfile); if (m->getControl_pressed()) { delete designMap; return 0; } InputData input(inputfile, format, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = nullptr; SharedCLRVectors* clr = nullptr; if (format == "sharedfile") { lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); }else { clr = util.getNextCLR(input, allLines, userLabels, processedLabels, lastLabel); Groups = clr->getNamesGroups(); } if (Sets.size() == 0) { Sets = designMap->getCategory(); } int numGroups = (int)Sets.size(); for (int a=0; a groups; groups.push_back(Sets[a]); 
groups.push_back(Sets[l]); namesOfGroupCombos.push_back(groups); } } if (numGroups == 2) { processors = 1; } else if (numGroups < 2) { m->mothurOut("[ERROR]: Not enough sets, I need at least 2 valid sets. Unable to complete command.\n"); m->setControl_pressed(true); } while ((lookup != nullptr) || (clr != nullptr)){ if (m->getControl_pressed()) { if (lookup != nullptr) { delete lookup; } if (clr != nullptr) { delete clr; }break; } if (format == "sharedfile") { process(lookup, designMap); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } else { process(clr, designMap); delete clr; clr = util.getNextCLR(input, allLines, userLabels, processedLabels, lastLabel); } } delete designMap; if (m->getControl_pressed()) { outputTypes.clear(); if (lookup != nullptr) { delete lookup; } if (clr != nullptr) { delete clr; } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "execute"); exit(1); } } /**************************************************************************************************/ struct metastatsData { SharedRAbundVectors* thisLookUp; SharedCLRVectors* thisCLR; vector< vector > namesOfGroupCombos; vector designMapGroups, outputNames; int start, num, iters, count; float threshold; Utils util; MothurOut* m; metastatsData(){} metastatsData(int st, int en, vector on, vector< vector > ns, SharedRAbundVectors*& lu, vector dg, int i, float thr) { m = MothurOut::getInstance(); outputNames = on; start = st; num = en; namesOfGroupCombos = ns; thisLookUp = lu; designMapGroups = dg; iters = i; threshold = thr; count=0; thisCLR = nullptr; } metastatsData(int st, int en, vector on, vector< vector > ns, SharedCLRVectors*& lu, vector dg, int i, float thr) { m = MothurOut::getInstance(); outputNames = on; start = st; num = en; namesOfGroupCombos = ns; thisCLR = lu; designMapGroups = dg; iters = i; threshold = thr; count=0; thisLookUp = nullptr; } }; //********************************************************************************************************************** int driverShared(metastatsData* params) { try { vector thisLookupNames = params->thisLookUp->getNamesGroups(); vector thisLookupRabunds = params->thisLookUp->getSharedRAbundVectors(); //for each combo for (int c = params->start; c < (params->start+params->num); c++) { //get set names string setA = params->namesOfGroupCombos[c][0]; string setB = params->namesOfGroupCombos[c][1]; string outputFileName = params->outputNames[c]; vector< vector > data2; data2.resize(params->thisLookUp->getNumBins()); vector subset; vector subsetGroups; int setACount = 0; int setBCount = 0; for (int i = 0; i < params->thisLookUp->size(); i++) { string thisGroup = thisLookupNames[i]; if (params->designMapGroups[i] == setB) { subset.push_back(thisLookupRabunds[i]); subsetGroups.push_back(thisGroup); setBCount++; }else if (params->designMapGroups[i] == setA) { subset.insert(subset.begin()+setACount, thisLookupRabunds[i]); subsetGroups.insert(subsetGroups.begin()+setACount, thisGroup); setACount++; } } if ((setACount == 0) || (setBCount == 0)) { params->m->mothurOut("Missing shared info for " + setA + " or " + setB + ". 
Skipping comparison.\n"); } else { for (int j = 0; j < params->thisLookUp->getNumBins(); j++) { data2[j].resize(subset.size(), 0.0); for (int i = 0; i < subset.size(); i++) { data2[j][i] = (subset[i]->get(j)); } } params->m->mothurOut("\nComparing " + setA + " and " + setB + "...\n"); MothurMetastats mothurMeta(params->threshold, params->iters); mothurMeta.runMetastats(outputFileName , data2, setACount, params->thisLookUp->getOTUNames(), true); params->m->mothurOutEndLine(); } } for(int i = 0; i < thisLookupRabunds.size(); i++) { delete thisLookupRabunds[i]; } return 0; } catch(exception& e) { params->m->errorOut(e, "MetaStatsCommand", "driver"); exit(1); } } //********************************************************************************************************************** int MetaStatsCommand::process(SharedRAbundVectors*& thisLookUp, DesignMap*& designMap){ try { vector lines; vector workerThreads; vector data; int remainingPairs = namesOfGroupCombos.size(); int startIndex = 0; vector thisLabelsOutputFiles; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs for (int i = startIndex; i < startIndex+numPairs; i++) { //get set names string setA = namesOfGroupCombos[i][0]; string setB = namesOfGroupCombos[i][1]; //get filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[distance]"] = thisLookUp->getLabel(); variables["[group]"] = setA + "-" + setB; string outputFileName = getOutputFileName("metastats",variables); outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName); thisLabelsOutputFiles.push_back(outputFileName); } startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } vector designMapGroups = thisLookUp->getNamesGroups(); for (int j = 0; j < designMapGroups.size(); j++) { designMapGroups[j] = designMap->get(designMapGroups[j]); } //Lauch worker threads for (int i = 0; i < processors-1; i++) { //make copy of lookup so we don't get access violations SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisLookUp); metastatsData* dataBundle = new metastatsData(lines[i+1].start, lines[i+1].end, thisLabelsOutputFiles, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold); data.push_back(dataBundle); std::thread* thisThread = new std::thread(driverShared, dataBundle); workerThreads.push_back(thisThread); } metastatsData* dataBundle = new metastatsData(lines[0].start, lines[0].end, thisLabelsOutputFiles, namesOfGroupCombos, thisLookUp, designMapGroups, iters, threshold); driverShared(dataBundle); for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); delete data[i]->thisLookUp; delete data[i]; delete workerThreads[i]; } delete dataBundle; return 0; } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "process"); exit(1); } } //********************************************************************************************************************** int driverCLR(metastatsData* params) { try { vector thisLookupNames = params->thisCLR->getNamesGroups(); vector thisCLRRabunds = params->thisCLR->getSharedCLRVectors(); //for each combo for (int c = params->start; c < (params->start+params->num); c++) { //get set names string setA = params->namesOfGroupCombos[c][0]; string setB = 
params->namesOfGroupCombos[c][1]; string outputFileName = params->outputNames[c]; vector< vector > data2; data2.resize(params->thisCLR->getNumBins()); vector subset; int setACount = 0; int setBCount = 0; for (int i = 0; i < params->thisCLR->size(); i++) { string thisGroup = thisLookupNames[i]; if (params->designMapGroups[i] == setB) { subset.push_back(thisCLRRabunds[i]); setBCount++; }else if (params->designMapGroups[i] == setA) { subset.insert(subset.begin()+setACount, thisCLRRabunds[i]); setACount++; } } if ((setACount == 0) || (setBCount == 0)) { params->m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison.\n"); } else { for (int j = 0; j < params->thisCLR->getNumBins(); j++) { data2[j].resize(subset.size(), 0.0); for (int i = 0; i < subset.size(); i++) { data2[j][i] = (subset[i]->get(j)); } } params->m->mothurOut("\nComparing " + setA + " and " + setB + "...\n"); MothurMetastats mothurMeta(params->threshold, params->iters); mothurMeta.runMetastats(outputFileName, data2, setACount, params->thisCLR->getOTUNames(), false); params->m->mothurOutEndLine(); } } for(int i = 0; i < thisCLRRabunds.size(); i++) { delete thisCLRRabunds[i]; } return 0; } catch(exception& e) { params->m->errorOut(e, "MetaStatsCommand", "driver"); exit(1); } } //********************************************************************************************************************** int MetaStatsCommand::process(SharedCLRVectors*& thisCLR, DesignMap*& designMap){ try { vector lines; vector workerThreads; vector data; int remainingPairs = namesOfGroupCombos.size(); int startIndex = 0; vector thisLabelsOutputFiles; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs for (int i = startIndex; i < startIndex+numPairs; i++) { //get set names string setA = namesOfGroupCombos[i][0]; string setB = namesOfGroupCombos[i][1]; //get filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[distance]"] = thisCLR->getLabel(); variables["[group]"] = setA + "-" + setB; string outputFileName = getOutputFileName("metastats",variables); outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName); thisLabelsOutputFiles.push_back(outputFileName); } startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } vector designMapGroups = thisCLR->getNamesGroups(); for (int j = 0; j < designMapGroups.size(); j++) { designMapGroups[j] = designMap->get(designMapGroups[j]); } //Launch worker threads for (int i = 0; i < processors-1; i++) { //make copy of lookup so we don't get access violations SharedCLRVectors* newLookup = new SharedCLRVectors(*thisCLR); metastatsData* dataBundle = new metastatsData(lines[i+1].start, lines[i+1].end, thisLabelsOutputFiles, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold); data.push_back(dataBundle); std::thread* thisThread = new std::thread(driverCLR, dataBundle); workerThreads.push_back(thisThread); } metastatsData* dataBundle = new metastatsData(lines[0].start, lines[0].end, thisLabelsOutputFiles, namesOfGroupCombos, thisCLR, designMapGroups, iters, threshold); driverCLR(dataBundle); for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); delete data[i]->thisLookUp; delete data[i];
delete workerThreads[i]; } delete dataBundle; return 0; } catch(exception& e) { m->errorOut(e, "MetaStatsCommand", "process"); exit(1); } } //**********************************************************************************************************************/ mothur-1.48.0/source/commands/metastatscommand.h000077500000000000000000000033311424121717000217420ustar00rootroot00000000000000#ifndef METASTATSCOMMAND_H #define METASTATSCOMMAND_H /* * metastatscommand.h * Mothur * * Created by westcott on 9/16/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" #include "sharedrabundvectors.hpp" #include "sharedclrvectors.hpp" #include "mothurmetastats.h" #include "designmap.h" /**************************************************************************************************/ class MetaStatsCommand : public Command { public: MetaStatsCommand(string); ~MetaStatsCommand() = default; vector setParameters(); string getCommandName() { return "metastats"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "White JR, Nagarajan N, Pop M (2009). Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5: e1000352. \nhttp://www.mothur.org/wiki/Metastats"; } string getDescription() { return "detects differentially abundant features in clinical metagenomic samples"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines, pickedGroups; set labels; //holds labels to be used string groups, label, inputDir, designfile, sets, sharedfile, clrfile, inputfile, format; vector Groups, outputNames, Sets; vector< vector > namesOfGroupCombos; int iters, processors; float threshold; int process(SharedRAbundVectors*&, DesignMap*&); int process(SharedCLRVectors*&, DesignMap*&); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/mgclustercommand.cpp000077500000000000000000001105671424121717000223070ustar00rootroot00000000000000/* * mgclustercommand.cpp * Mothur * * Created by westcott on 12/11/09. * Copyright 2009 Schloss Lab. All rights reserved. 
* */ #include "mgclustercommand.h" //********************************************************************************************************************** vector MGClusterCommand::setParameters(){ try { CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength); CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty); CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-opti", "opti", "", "", "","",false,false); parameters.push_back(pmethod); CommandParameter pinitialize("initialize", "Multiple", "oneotu-singleton", "singleton", "", "", "","",false,false,true); parameters.push_back(pinitialize); CommandParameter pmetric("metric", "Multiple", "mcc-sens-spec-tptn-fpfn-tp-tn-fp-fn-f1score-accuracy-ppv-npv-fdr", "mcc", "", "", "","",false,false,true); parameters.push_back(pmetric); CommandParameter pmetriccutoff("delta", "Number", "", "0.0001", "", "", "","",false,false,true); parameters.push_back(pmetriccutoff); CommandParameter piters("iters", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(piters); CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin); CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge); CommandParameter padjust("adjust", "String", "", "F", "", "", "","",false,false); parameters.push_back(padjust); CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["rabund"] = tempOutNames; outputTypes["sabund"] = tempOutNames; outputTypes["steps"] = tempOutNames; outputTypes["sensspec"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MGClusterCommand::getHelpString(){ try { string helpString = ""; helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, method, metric, initialize, iters, merge, min, length, penalty and adjust. 
The blast parameter is required.\n"; helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similar to the OTUs.\n"; helpString += "This command outputs a .list, .rabund and .sabund file that can be used with other mothur commands to estimate richness.\n"; helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n"; helpString += "The precision parameter's default value is 100. \n"; helpString += "The acceptable mgcluster methods are furthest, nearest, average and opti. If no method is provided then opti is assumed.\n"; helpString += "The min parameter allows you to specify if you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n"; helpString += "The iters parameter allows you to set the maxiters for the opticluster method. \n"; helpString += "The metric parameter allows you to select the metric in the opticluster method. Options are Matthews correlation coefficient (mcc), sensitivity (sens), specificity (spec), true positives + true negatives (tptn), false positives + false negatives (fpfn), true positives (tp), true negative (tn), false positive (fp), false negative (fn), f1score (f1score), accuracy (accuracy), positive predictive value (ppv), negative predictive value (npv), false discovery rate (fdr). Default=mcc.\n"; helpString += "The initialize parameter allows you to select the initial randomization for the opticluster method. Options are singleton, meaning each sequence is randomly assigned to its own OTU, or oneotu meaning all sequences are assigned to one otu. Default=singleton.\n"; helpString += "The delta parameter allows you to set the stable value for the metric in the opticluster method (delta=0.0001). \n"; helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n"; helpString += "The adjust parameter is used to handle missing distances. If you set a cutoff, adjust=f by default. If not, adjust=t by default. Adjust=f means ignore missing distances and adjust cutoff as needed with the average neighbor method. Adjust=t will treat missing distances as 1.0. You can also set the value the missing distances should be set to; adjust=0.5 would give missing distances a value of 0.5.\n"; helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n"; helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.
By default merge is true, meaning you want to merge.\n"; helpString += "The mgcluster command should be in the following format: \n"; helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string MGClusterCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; } else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; } else if (type == "steps") { pattern = "[filename],[clustertag],steps"; } else if (type == "sensspec") { pattern = "[filename],[clustertag],sensspec"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** MGClusterCommand::MGClusterCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; blastfile = validParameter.validFile(parameters, "blast"); if (blastfile == "not open") { blastfile = ""; abort = true; } else if (blastfile == "not found") { blastfile = ""; } if (outputdir == ""){ outputdir += util.hasPath(blastfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other.\n"); abort = true; } if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile.\n"); abort = true; } //check for optional parameter and set defaults string temp; temp = validParameter.valid(parameters, "precision"); if (temp == "not found") { temp = "100"; } precisionLength = temp.length(); util.mothurConvert(temp, precision); cutoffSet = false; temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "0.70"; } else { cutoffSet = true; } util.mothurConvert(temp, cutoff); method = validParameter.valid(parameters, "method"); if (method == "not found") { method = "opti"; } if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "opti")) { } else { m->mothurOut("Not a valid clustering method. 
Valid clustering algorithms are furthest, nearest, average or opti.\n"); abort = true; } metric = validParameter.valid(parameters, "metric"); if (metric == "not found") { metric = "mcc"; } if ((metric == "mcc") || (metric == "sens") || (metric == "spec") || (metric == "tptn") || (metric == "tp") || (metric == "tn") || (metric == "fp") || (metric == "fn") || (metric == "f1score") || (metric == "accuracy") || (metric == "ppv") || (metric == "npv") || (metric == "fdr") || (metric == "fpfn") ){ } else { m->mothurOut("[ERROR]: Not a valid metric. Valid metrics are mcc, sens, spec, tp, tn, fp, fn, tptn, fpfn, f1score, accuracy, ppv, npv, fdr.\n"); abort = true; } initialize = validParameter.valid(parameters, "initialize"); if (initialize == "not found") { initialize = "singleton"; } if ((initialize == "singleton") || (initialize == "oneotu")){ } else { m->mothurOut("[ERROR]: Not a valid initialization. Valid initializations are singleton and oneotu.\n"); abort = true; } temp = validParameter.valid(parameters, "delta"); if (temp == "not found") { temp = "0.0001"; } util.mothurConvert(temp, stableMetric); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, maxIters); temp = validParameter.valid(parameters, "length"); if (temp == "not found") { temp = "5"; } util.mothurConvert(temp, length); temp = validParameter.valid(parameters, "penalty"); if (temp == "not found") { temp = "0.10"; } util.mothurConvert(temp, penalty); temp = validParameter.valid(parameters, "min"); if (temp == "not found") { temp = "true"; } minWanted = util.isTrue(temp); temp = validParameter.valid(parameters, "merge"); if (temp == "not found") { temp = "true"; } merge = util.isTrue(temp); temp = validParameter.valid(parameters, "adjust"); if (temp == "not found") { if (cutoffSet) { temp = "F"; }else { temp="T"; } } if (util.isNumeric1(temp)) { util.mothurConvert(temp, adjust); } else if (util.isTrue(temp)) { adjust = 1.0; } else { adjust = -1.0; } } } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "MGClusterCommand"); exit(1); } } //********************************************************************************************************************** int MGClusterCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } fileroot = outputdir + util.getRootName(util.getSimpleName(blastfile)); tag = ""; if (method == "furthest") { tag = "fn"; } else if (method == "nearest") { tag = "nn"; } else if (method == "average") { tag = "an"; } else if (method == "opti") { tag = "opti"; } if (method == "opti") { runOptiCluster(); } else { runMothurCluster(); } m->mothurOut("\nOutput File Names: \n"); m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); if (countfile == "") { m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); } m->mothurOutEndLine(); //set list file as new current listfile string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() 
!= 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "execute"); exit(1); } } //********************************************************************************************************************** void MGClusterCommand::printData(ListVector* mergedList, map& counts, bool& ph){ try { mergedList->setPrintedLabels(ph); ph = false; if (countfile != "") { mergedList->print(listFile, counts); }else { mergedList->print(listFile, true); } SAbundVector sabund = mergedList->getSAbundVector(); if (countfile == "") { mergedList->getRAbundVector().print(rabundFile); sabund.print(sabundFile); } sabund.print(cout); } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "printData"); exit(1); } } //********************************************************************************************************************** int MGClusterCommand::runOptiCluster(){ try { if (!cutoffSet) { m->mothurOut("\nYou did not set a cutoff, using 0.03.\n"); cutoff = 0.03; } string nameOrCount = ""; string thisNamefile = ""; map counts; if (countfile != "") { nameOrCount = "count"; thisNamefile = countfile; CountTable ct; ct.readTable(countfile, false, false); counts = ct.getNameMap(); } else if (namefile != "") { nameOrCount = "name"; thisNamefile = namefile; } string distfile = blastfile; time_t start = time(nullptr); OptiData* matrix; matrix = new OptiBlastMatrix(distfile, thisNamefile, nameOrCount, false, cutoff, length, penalty, minWanted); ClusterMetric* metricCalc = nullptr; if (metric == "mcc") { metricCalc = new MCC(); } else if (metric == "sens") { metricCalc = new Sensitivity(); } else if (metric == "spec") { metricCalc = new Specificity(); } else if (metric == "tptn") { metricCalc = new TPTN(); } else if (metric == "tp") { metricCalc = new TP(); } else if (metric == "tn") { metricCalc = new TN(); } else if (metric == "fp") { metricCalc = new FP(); } else if (metric == "fn") { metricCalc = new FN(); } else if (metric == "f1score") { metricCalc = new F1Score(); } else if (metric == "accuracy") { metricCalc = new Accuracy(); } else if (metric == "ppv") { metricCalc = new PPV(); } else if (metric == "npv") { metricCalc = new NPV(); } else if (metric == "fdr") { metricCalc = new FDR(); } else if (metric == "fpfn") { metricCalc = new FPFN(); } OptiCluster cluster(matrix, metricCalc, 0); string tag = cluster.getTag(); map variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; sabundFileName = getOutputFileName("sabund", variables); rabundFileName = getOutputFileName("rabund", variables); //if (countfile != "") { variables["[tag2]"] = "unique_list"; } listFileName = getOutputFileName("list", variables); string outputName = getOutputFileName("steps", variables); outputNames.push_back(outputName); outputTypes["steps"].push_back(outputName); m->mothurOutEndLine(); m->mothurOut("Clustering " + distfile); m->mothurOutEndLine(); if (outputdir == "") { outputdir += util.hasPath(distfile); } ofstream listFile; util.openOutputFile(listFileName, listFile); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); ofstream outStep; util.openOutputFile(outputName, outStep); int iters = 0; double listVectorMetric = 0; //worst state double delta = 1; 
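// (editorial sketch, not in the original source: the loop below repeatedly calls
//  cluster.update() and stops once the change in the chosen metric, delta, falls below
//  stableMetric (default 0.0001) or maxIters iterations (default 100) have run; one row of
//  confusion-matrix statistics is printed to the screen and the .steps file per iteration.)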
cluster.initialize(listVectorMetric, true, initialize); long long numBins = cluster.getNumBins(); m->mothurOut("\n\niter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); outStep << "iter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; double tp, tn, fp, fn; vector results = cluster.getStats(tp, tn, fp, fn); m->mothurOut("0\t0\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(cutoff) + "\t" + toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); outStep << "0\t0\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t" << tp << '\t' << tn << '\t' << fp << '\t' << fn << '\t'; for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); outStep << results[i] << "\t"; } m->mothurOutEndLine(); outStep << endl; while ((delta > stableMetric) && (iters < maxIters)) { long start = time(nullptr); if (m->getControl_pressed()) { break; } double oldMetric = listVectorMetric; cluster.update(listVectorMetric); delta = abs(oldMetric - listVectorMetric); iters++; results = cluster.getStats(tp, tn, fp, fn); numBins = cluster.getNumBins(); m->mothurOut(toString(iters) + "\t" + toString(time(nullptr) - start) + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t"+ toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); outStep << (toString(iters) + "\t" + toString(time(nullptr) - start) + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t" + toString(cutoff) + "\t") << tp << '\t' << tn << '\t' << fp << '\t' << fn << '\t'; for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); outStep << results[i] << "\t"; } m->mothurOutEndLine(); outStep << endl; } m->mothurOutEndLine(); m->mothurOutEndLine(); list = cluster.getList(); list->setLabel(toString(cutoff)); if (merge) { vector< set > overlap = matrix->getBlastOverlap(); //assign each sequence to bins map seqToBin; for (long long i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { break; } string bin = list->get(i); vector names; util.splitAtComma(bin, names); for (long long j = 0; j < names.size(); j++) { seqToBin[names[j]] = i; } } //merge overlapping bins long long mergedBinCount = 0; for (long long i = 0; i < overlap.size(); i++) { set temp = overlap[i]; overlap[i].clear(); for (set::iterator itOverlap = temp.begin(); itOverlap != temp.end(); itOverlap++) { string firstName = matrix->getOverlapName(i); string secondName = matrix->getOverlapName(*itOverlap); long long binKeep = seqToBin[firstName]; long long binRemove = seqToBin[secondName]; if(binKeep != binRemove) { //save names in old bin string bin = list->get(binRemove); //merge bins into name1s bin list->set(binKeep, bin+','+list->get(binKeep)); list->set(binRemove, ""); vector binNames; util.splitAtComma(bin, binNames); //update binInfo //save name and new bin number for (int k = 0; k < binNames.size(); k++) { seqToBin[binNames[k]] = binKeep; } mergedBinCount++; } } } if (mergedBinCount != 0) { m->mothurOut("Merged " + toString(mergedBinCount) + " OTUs based on blast overlap.\n\n"); } } if(countfile != "") { list->print(listFile, counts); } else { list->print(listFile); } listFile.close(); variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; string sabundFileName = getOutputFileName("sabund", variables); string rabundFileName = 
getOutputFileName("rabund", variables); if (countfile == "") { util.openOutputFile(sabundFileName, sabundFile); util.openOutputFile(rabundFileName, rabundFile); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); SAbundVector sabund = list->getSAbundVector(); sabund.print(sabundFile); sabundFile.close(); RAbundVector rabund = list->getRAbundVector(); rabund.print(rabundFile); rabundFile.close(); } delete list; string sensspecFilename = fileroot+ tag + ".sensspec"; ofstream sensFile; util.openOutputFile(sensspecFilename, sensFile); outputNames.push_back(sensspecFilename); outputTypes["sensspec"].push_back(sensspecFilename); sensFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; results = cluster.getStats(tp, tn, fp, fn); sensFile << cutoff << '\t' << cutoff << '\t' << tp << '\t' << tn << '\t' << fp << '\t' << fn << '\t'; for (int i = 0; i < results.size(); i++) { sensFile << results[i] << '\t'; } sensFile << '\n'; sensFile.close(); m->mothurOut("It took " + toString(time(nullptr) - start) + " seconds to cluster.\n"); delete metricCalc; delete matrix; return 0; } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "runOptiCluster"); exit(1); } } //********************************************************************************************************************** int MGClusterCommand::runMothurCluster(){ try { //read names file map counts; if (namefile != "") { nameMap = new NameAssignment(namefile); nameMap->readMap(); }else if (countfile != "") { ct = new CountTable(); ct->readTable(countfile, false, false); nameMap= new NameAssignment(); vector tempNames = ct->getNamesOfSeqs(); for (int i = 0; i < tempNames.size(); i++) { nameMap->push_back(tempNames[i]); } counts = ct->getNameMap(); }else{ nameMap= new NameAssignment(); } map variables; variables["[filename]"] = fileroot; variables["[clustertag]"] = tag; sabundFileName = getOutputFileName("sabund", variables); rabundFileName = getOutputFileName("rabund", variables); //if (countfile != "") { variables["[tag2]"] = "unique_list"; } listFileName = getOutputFileName("list", variables); float previousDist = 0.00000; float rndPreviousDist = 0.00000; time_t start = time(nullptr); //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector //must remember to delete those objects here since readBlast does not read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted); read->read(nameMap); list = new ListVector(nameMap->getListVector()); RAbundVector* rabund = nullptr; if(countfile != "") { rabund = new RAbundVector(); createRabund(ct, list, rabund); }else { rabund = new RAbundVector(list->getRAbundVector()); } if (m->getControl_pressed()) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; } oldList = *list; map Seq2Bin; map oldSeq2Bin; if (countfile == "") { util.openOutputFile(sabundFileName, sabundFile); util.openOutputFile(rabundFileName, rabundFile); } util.openOutputFile(listFileName, listFile); if (m->getControl_pressed()) { delete nameMap; delete read; delete list; delete rabund; listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); util.mothurRemove(rabundFileName); util.mothurRemove(sabundFileName); } util.mothurRemove(listFileName); outputTypes.clear(); return 0; } double saveCutoff = cutoff; bool printHeaders = true; //get distmatrix and 
overlap SparseDistanceMatrix* distMatrix = read->getDistMatrix(); overlapMatrix = read->getOverlapMatrix(); //already sorted by read delete read; //create cluster if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method, adjust); } else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method, adjust); } else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method, adjust); } cluster->setMapWanted(true); Seq2Bin = cluster->getSeqtoBin(); oldSeq2Bin = Seq2Bin; if (m->getControl_pressed()) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); util.mothurRemove(rabundFileName); util.mothurRemove(sabundFileName); } util.mothurRemove(listFileName); outputTypes.clear(); return 0; } //cluster using cluster classes while (distMatrix->getSmallDist() <= cutoff && distMatrix->getNNodes() > 0){ if (m->getDebug()) { cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; } cluster->update(cutoff); if (m->getControl_pressed()) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); util.mothurRemove(rabundFileName); util.mothurRemove(sabundFileName); } util.mothurRemove(listFileName); outputTypes.clear(); return 0; } float dist = distMatrix->getSmallDist(); float rndDist = util.ceilDist(dist, precision); if(previousDist <= 0.0000 && !util.isEqual(dist, previousDist)){ oldList.setLabel("unique"); printData(&oldList, counts, printHeaders); } else if(!util.isEqual(rndDist, rndPreviousDist)){ if (merge) { ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist); if (m->getControl_pressed()) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); util.mothurRemove(rabundFileName); util.mothurRemove(sabundFileName); } util.mothurRemove(listFileName); outputTypes.clear(); return 0; } temp->setLabel(toString(rndPreviousDist)); printData(temp, counts, printHeaders); delete temp; }else{ oldList.setLabel(toString(rndPreviousDist)); printData(&oldList, counts, printHeaders); } } previousDist = dist; rndPreviousDist = rndDist; oldList = *list; Seq2Bin = cluster->getSeqtoBin(); oldSeq2Bin = Seq2Bin; } if(previousDist <= 0.0000){ oldList.setLabel("unique"); printData(&oldList, counts, printHeaders); } else if(rndPreviousDistgetControl_pressed()) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); util.mothurRemove(rabundFileName); util.mothurRemove(sabundFileName); } util.mothurRemove(listFileName); outputTypes.clear(); return 0; } temp->setLabel(toString(rndPreviousDist)); printData(temp, counts, printHeaders); delete temp; }else{ oldList.setLabel(toString(rndPreviousDist)); printData(&oldList, counts, printHeaders); } } //free memory overlapMatrix.clear(); delete distMatrix; delete cluster; delete list; delete rabund; listFile.close(); if (countfile == "") { sabundFile.close(); rabundFile.close(); } if (m->getControl_pressed()) { delete nameMap; listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); util.mothurRemove(rabundFileName); util.mothurRemove(sabundFileName); } 
util.mothurRemove(listFileName); outputTypes.clear(); return 0; } if (!util.isEqual(saveCutoff, cutoff)) { saveCutoff = util.ceilDist(saveCutoff, precision); m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); } m->mothurOut("It took " + toString(time(nullptr) - start) + " seconds to cluster.\n"); return 0; } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "runMothurCluster"); exit(1); } } //********************************************************************************************************************** //this merging is just at the reporting level, after this info is printed to the file it is gone and does not affect the data structures //that are used to cluster by distance. This is done so that the overlapping data does not have more influence than the distance data. ListVector* MGClusterCommand::mergeOPFs(map binInfo, float dist){ try { //create new listvector so you don't overwrite the clustering ListVector* newList = new ListVector(oldList); bool done = false; ifstream inOverlap; int count = 0; if (overlapMatrix.size() == 0) { done = true; } while (!done) { if (m->getControl_pressed()) { return newList; } //get next overlap seqDist overlapNode; if (count < overlapMatrix.size()) { //do we have another node in the matrix overlapNode = overlapMatrix[count]; count++; }else { break; } if (overlapNode.dist < dist) { //get names of seqs that overlap string name1 = nameMap->get(overlapNode.seq1); string name2 = nameMap->get(overlapNode.seq2); //use binInfo to find out if they are already in the same bin //map::iterator itBin1 = binInfo.find(name1); //map::iterator itBin2 = binInfo.find(name2); //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); } //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); } //int binKeep = itBin1->second; //int binRemove = itBin2->second; int binKeep = binInfo[name1]; int binRemove = binInfo[name2]; //if not merge bins and update binInfo if(binKeep != binRemove) { //save names in old bin string names = newList->get(binRemove); //merge bins into name1s bin newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep)); newList->set(binRemove, ""); vector binNames; util.splitAtComma(names, binNames); //update binInfo //save name and new bin number for (int i = 0; i < binNames.size(); i++) { binInfo[binNames[i]] = binKeep; } } }else { done = true; } } //return listvector return newList; } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "mergeOPFs"); exit(1); } } //********************************************************************************************************************** void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){ try { //vector names = ct.getNamesOfSeqs(); //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); } //return rav; for(int i = 0; i < list->getNumBins(); i++) { vector binNames; string bin = list->get(i); util.splitAtComma(bin, binNames); int total = 0; for (int j = 0; j < binNames.size(); j++) { total += ct->getNumSeqs(binNames[j]); } rabund->push_back(total); } } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "createRabund"); exit(1); } } //**********************************************************************************************************************
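/* (editorial usage sketch, not part of the original source; the file names below are hypothetical)
   Based on the help text above, a typical invocation from within mothur might look like:
       mgcluster(blast=final.blast, name=final.names, cutoff=0.70, method=opti)
   which writes a .list file (plus .rabund and .sabund files when no count file is given), a
   .steps file recording each OptiCluster iteration, and a .sensspec summary. */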
mothur-1.48.0/source/commands/mgclustercommand.h000077500000000000000000000040441424121717000217440ustar00rootroot00000000000000#ifndef MGCLUSTERCOMMAND_H #define MGCLUSTERCOMMAND_H /* * mgclustercommand.h * Mothur * * Created by westcott on 12/11/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "readblast.h" #include "nameassignment.hpp" #include "cluster.hpp" #include "rabundvector.hpp" #include "sabundvector.hpp" #include "counttable.h" #include "opticluster.h" #include "optiblastmatrix.hpp" /**********************************************************************/ class MGClusterCommand : public Command { public: MGClusterCommand(string); ~MGClusterCommand(){} vector setParameters(); string getCommandName() { return "mgcluster"; } string getCommandCategory() { return "Clustering"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Handelsman J (2008). A statistical toolbox for metagenomics. BMC Bioinformatics 9: 34. \nhttp://www.mothur.org/wiki/Mgcluster"; } string getDescription() { return "cluster your sequences into OTUs using a blast file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: ReadBlast* read; NameAssignment* nameMap; Cluster* cluster; ListVector* list; ListVector oldList; CountTable* ct; RAbundVector rav; vector overlapMatrix; vector outputNames; string blastfile, method, namefile, countfile, overlapFile, distFile, sabundFileName, rabundFileName, listFileName, metric, initialize, tag, fileroot; ofstream sabundFile, rabundFile, listFile; double cutoff; float penalty, adjust, stableMetric; int precision, length, precisionLength, maxIters; bool abort, minWanted, hclusterWanted, merge, cutoffSet; void printData(ListVector*, map&, bool&); ListVector* mergeOPFs(map, float); vector getSeqs(ifstream&); void createRabund(CountTable*&, ListVector*&, RAbundVector*&); int runOptiCluster(); int runMothurCluster(); }; /**********************************************************************/ #endif mothur-1.48.0/source/commands/mimarksattributescommand.cpp000077500000000000000000000530621424121717000240500ustar00rootroot00000000000000// // mimarksattributescommand.cpp // Mothur // // Created by Sarah Westcott on 3/17/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
// #include "mimarksattributescommand.h" //********************************************************************************************************************** vector MimarksAttributesCommand::setParameters(){ try { CommandParameter pxml("xml", "InputTypes", "", "", "none", "none", "none","summary",false,false,true); parameters.push_back(pxml); CommandParameter psets("package", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "MimarksAttributesCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string MimarksAttributesCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "source") { pattern = "[filename],source"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "MimarksAttributesCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** string MimarksAttributesCommand::getHelpString(){ try { string helpString = ""; helpString += "Reads bioSample Attributes xml and generates source for get.mimarkspackage command. Only parameter required is xml.\n"; helpString += "The package parameter allows you to set the package you want. Default MIMARKS.survey.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "MimarksAttributesCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** MimarksAttributesCommand::MimarksAttributesCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; xmlFile = validParameter.validFile(parameters, "xml"); if (xmlFile == "not open") { abort = true; } else if (xmlFile == "not found") { xmlFile = ""; abort=true; m->mothurOut("You must provide an xml file. 
It is required.\n"); } selectedPackage = validParameter.valid(parameters, "package"); if (selectedPackage == "not found") { selectedPackage = "MIMARKS.survey."; } if (outputdir == ""){ outputdir = util.hasPath(xmlFile); } } } catch(exception& e) { m->errorOut(e, "MimarksAttributesCommand", "MimarksAttributesCommand"); exit(1); } } //********************************************************************************************************************** int MimarksAttributesCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } ifstream in; util.openInputFile(xmlFile, in); string header = util.getline(in); gobble(in); if (header != "") { m->mothurOut("[ERROR]: " + header + " is not a bioSample attribute file.\n"); m->setControl_pressed(true); } map categories; map::iterator it; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return 0; } Attribute attribute = readAttribute(in); if (attribute.name != "") { if (m->getDebug()) { m->mothurOut("[DEBUG]: name=" + attribute.name + " harmonizedName=" + attribute.harmonizedName + " format=" + attribute.format + " description=" + attribute.description + " package=" + attribute.getPackagesString() + "\n"); } if (attribute.format == "") { attribute.format = "{none}"; } if (attribute.description == "") { attribute.description = "none"; } for (int i = 0; i < attribute.packages.size(); i++) { for (int j = 0; j < attribute.packages[i].groupName.length(); j++) { if (attribute.packages[i].groupName[j] == '-') { attribute.packages[i].groupName[j] = '_'; } } it = categories.find(attribute.packages[i].groupName); if (it != categories.end()) { //we already have this category, ie air, soil... if (attribute.packages[i].name == (it->second).packageName) { //add attribute to category (it->second).values[attribute.harmonizedName].required = attribute.packages[i].required; (it->second).values[attribute.harmonizedName].format = attribute.format; string newDescription = ""; for (int j = 0; j < attribute.description.length(); j++) { if (attribute.description[j] == '"') { newDescription += "\\\""; } else { newDescription += attribute.description[j]; } } (it->second).values[attribute.harmonizedName].description = newDescription; } }else { if ((attribute.packages[i].groupName == "\"Built\"") || (attribute.packages[i].groupName == "\"Nucleic Acid Sequence Source\"")) {} else { Group thisGroup(attribute.packages[i].name); thisGroup.values[attribute.harmonizedName].required = attribute.packages[i].required; thisGroup.values[attribute.harmonizedName].format = attribute.format; string newDescription = ""; for (int j = 0; j < attribute.description.length(); j++) { if (attribute.description[j] == '"') { newDescription += "\\\""; } else { newDescription += attribute.description[j]; } } thisGroup.values[attribute.harmonizedName].description = newDescription; categories[attribute.packages[i].groupName] = thisGroup; } } } } } in.close(); string requiredByALL = "*sample_name\t*description\t*sample_title\t*seq_methods\t*organism"; string rFormatALL = "#{text}\t{text}\t{text}\t{text}\t{controlled vacabulary}"; string rDescriptionALL = "#{sample name}\t{description of sample}\t{sample title}\t{description of library_construction_protocol}\t{http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock}"; string environment = "\"Environment\""; it = categories.find(environment); if (it != categories.end()) { map::iterator itValue = (it->second).values.begin(); if (itValue->second.required) { requiredByALL += 
"\t*" + itValue->first; rFormatALL += "\t{" + (itValue->second.format) + "}"; rDescriptionALL += "\t{" + (itValue->second.description) + "}"; } itValue++; for (; itValue != (it->second).values.end(); itValue++) { if (itValue->second.required) { requiredByALL += "\t*" + itValue->first; rFormatALL += "\t{" + (itValue->second.format) + "}"; rDescriptionALL += "\t{" + (itValue->second.description) + "}"; } } } ofstream out; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(xmlFile)); string outputFileName = getOutputFileName("source",variables); outputNames.push_back(outputFileName); outputTypes["source"].push_back(outputFileName); util.openOutputFile(outputFileName, out); //create outputs string requiredValues = requiredByALL; string nonRequiredValues = ""; string rFormat = rFormatALL; string nonRFormat = ""; string rDescription = rDescriptionALL; string nonRDescription = ""; it = categories.begin(); map::iterator itValue = (it->second).values.begin(); if (itValue->second.required) { requiredValues += "\t*" + itValue->first; rFormat += "\t{" + (itValue->second.format) + "}"; rDescription += "\t{" + (itValue->second.description) + "}"; }else { nonRequiredValues += itValue->first; nonRFormat += "{" + itValue->second.format + "}"; nonRDescription += "{" + (itValue->second.description) + "}"; } itValue++; for (; itValue != (it->second).values.end(); itValue++) { if (itValue->second.required) { requiredValues += "\t*" + itValue->first; rFormat += "\t{" + (itValue->second.format) + "}"; rDescription += "\t{" + (itValue->second.description) + "}"; }else { nonRequiredValues += "\t" + itValue->first; nonRFormat += "\t{" + itValue->second.format + "}"; nonRDescription += "\t{" + (itValue->second.description) + "}"; } } out << "if (package == " + it->first + ") {\n"; out << "\tout << \"#" + it->second.packageName + "\" << endl;\n"; out << "\t if (requiredonly) {\n"; out << "\t\tout << \"" + rDescription + "\" << endl;\n"; out << "\t\tout << \"" + rFormat + "\" << endl;\n"; out << "\t\tout << \"" + requiredValues + "\" << endl;\n"; out << "\t}else {\n"; out << "\t\tout << \"" + rDescription + '\t' + nonRDescription + "\" << endl;\n"; out << "\t\tout << \"" + rFormat + '\t' + nonRFormat + "\" << endl;\n"; out << "\t\tout << \"" + requiredValues + '\t' + nonRequiredValues + "\" << endl;\n"; out << "\t}\n"; out << "}"; it++; for (; it != categories.end(); it++) { if ((it->first == "\"Environment\"")) {} else { //create outputs string requiredValues = requiredByALL; string nonRequiredValues = ""; string rFormat = rFormatALL; string nonRFormat = ""; string rDescription = rDescriptionALL; string nonRDescription = ""; map::iterator itValue = (it->second).values.begin(); if (itValue->second.required) { requiredValues += "\t*" + itValue->first; rFormat += "\t{" + (itValue->second.format)+ "}"; rDescription += "\t{" + (itValue->second.description) + "}"; }else { nonRequiredValues += itValue->first; nonRFormat += "{" + itValue->second.format+ "}"; nonRDescription += "{" + (itValue->second.description) + "}"; } itValue++; for (; itValue != (it->second).values.end(); itValue++) { if (itValue->second.required) { requiredValues += "\t*" + itValue->first; rFormat += "\t{" + (itValue->second.format)+ "}"; rDescription += "\t{" + (itValue->second.description) + "}"; }else { nonRequiredValues += "\t" + itValue->first; nonRFormat += "\t{" + itValue->second.format+ "}"; nonRDescription += "\t{" + (itValue->second.description) + "}"; } } out << "else if (package == " + it->first + ") 
{\n"; out << "\tout << \"#" + it->second.packageName + "\" << endl;\n"; out << "\t if (requiredonly) {\n"; out << "\t\tout << \"" + rDescription + "\" << endl;\n"; out << "\t\tout << \"" + rFormat + "\" << endl;\n"; out << "\t\tout << \"" + requiredValues + "\" << endl;\n"; out << "\t}else {\n"; out << "\t\tout << \"" + rDescription + '\t' + nonRDescription + "\" << endl;\n"; out << "\t\tout << \"" + rFormat + '\t' + nonRFormat + "\" << endl;\n"; out << "\t\tout << \"" + requiredValues + '\t' + nonRequiredValues + "\" << endl;\n"; out << "\t}\n"; out << "}"; } } out << endl << endl; it = categories.begin(); out << "if ((package == " << it->first << ") "; it++; for (; it != categories.end(); it++) { out << "|| (package == " << it->first << ") "; } out << ") {}\n\n"; out << "vector requiredFieldsForPackage;\n"; vector rAll; util.splitAtChar(requiredByALL, rAll, '\t'); for (int i = 0; i < rAll.size(); i++) { out << "requiredFieldsForPackage.push_back(\"" + rAll[i].substr(1) + "\");\n"; } out << "\n\n"; for (it = categories.begin(); it != categories.end(); it++) { out << "if (packageType == \"" << it->second.packageName << "\") {"; for (map::iterator itValue = (it->second).values.begin(); itValue != (it->second).values.end(); itValue++) { if (itValue->second.required) { out << "\trequiredFieldsForPackage.push_back(\"" + itValue->first + "\");"; } } out << "}\n"; } out.close(); m->mothurOutEndLine(); m->mothurOut("Output File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MimarksAttributesCommand", "execute"); exit(1); } } //********************************************************************************************************************** Attribute MimarksAttributesCommand::readAttribute(ifstream& in){ try { //read string header = util.getline(in); gobble(in); if (header == "") { Attribute temp; return temp; } if (header != "") { m->mothurOut("[ERROR]: " + header + ", expected '' in file.\n"); m->setControl_pressed(true); } //read name //wastewater type gobble(in); string name = util.getline(in); gobble(in); trimTags(name); //read hamonized name //wastewater_type gobble(in); string hname = util.getline(in); gobble(in); trimTags(hname); //read description // //the origin of wastewater such as human waste, rainfall, storm drains, etc. 
// string description = ""; unsigned long long spot = in.tellg(); gobble(in); char c = in.get(); c = in.get(); if (c == 'D') { //description description += "") != string::npos) { break; } } trimTags(description); }else { //package in.seekg(spot); } //read format //{text} spot = in.tellg(); gobble(in); c = in.get(); c = in.get(); string format = ""; if (c == 'F') { //format format += "") == string::npos) { //format is not on oneline while (!in.eof()) { gobble(in); string thisLine = util.getline(in); gobble(in); format += thisLine; if (thisLine.find("") != string::npos) { break; } } } trimTags(format); }else { //package in.seekg(spot); } Attribute attribute(hname, description, name, format); //read Synonym - may be none //ref biomaterial bool FirstTime = true; while (!in.eof()) { unsigned long long thisspot = in.tellg(); gobble(in); char c = in.get(); c = in.get(); if (c == 'S') { //synonym FirstTime = false; util.getline(in); gobble(in); }else { //package if (FirstTime) { in.seekg(spot); } else { in.seekg(thisspot); } break; } } //read packages - may be none //MIGS.ba.air.4.0 while (!in.eof()) { string package = util.getline(in); gobble(in); if (package == "") { break; } else { Package thisPackage = parsePackage(package); if (thisPackage.groupName != "ignore") { attribute.packages.push_back(thisPackage); } } } return attribute; } catch(exception& e) { m->errorOut(e, "MimarksAttributesCommand", "execute"); exit(1); } } //********************************************************************************************************************** Package MimarksAttributesCommand::parsePackage(string package){ try { string openingTag = trimTags(package); Package thispackage; thispackage.name = package; //only care about packages from our selection if (thispackage.name.find(selectedPackage) == string::npos) { thispackage.groupName = "ignore"; return thispackage; } int pos = openingTag.find("use"); if (pos != string::npos) { //read required or not string use = openingTag.substr(openingTag.find_first_of("\""), 11); if (use == "\"mandatory\"") { thispackage.required = true; } }else { m->mothurOut("[ERROR]: parsing error - " + openingTag + ". Expeacted something like in file.\n"); m->setControl_pressed(true); return thispackage; } //selectedPackage = MIMARKS.survey. 
pos = package.find(selectedPackage); if (pos != string::npos) { //read groupname string group = package.substr(pos+15); group = group.substr(0, (group.find_first_of("."))); thispackage.groupName = "\"" + group + "\""; }else { thispackage.groupName = "ignore"; } return thispackage; } catch(exception& e) { m->errorOut(e, "MimarksAttributesCommand", "parsePackage"); exit(1); } } //********************************************************************************************************************** string MimarksAttributesCommand::trimTags(string& value){ try { string forwardTag = ""; string thisValue = ""; int openCarrot = 0; int closedCarrot = 0; for (int i = 0; i < value.length(); i++) { if (m->getControl_pressed()) { return forwardTag; } if (value[i] == '<') { openCarrot++; } else if (value[i] == '>') { closedCarrot++; } //you are reading front tag if ((openCarrot == 1) && (closedCarrot == 0)) { forwardTag += value[i]; } if (openCarrot == closedCarrot) { //reading value if (value[i] != '>') { thisValue += value[i]; } } if (openCarrot > 1) { break; } } value = thisValue; return (forwardTag + '>'); } catch(exception& e) { m->errorOut(e, "MimarksAttributesCommand", "trimTags"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/mimarksattributescommand.h000077500000000000000000000054571424121717000235220ustar00rootroot00000000000000// // mimarksattributescommand.h // Mothur // // Created by Sarah Westcott on 3/17/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #ifndef __Mothur__mimarksattributescommand__ #define __Mothur__mimarksattributescommand__ #include "command.hpp" struct Package { bool required; string groupName; string name; Package() { required=false; groupName=""; name=""; } Package(bool r, string g, string n) : required(r), groupName(g), name(n) {} ~Package() = default; string getPackageString() { string r = "mandatory"; if (!required) { r = "optional"; } string packageString = name + '\t' + groupName + '\t' + r; return packageString; } }; struct Value { bool required; string format, description; Value() { format=""; description=""; required=false; } Value(bool r, string d, string f) : format(f), description(d), required(r) {} ~Value() = default; }; struct Group { string packageName; map values; Group() { packageName = ""; } Group(string p) : packageName(p) {} ~Group() = default; }; struct Attribute { string name, harmonizedName, description, format; vector packages; string getPackagesString() { string packagesString = ""; for (int i = 0; i < packages.size(); i++) { packagesString += packages[i].getPackageString() + "\n"; } return packagesString; } Attribute() { format=""; description=""; harmonizedName=""; name=""; } Attribute(string hn, string d, string n, string f) : format(f), harmonizedName(hn), name(n), description(d) {} ~Attribute() = default; }; /**************************************************************************************************/ class MimarksAttributesCommand : public Command { public: MimarksAttributesCommand(string); ~MimarksAttributesCommand(){} vector setParameters(); string getCommandName() { return "mimarks.attributes"; } string getCommandCategory() { return "Hidden"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "http://www.mothur.org/wiki/Mimarks.attributes"; } string getDescription() { return "Reads bioSample Attributes xml and generates source for get.mimarkspackage command."; } 
int execute(); void help() { m->mothurOut(getHelpString()); } private: Attribute readAttribute(ifstream& in); Package parsePackage(string package); string trimTags(string& value); bool abort; string xmlFile, selectedPackage; vector outputNames; }; /**************************************************************************************************/ #endif /* defined(__Mothur__mimarksattributescommand__) */ mothur-1.48.0/source/commands/newcommandtemplate.cpp000077500000000000000000000367131424121717000226270ustar00rootroot00000000000000// // newcommandtemplate.cpp // Mothur // // Created by Sarah Westcott on 5/3/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // // #include "newcommandtemplate.h" // Test Change. //********************************************************************************************************************** vector NewCommand::setParameters(){ try { //eaxamples of each type of parameter. more info on the types of parameters can be found in commandparameter.h CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pprocessors); //files that have dependancies CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","outputType",false,false); parameters.push_back(pphylip); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName","outputType",false,false); parameters.push_back(pname); CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName","outputType",false,false); parameters.push_back(pcolumn); //files that do not have dependancies - fasta is set to not be required whereas shared is set to be required CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","outputType",false,false); parameters.push_back(pfasta); CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); //choose more than one multiple options CommandParameter pcalc("calc", "Multiple", "jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-morisitahorn-braycurtis", "jest-thetayc", "", "", "","",true,false); parameters.push_back(pcalc); //choose only one multiple options CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "","",false,false); parameters.push_back(pdistance); CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptiming); //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files. CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); //set output file types vector tempOutNames; outputTypes["fileType1"] = tempOutNames; //filetypes should be things like: shared, fasta, accnos... 
outputTypes["fileType2"] = tempOutNames; outputTypes["FileType3"] = tempOutNames; //set abort and called Help abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "NewCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string NewCommand::getHelpString(){ try { string helpString = ""; helpString += "The new command allows you to ....\n"; helpString += "The new command parameters are: ....\n"; helpString += "The whatever parameter is used to ....\n"; helpString += "The new command should be in the following format: \n"; helpString += "new(...)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "NewCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string NewCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fileType1") { pattern = "[filename],tag1"; } else if (type == "fileType2") { pattern = "[filename],tag2"; } else if (type == "fileType3") { pattern = "[filename],tag3"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "NewCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** NewCommand::NewCommand(string option) : Command() { try { //////////////////////////////////////////////////////// /////////////////// start leave alone block //////////// //////////////////////////////////////////////////////// //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { //valid paramters for this command OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; ///variables for examples below that you will most likely want to put in the header for //use by the other class functions. string phylipfile, columnfile, namefile, fastafile, sharedfile, method, countfile; int processors; bool useTiming, allLines; vector Estimators, Groups; set labels; //if allLines is used it should be initialized to 1 above. 
//check for parameters phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { phylipfile = ""; } else { current->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { columnfile = ""; abort = true; } else if (columnfile == "not found") { columnfile = ""; } else { current->setColumnFile(columnfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } //get fastafile - it is not required fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort=true; } else if (fastafile == "not found") { fastafile = ""; } if (fastafile != "") { current->setFastaFile(fastafile); } if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these? //give priority to column, then phylip columnfile = current->getColumnFile(); if (columnfile != "") { m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { phylipfile = current->getPhylipFile(); if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a phylip or column file before you can use the cluster command.\n"); abort = true; } } } else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a cluster command you must enter ONLY ONE of the following: phylip or column.\n"); abort = true; } if (columnfile != "") { if (namefile == "") { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { m->mothurOut("You need to provide a namefile if you are going to use the column format.\n"); abort = true; } } } //get shared file, it is required sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } ////////////////////////////////////////////////////////////////////// ////////// example of getting other types of parameters ////////////// ////////////////////////////////////////////////////////////////////// //use only one Mutliple type method = validParameter.valid(parameters, "method"); if (method == "not found") { method = "average"; } if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { } else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest, average, and weighted.\n"); abort = true; } //use more than one multiple type. do not check to make sure the entry is valid. 
string calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } else { if (calc == "default") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } } util.splitAtDash(calc, Estimators); //Boolean type - m->isTrue looks for t, true, f or false and is case insensitive string timing = validParameter.valid(parameters, "timing"); if (timing == "not found") { timing = "F"; } useTiming = util.isTrue(timing); //Number type - mothurConvert makes sure the convert can happen to avoid a crash. string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); //Groups must be checked later to make sure they are valid. SharedUtilities has functions of check the validity, just make to so m->setGroups() after the checks. If you are using these with a shared file no need to check the SharedRAbundVector class will call SharedUtilites for you, kinda nice, huh? string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } //Commonly used to process list, rabund, sabund, shared and relabund files. Look at "smart distancing" examples below in the execute function. string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } //if your command has a namefile as an option, you may want ot check to see if there is a current namefile //saved by mothur that is associated with the other files you are using as inputs. //You can do so by adding the files associated with the namefile to the files vector and then asking parser to check. 
} } catch(exception& e) { m->errorOut(e, "NewCommand", "NewCommand"); exit(1); } } //********************************************************************************************************************** int NewCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } /* InputData input(inputFileName, format, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; if (format == "relabund") { SharedRAbundFloatVectors* lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } //////// myfunction(lookup); - call your function to process relabund data //////////////////// delete lookup; lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } }else if (format == "sharedfile") { SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } //////// myfunction(lookup); - call your function to process shared data //////////////////// delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } }else if (format == "list") { ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } //////// myfunction(list); - call your function to process list data //////////////////// delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } }else if (format == "rabund") { RAbundVector* rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); while (rabund != nullptr) { if (m->getControl_pressed()) { delete rabund; break; } //////// myfunction(rabund); - call your function to process list data //////////////////// delete rabund; rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); } } */ //if you make a new file or a type that mothur keeps track of the current version, you can update it with something like the following. string currentFasta = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; current->setFastaFile(currentFasta); } } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "NewCommand", "NewCommand"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/newcommandtemplate.h000077500000000000000000000034331424121717000222650ustar00rootroot00000000000000#ifndef Mothur_newcommandtemplate_h #define Mothur_newcommandtemplate_h // // newcommandtemplate.h // Mothur // // Created by westcott on 5/3/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // //test //*********Be sure to change ifdef and define to a unique name.**************// /* This class is designed to provide a template for creating new commands. It includes code snippets to make creating the command classes virtually pure functions easier. 
It includes sample parameter declaration and parameter checking, as well as reference to other classes you may find helpful. It also includes the code needed to read a sharedfile. It is a work in progress so please add things you may find helpful to yourself or other developers trying to add commands to mothur. */ #include "command.hpp" /**************************************************************************************************/ class NewCommand : public Command { public: NewCommand(string); ~NewCommand(){} vector setParameters(); string getCommandName() { return "newCommandNameToBeSeenByUser"; } string getCommandCategory() { return "commandCategory"; } string getOutputPattern(string); //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden string getHelpString(); string getCitation() { return "http://www.mothur.org/wiki/newCommandNameToBeSeenByUser"; } string getDescription() { return "brief description"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; vector outputNames; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/nmdscommand.cpp000077500000000000000000000472731424121717000212460ustar00rootroot00000000000000/* * nmdscommand.cpp * mothur * * Created by westcott on 1/11/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "nmdscommand.h" #include "readphylipvector.h" //********************************************************************************************************************** vector NMDSCommand::setParameters(){ try { CommandParameter paxes("axes", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(paxes); CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","nmds-stress",false,true,true); parameters.push_back(pphylip); CommandParameter pmaxdim("maxdim", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmaxdim); CommandParameter pmindim("mindim", "Number", "", "2", "", "", "","",false,false); parameters.push_back(pmindim); CommandParameter piters("iters", "Number", "", "10", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pmaxiters("maxiters", "Number", "", "500", "", "", "","",false,false); parameters.push_back(pmaxiters); CommandParameter pepsilon("epsilon", "Number", "", "0.000000000001", "", "", "","",false,false); parameters.push_back(pepsilon); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["nmds"] = tempOutNames; outputTypes["iters"] = tempOutNames; outputTypes["stress"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string NMDSCommand::getHelpString(){ try { string helpString = ""; helpString += "The nmds command is modelled after the nmds code written in R by Sarah Goslee, 
using Non-metric multidimensional scaling function using the majorization algorithm from Borg & Groenen 1997, Modern Multidimensional Scaling.\n"; helpString += "The nmds command parameters are phylip, axes, mindim, maxdim, maxiters, iters and epsilon.\n"; helpString += "The phylip parameter allows you to enter your distance file.\n"; helpString += "The axes parameter allows you to enter a file containing a starting configuration.\n"; helpString += "The maxdim parameter allows you to select the maximum dimensions to use. Default=2\n"; helpString += "The mindim parameter allows you to select the minimum dimensions to use. Default=2\n"; helpString += "The maxiters parameter allows you to select the maximum number of iters to try with each random configuration. Default=500\n"; helpString += "The iters parameter allows you to select the number of random configuration to try. Default=10\n"; helpString += "The epsilon parameter allows you to select set an acceptable stopping point. Default=1e-12.\n"; helpString += "Example nmds(phylip=yourDistanceFile).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string NMDSCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "nmds") { pattern = "[filename],nmds.axes"; } else if (type == "stress") { pattern = "[filename],nmds.stress"; } else if (type == "iters") { pattern = "[filename],nmds.iters"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** NMDSCommand::NMDSCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser. 
getParameters(); ValidParameters validParameter; phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { //if there is a current phylip file, use it phylipfile = current->getPhylipFile(); if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("You have no current phylip file and the phylip parameter is required.\n"); abort = true; } }else { current->setPhylipFile(phylipfile); } axesfile = validParameter.validFile(parameters, "axes"); if (axesfile == "not open") { axesfile = ""; abort = true; } else if (axesfile == "not found") { axesfile = ""; } if (outputdir == ""){ outputdir += util.hasPath(phylipfile); } string temp = validParameter.valid(parameters, "mindim"); if (temp == "not found") { temp = "2"; } util.mothurConvert(temp, mindim); temp = validParameter.valid(parameters, "maxiters"); if (temp == "not found") { temp = "500"; } util.mothurConvert(temp, maxIters); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "maxdim"); if (temp == "not found") { temp = "2"; } util.mothurConvert(temp, maxdim); temp = validParameter.valid(parameters, "epsilon"); if (temp == "not found") { temp = "0.000000000001"; } util.mothurConvert(temp, epsilon); if (mindim < 1) { m->mothurOut("mindim must be at least 1.\n"); abort = true; } if (maxdim < mindim) { maxdim = mindim; } } } catch(exception& e) { m->errorOut(e, "NMDSCommand", "NMDSCommand"); exit(1); } } //********************************************************************************************************************** int NMDSCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); vector names; vector< vector< double> > matrix; //read in phylip file ReadPhylipVector readFile(phylipfile); names = readFile.read(matrix); if (m->getControl_pressed()) { return 0; } //read axes vector< vector > axes; if (axesfile != "") { axes = readAxes(names); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(phylipfile)); string outputFileName = getOutputFileName("iters",variables); string stressFileName = getOutputFileName("stress",variables); outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName); outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName); ofstream out, out2; util.openOutputFile(outputFileName, out); util.openOutputFile(stressFileName, out2); out2.setf(ios::fixed, ios::floatfield); out2.setf(ios::showpoint); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); out2 << "Dimension\tIter\tStress\tRsq" << endl; double bestStress = 10000000; double bestR2 = 10000000; vector< vector > bestConfig; int bestDim = 0; for (int i = mindim; i <= maxdim; i++) { m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine(); for (int j = 0; j < iters; j++) { m->mothurOut(toString(j+1)); m->mothurOutEndLine(); //get configuration - either randomly generate or resize to this dimension vector< vector > thisConfig; if (axesfile == "") { thisConfig = generateStartingConfiguration(names.size(), i); } else { thisConfig = getConfiguration(axes, i); } if (m->getControl_pressed()) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { 
util.mothurRemove(outputNames[k]); } return 0; } //calc nmds for this dimension double stress; vector< vector > endConfig = nmdsCalc(matrix, thisConfig, stress); if (m->getControl_pressed()) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { util.mothurRemove(outputNames[k]); } return 0; } //calc euclid distances for new config vector< vector > newEuclid = linearCalc.calculateEuclidianDistance(endConfig); if (m->getControl_pressed()) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { util.mothurRemove(outputNames[k]); } return 0; } //calc correlation between original distances and euclidean distances from this config double rsquared = linearCalc.calcPearson(newEuclid, matrix); rsquared *= rsquared; if (m->getControl_pressed()) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { util.mothurRemove(outputNames[k]); } return 0; } //output results out << "Config" << (j+1); for (int k = 0; k < i; k++) { out << '\t' << "axis" << (k+1); } out << endl; out2 << i << '\t' << (j+1) << '\t' << stress << '\t' << rsquared << endl; output(endConfig, names, out); //save best if (stress < bestStress) { bestDim = i; bestStress = stress; bestR2 = rsquared; bestConfig = endConfig; } if (m->getControl_pressed()) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) { util.mothurRemove(outputNames[k]); } return 0; } } } out.close(); out2.close(); //output best config string BestFileName = getOutputFileName("nmds",variables); outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName); m->mothurOut("\nNumber of dimensions:\t" + toString(bestDim) + "\n"); m->mothurOut("Lowest stress :\t" + toString(bestStress) + "\n"); m->mothurOut("R-squared for configuration:\t" + toString(bestR2) + "\n"); ofstream outBest; util.openOutputFile(BestFileName, outBest); outBest.setf(ios::fixed, ios::floatfield); outBest.setf(ios::showpoint); outBest << "group"; for (int k = 0; k < bestConfig.size(); k++) { outBest << '\t' << "axis" << (k+1); } outBest << endl; output(bestConfig, names, outBest); outBest.close(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "execute"); exit(1); } } //********************************************************************************************************************** vector< vector > NMDSCommand::nmdsCalc(vector< vector >& matrix, vector< vector >& config, double& stress1) { try { vector< vector > newConfig = config; //calc euclid distances vector< vector > euclid = linearCalc.calculateEuclidianDistance(newConfig); if (m->getControl_pressed()) { return newConfig; } double stress2 = calculateStress(matrix, euclid); stress1 = stress2 + 1.0 + epsilon; int count = 0; while ((count < maxIters) && (abs(stress1 - stress2) > epsilon)) { count++; stress1 = stress2; if (m->getControl_pressed()) { return newConfig; } vector< vector > b; b.resize(euclid.size()); for (int i = 0; i < b.size(); i++) { b[i].resize(euclid[i].size(), 0); } vector columnSums; columnSums.resize(euclid.size(), 0); for (int i = 0; i < euclid.size(); i++) { for (int j = 0; j < euclid[i].size(); j++) { //eliminate divide by zero error if (!util.isEqual(euclid[i][j], 0)) { b[i][j] = matrix[i][j] / euclid[i][j]; columnSums[j] += b[i][j]; 
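		//Annotation for clarity: this block builds the B matrix of the Guttman transform (the majorization step from Borg & Groenen 1997). Off-diagonal entries are -delta_ij/d_ij (the sign flip happens on the next statement), the diagonal later receives the column sums, and the updated configuration is then computed below as (1/n) * config * B.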
b[i][j] *= -1.0; } } } //put in diagonal sums for (int i = 0; i < euclid.size(); i++) { b[i][i] = columnSums[i]; } int numInLowerTriangle = matrix.size() * (matrix.size()-1) / 2.0; double n = (1.0 + sqrt(1.0 + 8.0 * numInLowerTriangle)) / 2.0; //matrix mult newConfig = linearCalc.matrix_mult(newConfig, b); for (int i = 0; i < newConfig.size(); i++) { for (int j = 0; j < newConfig[i].size(); j++) { newConfig[i][j] *= (1.0 / n); } } euclid = linearCalc.calculateEuclidianDistance(newConfig); stress2 = calculateStress(matrix, euclid); } return newConfig; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "generateStartingConfiguration"); exit(1); } } //********************************************************************************************************************** //generate random config vector< vector > NMDSCommand::generateStartingConfiguration(int numNames, int dimension) { try { vector< vector > axes; axes.resize(dimension); for (int i = 0; i < axes.size(); i++) { axes[i].resize(numNames); } //generate random number between -1 and 1, precision 6 for (int i = 0; i < axes.size(); i++) { for (int j = 0; j < axes[i].size(); j++) { if (m->getControl_pressed()) { return axes; } //generate random int between 0 and 99999 int myrand = util.getRandomIndex(99999); //generate random sign int mysign = util.getRandomIndex(99999); //if mysign is even then sign = positive, else sign = negative if ((mysign % 2) == 0) { mysign = 1.0; } else { mysign = -1.0; } axes[i][j] = mysign * myrand / (float) 100000; } } return axes; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "generateStartingConfiguration"); exit(1); } } //********************************************************************************************************************** //normalize configuration int NMDSCommand::normalizeConfiguration(vector< vector >& axes, int numNames, int dimension) { try { vector averageAxes; averageAxes.resize(dimension, 0.0); //find average for (int i = 0; i < axes.size(); i++) { for (int j = 0; j < axes[i].size(); j++) { averageAxes[i] += axes[i][j]; } averageAxes[i] /= (float) numNames; } //normalize axes double sumDenom = 0.0; for (int i = 0; i < axes.size(); i++) { for (int j = 0; j < axes[i].size(); j++) { sumDenom += ((axes[i][j] - averageAxes[i]) * (axes[i][j] - averageAxes[i])); } } double denom = sqrt((sumDenom / (float) (axes.size() * numNames))); for (int i = 0; i < axes.size(); i++) { for (int j = 0; j < axes[i].size(); j++) { axes[i][j] = (axes[i][j] - averageAxes[i]) / denom; } } return 0; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "normalizeConfiguration"); exit(1); } } //********************************************************************************************************************** //get configuration vector< vector > NMDSCommand::getConfiguration(vector< vector >& axes, int dimension) { try { vector< vector > newAxes; newAxes.resize(dimension); for (int i = 0; i < dimension; i++) { newAxes[i] = axes[i]; } return newAxes; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "getConfiguration"); exit(1); } } //********************************************************************************************************************** //find raw stress, and normalize using double NMDSCommand::calculateStress(vector< vector >& matrix, vector< vector >& config) { try { double normStress = 0.0; double denom = 0.0; double rawStress = 0.0; //find raw stress for (int i = 0; i < matrix.size(); i++) { for (int j = 0; j < matrix[i].size(); j++) { if (m->getControl_pressed()) { return 
normStress; } rawStress += ((matrix[i][j] - config[i][j]) * (matrix[i][j] - config[i][j])); denom += (config[i][j] * config[i][j]); } } //normalize stress if (!util.isEqual(rawStress, 0) && !util.isEqual(denom, 0)) { normStress = sqrt((rawStress / denom)); } return normStress; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "calculateStress"); exit(1); } } //********************************************************************************************************************** int NMDSCommand::output(vector< vector >& config, vector& names, ofstream& out) { try { for (int i = 0; i < names.size(); i++) { out << names[i]; for (int j = 0; j < config.size(); j++) { out << '\t' << config[j][i]; } out << endl; } out << endl << endl; return 0; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "output"); exit(1); } } /*****************************************************************/ vector< vector > NMDSCommand::readAxes(vector names){ try { ifstream in; util.openInputFile(axesfile, in); string headerLine = util.getline(in); gobble(in); //count the number of axis you are reading bool done = false; int count = 0; while (!done) { int pos = headerLine.find("axis"); if (pos != string::npos) { count++; headerLine = headerLine.substr(pos+4); }else { done = true; } } if (maxdim > count) { m->mothurOut("You requested maxdim = " + toString(maxdim) + ", but your file only includes " + toString(count) + ". Using " + toString(count) + ".\n"); maxdim = count; if (maxdim < mindim) { m->mothurOut("Also adjusting mindim to " + toString(maxdim-1) + ".\n"); } } vector< vector > axes; axes.resize(maxdim); for (int i = 0; i < axes.size(); i++) { axes[i].resize(names.size(), 0.0); } map > orderedAxes; map >::iterator it; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return axes; } string group = ""; in >> group; gobble(in); bool ignore = false; if (!util.inUsersGroups(group, names)) { ignore = true; m->mothurOut(group + " is in your axes file and not in your distance file, ignoring.\n"); } vector thisGroupsAxes; for (int i = 0; i < count; i++) { float temp = 0.0; in >> temp; //only save the axis we want if (i < maxdim) { thisGroupsAxes.push_back(temp); } } if (!ignore) { orderedAxes[group] = thisGroupsAxes; } gobble(in); } in.close(); //sanity check if (names.size() != orderedAxes.size()) { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting.\n"); m->setControl_pressed(true); return axes; } //put axes info in same order as distance file, just in case for (int i = 0; i < names.size(); i++) { it = orderedAxes.find(names[i]); if (it != orderedAxes.end()) { vector thisGroupsAxes = it->second; for (int j = 0; j < thisGroupsAxes.size(); j++) { axes[j][i] = thisGroupsAxes[j]; } }else { m->mothurOut("[ERROR]: your axes file does not match your distance file, aborting.\n"); m->setControl_pressed(true); return axes; } } return axes; } catch(exception& e) { m->errorOut(e, "NMDSCommand", "readAxes"); exit(1); } } /**********************************************************************************************************************/ mothur-1.48.0/source/commands/nmdscommand.h000077500000000000000000000037431424121717000207050ustar00rootroot00000000000000#ifndef NMDSCOMMAND_H #define NMDSCOMMAND_H /* * nmdscommand.h * mothur * * Created by westcott on 1/11/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" #include "linearalgebra.h" /* Translated from the nmds.R code written by Sarah Goslee using, # Non-metric multidimensional scaling function # using the majorization algorithm from # Borg & Groenen 1997, Modern Multidimensional Scaling. # # also referenced (Kruskal 1964) */ /*****************************************************************/ class NMDSCommand : public Command { public: NMDSCommand(string); ~NMDSCommand(){} vector setParameters(); string getCommandName() { return "nmds"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Borg, Groenen (1997). Non-metric multidimensional scaling function using the majorization algorithm, in Modern Multidimensional Scaling. Ed. T.F. Cox and M.A.A. Cox. Chapman and Hall. \nhttp://www.mothur.org/wiki/Nmds"; } string getDescription() { return "nmds"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; string phylipfile, axesfile; int maxdim, mindim, maxIters, iters; double epsilon; vector outputNames; LinearAlgebra linearCalc; vector< vector > nmdsCalc(vector< vector >&, vector< vector >&, double&); vector< vector > getConfiguration(vector< vector >&, int); vector< vector > generateStartingConfiguration(int, int); //pass in numNames, return axes int normalizeConfiguration(vector< vector >&, int, int); double calculateStress(vector< vector >&, vector< vector >&); vector< vector > readAxes(vector); int output(vector< vector >&, vector&, ofstream&); }; /*****************************************************************/ #endif mothur-1.48.0/source/commands/nocommands.cpp000077500000000000000000000015301424121717000210660ustar00rootroot00000000000000/* * nocommand.cpp * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "nocommands.h" //********************************************************************************************************************** NoCommand::NoCommand(string option) : Command() {} //********************************************************************************************************************** int NoCommand::execute(){ MothurOut* m = MothurOut::getInstance(); //Could choose to give more help here? m->mothurOut("[ERROR]: Invalid command.\n"); CommandFactory* valid = CommandFactory::getInstance(); valid->printCommands(cout); return 2; } //********************************************************************************************************************** mothur-1.48.0/source/commands/nocommands.h000077500000000000000000000017751424121717000205460ustar00rootroot00000000000000#ifndef NOCOMMAND_H #define NOCOMMAND_H /* * nocommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This command is run if the user enters an invalid command. 
*/ #include "command.hpp" #include "commandfactory.hpp" class NoCommand : public Command { public: NoCommand(string); ~NoCommand(){} vector setParameters() { return outputNames; } //dummy, doesn't really do anything string getCommandName() { return "NoCommand"; } string getCommandCategory() { return "Hidden"; } string getHelpString() { return "No Command"; } string getOutputPattern(string) { return ""; } string getCitation() { return "no citation"; } string getDescription() { return "no description"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames; }; #endif mothur-1.48.0/source/commands/normalizesharedcommand.cpp000077500000000000000000000421251424121717000234630ustar00rootroot00000000000000/* * normalizesharedcommand.cpp * Mothur * * Created by westcott on 9/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "normalizesharedcommand.h" //********************************************************************************************************************** vector NormalizeSharedCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(prelabund); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "","",false,false,true); parameters.push_back(pmethod); CommandParameter pnorm("norm", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pnorm); CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pmakerelabund); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["shared"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "NormalizeSharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string NormalizeSharedCommand::getHelpString(){ try { string helpString = ""; helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label. shared or relabund is required, unless you have a valid current file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n"; helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. 
We hope to add more ways to normalize in the future, suggestions are welcome!\n"; helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n"; helpString += "The norm parameter allows you to specify the number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n"; helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n"; helpString += "Example normalize.shared(groups=A-B-C, method=totalgroup).\n"; helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; helpString += "The normalize.shared command outputs a .norm.shared file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "NormalizeSharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string NormalizeSharedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "shared") { pattern = "[filename],[distance],norm.shared"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "NormalizeSharedCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** NormalizeSharedCommand::NormalizeSharedCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map<string, string> parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { format = "sharedfile"; inputfile = sharedfile; current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { relabundfile = ""; abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { format = "relabund"; inputfile = relabundfile; current->setRelAbundFile(relabundfile); } if ((sharedfile == "") && (relabundfile == "")) { //is there a current file available for either of these? //give priority to shared, then relabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a shared or relabund file.\n"); abort = true; } } } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } //check for optional parameter and set defaults // ...at some point we should add some additional type checking...
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; pickedGroups = false; } else { pickedGroups = true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } method = validParameter.valid(parameters, "method"); if (method == "not found") { method = "totalgroup"; } if ((method != "totalgroup") && (method != "zscore")) { m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n"); abort = true; } string temp = validParameter.valid(parameters, "norm"); if (temp == "not found") { norm = 0; //once you have read, set norm to smallest group number }else { util.mothurConvert(temp, norm); if (norm < 0) { m->mothurOut("norm must be positive.\n"); abort=true; } } temp = validParameter.valid(parameters, "makerelabund"); if (temp == "") { temp = "f"; } makeRelabund = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand"); exit(1); } } //********************************************************************************************************************** int NormalizeSharedCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } InputData input(inputfile, format, Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; //you are reading a sharedfile and you do not want to make relabund if ((format == "sharedfile") && (!makeRelabund)) { SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); //look for groups whose numseqs is below norm and remove them, warning the user if (norm != 0) { lookup->removeGroups(norm); Groups = lookup->getNamesGroups(); } if (method == "totalgroup") { //set norm to smallest group number if (norm == 0) { norm = lookup->getNumSeqsSmallestGroup(); } m->mothurOut("Normalizing to " + toString(norm) + ".\n"); } bool printHeaders = true; while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } normalize(lookup, printHeaders); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } }else{ //relabund values SharedRAbundFloatVectors* lookupFloat = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookupFloat->getNamesGroups(); //look for groups whose numseqs is below norm and remove them, warning the user if (norm != 0) { lookupFloat->removeGroups(norm); Groups = lookupFloat->getNamesGroups(); } //set norm to smallest group number if (method == "totalgroup") { if (norm == 0) { norm = lookupFloat->getNumSeqsSmallestGroup(); Groups = lookupFloat->getNamesGroups(); } m->mothurOut("Normalizing to " + toString(norm) + ".\n"); } bool printHeaders = true; while (lookupFloat != nullptr) { if (m->getControl_pressed()) { delete lookupFloat; break; } normalize(lookupFloat, printHeaders); delete lookupFloat; lookupFloat = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} 
m->mothurOut("\nOutput File Names: \n"); //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); //set shared file as new current sharedfile string currentName = ""; itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "NormalizeSharedCommand", "execute"); exit(1); } } //********************************************************************************************************************** int NormalizeSharedCommand::normalize(SharedRAbundVectors*& thisLookUp, bool& printHeaders){ try { vector lookupGroups = thisLookUp->getNamesGroups(); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[distance]"] = thisLookUp->getLabel(); string outputFileName = getOutputFileName("shared",variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); if (method == "totalgroup") { //save numSeqs since they will change as the data is normalized vector sizes; for (int i = 0; i < lookupGroups.size(); i++) { sizes.push_back(thisLookUp->getNumSeqs(lookupGroups[i])); } for (int j = 0; j < thisLookUp->getNumBins(); j++) { for (int i = 0; i < lookupGroups.size(); i++) { if (m->getControl_pressed()) { out.close(); return 0; } int abund = thisLookUp->get(j, lookupGroups[i]); float relabund = abund / (float) sizes[i]; float newNorm = relabund * norm; //round to nearest int int finalNorm = (int) floor((newNorm + 0.5)); thisLookUp->set(j, finalNorm, lookupGroups[i]); } } }else if (method == "zscore") { for (int j = 0; j < thisLookUp->getNumBins(); j++) { if (m->getControl_pressed()) { out.close(); return 0; } //calc mean float mean = 0.0; for (int i = 0; i < lookupGroups.size(); i++) { mean += thisLookUp->get(j, lookupGroups[i]); } mean /= (float) lookupGroups.size(); //calc standard deviation float sumSquared = 0.0; for (int i = 0; i < lookupGroups.size(); i++) { sumSquared += (((float)thisLookUp->get(j, lookupGroups[i]) - mean) * ((float)thisLookUp->get(j, lookupGroups[i]) - mean)); } sumSquared /= (float) lookupGroups.size(); float standardDev = sqrt(sumSquared); for (int i = 0; i < lookupGroups.size(); i++) { int finalNorm = 0; if (!util.isEqual(standardDev, 0)) { // stop divide by zero float newNorm = ((float)thisLookUp->get(j, lookupGroups[i]) - mean) / standardDev; //round to nearest int finalNorm = (int) floor((newNorm + 0.5)); } thisLookUp->set(j, finalNorm, lookupGroups[i]); } } }else{ m->mothurOut(method + " is not a valid scaling option.\n"); m->setControl_pressed(true); return 0; } thisLookUp->eliminateZeroOTUS(); thisLookUp->print(out, printHeaders); out.close(); return 0; } catch(exception& e) { m->errorOut(e, "NormalizeSharedCommand", "normalize"); exit(1); } } //********************************************************************************************************************** int NormalizeSharedCommand::normalize(SharedRAbundFloatVectors*& thisLookUp, bool& printHeaders){ try { vector lookupGroups = thisLookUp->getNamesGroups(); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[distance]"] = 
thisLookUp->getLabel(); string outputFileName = getOutputFileName("shared",variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName); if (method == "totalgroup") { //save numSeqs since they will change as the data is normalized vector sizes; for (int i = 0; i < lookupGroups.size(); i++) { sizes.push_back(thisLookUp->getNumSeqs(lookupGroups[i])); } for (int j = 0; j < thisLookUp->getNumBins(); j++) { for (int i = 0; i < lookupGroups.size(); i++) { if (m->getControl_pressed()) { out.close(); return 0; } float abund = thisLookUp->get(j, lookupGroups[i]); float relabund = abund / (float) sizes[i]; float newNorm = relabund * norm; thisLookUp->set(j, newNorm, lookupGroups[i]); } } }else if (method == "zscore") { for (int j = 0; j < thisLookUp->getNumBins(); j++) { if (m->getControl_pressed()) { out.close(); return 0; } //calc mean float mean = 0.0; for (int i = 0; i < lookupGroups.size(); i++) { mean += thisLookUp->get(j, lookupGroups[i]); } mean /= (float) lookupGroups.size(); //calc standard deviation float sumSquared = 0.0; for (int i = 0; i < lookupGroups.size(); i++) { sumSquared += ((thisLookUp->get(j, lookupGroups[i]) - mean) * (thisLookUp->get(j, lookupGroups[i]) - mean)); } sumSquared /= (float) lookupGroups.size(); float standardDev = sqrt(sumSquared); for (int i = 0; i < lookupGroups.size(); i++) { float newNorm = 0.0; if (!util.isEqual(standardDev, 0)) { // stop divide by zero newNorm = ((float)thisLookUp->get(j, lookupGroups[i]) - mean) / standardDev; } thisLookUp->set(j, newNorm, lookupGroups[i]); } } }else{ m->mothurOut(method + " is not a valid scaling option.\n"); m->setControl_pressed(true); return 0; } thisLookUp->eliminateZeroOTUS(); thisLookUp->print(out, printHeaders); out.close(); return 0; } catch(exception& e) { m->errorOut(e, "NormalizeSharedCommand", "normalize"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/normalizesharedcommand.h000077500000000000000000000022541424121717000231270ustar00rootroot00000000000000#ifndef NORMALIZESHAREDCOMMAND_H #define NORMALIZESHAREDCOMMAND_H /* * normalizesharedcommand.h * Mothur * * Created by westcott on 9/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" class NormalizeSharedCommand : public Command { public: NormalizeSharedCommand(string); ~NormalizeSharedCommand() = default; vector setParameters(); string getCommandName() { return "normalize.shared"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Normalize.shared"; } string getDescription() { return "normalize samples in a shared or relabund file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines, pickedGroups, makeRelabund; set labels; //holds labels to be used string groups, label, method, sharedfile, relabundfile, format, inputfile; int norm; vector Groups, outputNames; int normalize(SharedRAbundVectors*&, bool&); int normalize(SharedRAbundFloatVectors*&, bool&); }; #endif mothur-1.48.0/source/commands/otuassociationcommand.cpp000077500000000000000000000473541424121717000233510ustar00rootroot00000000000000/* * otuassociationcommand.cpp * Mothur * * Created by westcott on 1/19/12. * Copyright 2012 Schloss Lab. All rights reserved. 
 * */ #include "otuassociationcommand.h" #include "linearalgebra.h" //********************************************************************************************************************** vector OTUAssociationCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","otucorr",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRelMeta", "SharedRelMeta", "none","otucorr",false,false); parameters.push_back(prelabund); CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pmetadata); CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pcutoff); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pmethod("method", "Multiple", "pearson-spearman-kendall", "pearson", "", "", "","",false,false,true); parameters.push_back(pmethod); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["otucorr"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string OTUAssociationCommand::getHelpString(){ try { string helpString = ""; helpString += "The otu.association command reads a shared or relabund file and calculates the correlation coefficients between otus.\n"; helpString += "If you provide a metadata file, mothur will calculate the correlation between the metadata and the otus.\n"; helpString += "The otu.association command parameters are shared, relabund, metadata, groups, method, cutoff and label. The shared or relabund parameter is required.\n"; helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n"; helpString += "The label parameter allows you to select which distance levels you would like used; they are also separated by dashes.\n"; helpString += "The cutoff parameter allows you to set a pvalue at which the otu will be reported.\n"; helpString += "The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall.
Default=pearson.\n"; helpString += "The otu.association command should be in the following format: otu.association(shared=yourSharedFile, method=yourMethod).\n"; helpString += "Example otu.association(shared=genus.pool.shared, method=kendall).\n"; helpString += "The otu.association command outputs a .otu.corr file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string OTUAssociationCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "otucorr") { pattern = "[filename],[distance],[tag],otu.corr"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** OTUAssociationCommand::OTUAssociationCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { inputFileName = sharedfile; current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { inputFileName = relabundfile; current->setRelAbundFile(relabundfile); } metadatafile = validParameter.validFile(parameters, "metadata"); if (metadatafile == "not open") { abort = true; metadatafile = ""; } else if (metadatafile == "not found") { metadatafile = ""; } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; pickedGroups = false; } else { pickedGroups = true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } if (outputdir == ""){ outputdir = util.hasPath(inputFileName); } label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } if ((relabundfile == "") && (sharedfile == "")) { //is there are current file available for any of these? 
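//neither a shared nor a relabund file was supplied on the command line, so fall back to mothur's current files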
//give priority to shared, then relabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputFileName = sharedfile; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { inputFileName = relabundfile; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("You must provide either a shared or relabund file.\n"); abort = true; } } } if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may only use one of the following : shared or relabund file.\n"); abort = true; } method = validParameter.valid(parameters, "method"); if (method == "not found"){ method = "pearson"; } string temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, cutoff); if ((method != "pearson") && (method != "spearman") && (method != "kendall")) { m->mothurOut(method + " is not a valid method. Valid methods are pearson, spearman, and kendall.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "OTUAssociationCommand"); exit(1); } } //********************************************************************************************************************** int OTUAssociationCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (metadatafile != "") { readMetadata(); } //function are identical just different datatypes if (sharedfile != "") { processShared(); } else if (relabundfile != "") { processRelabund(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "execute"); exit(1); } } //********************************************************************************************************************** void OTUAssociationCommand::processShared(){ try { InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); if (metadatafile != "") { bool error = false; if (metadata[0].size() != lookup->size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup->size()) + " from the shared file.\n"); m->setControl_pressed(true); error=true; } } while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } process(lookup); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "processShared"); exit(1); } } //********************************************************************************************************************** void OTUAssociationCommand::process(SharedRAbundVectors*& lookup){ try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileName)); variables["[distance]"] = lookup->getLabel(); variables["[tag]"] = method; string outputFileName = getOutputFileName("otucorr",variables); 
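//xy[i] holds OTU i's abundances across the selected groups; the requested correlation (pearson, spearman or kendall)
//and its significance are computed for every OTU pair (lower triangle only, so each pair is written once) or, when a
//metadata file is given, for every OTU/metadata-column pair, and only pairs whose significance falls below the cutoff
//are written to the .otu.corr file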
outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); //column headings if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; } else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n"; } vector< vector > xy; xy.resize(lookup->getNumBins()); vector sampleNames = lookup->getNamesGroups(); for (int i = 0; i < lookup->getNumBins(); i++) { vector abunds = lookup->getOTU(i); for (int j = 0; j < abunds.size(); j++) { xy[i].push_back(abunds[j]); } } LinearAlgebra linear; vector currentLabels = lookup->getOTUNames(); if (metadatafile == "") {//compare otus for (int i = 0; i < xy.size(); i++) { for (int k = 0; k < i; k++) { if (m->getControl_pressed()) { out.close(); return; } double coef = 0.0; double sig = 0.0; if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); } else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); } else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall.\n"); m->setControl_pressed(true); } if (sig < cutoff) { out << currentLabels[i] << '\t' << currentLabels[k] << '\t' << coef << '\t' << sig << endl; } } } }else { //compare otus to metadata for (int i = 0; i < xy.size(); i++) { for (int k = 0; k < metadata.size(); k++) { if (m->getControl_pressed()) { out.close(); return; } double coef = 0.0; double sig = 0.0; if (method == "spearman") { coef = linear.calcSpearman(xy[i], metadata[k], sig); } else if (method == "pearson") { coef = linear.calcPearson(xy[i], metadata[k], sig); } else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall.\n"); m->setControl_pressed(true); } if (sig < cutoff) { out << currentLabels[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; } } } } out.close(); } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "process"); exit(1); } } //********************************************************************************************************************** void OTUAssociationCommand::processRelabund(){ try { InputData input(relabundfile, "relabund", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundFloatVectors* lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); if (metadatafile != "") { bool error = false; if (metadata[0].size() != lookup->size()) { m->mothurOut("[ERROR]: You have selected to use " + toString(metadata[0].size()) + " data rows from the metadata file, but " + toString(lookup->size()) + " from the relabund file.\n"); m->setControl_pressed(true); error=true;} } while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } process(lookup); delete lookup; lookup = util.getNextRelabund(input, allLines, userLabels, processedLabels, lastLabel); } } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "processRelabund"); exit(1); } } //********************************************************************************************************************** void OTUAssociationCommand::process(SharedRAbundFloatVectors*& lookup){ try { map variables; variables["[filename]"] = outputdir + 
util.getRootName(util.getSimpleName(inputFileName)); variables["[distance]"] = lookup->getLabel(); variables["[tag]"] = method; string outputFileName = getOutputFileName("otucorr",variables); outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); //column headings if (metadatafile == "") { out << "OTUA\tOTUB\t" << method << "Coef\tSignificance\n"; } else { out << "OTUA\tMetadata\t" << method << "Coef\tSignificance\n"; } vector< vector > xy; xy.resize(lookup->getNumBins()); vector sampleNames = lookup->getNamesGroups(); for (int i = 0; i < lookup->getNumBins(); i++) { for (int j = 0; j < sampleNames.size(); j++) { xy[i].push_back(lookup->get(i, sampleNames[j])); } } LinearAlgebra linear; vector currentLabels = lookup->getOTUNames(); if (metadatafile == "") {//compare otus for (int i = 0; i < xy.size(); i++) { for (int k = 0; k < i; k++) { if (m->getControl_pressed()) { out.close(); return; } double coef = 0.0; double sig = 0.0; if (method == "spearman") { coef = linear.calcSpearman(xy[i], xy[k], sig); } else if (method == "pearson") { coef = linear.calcPearson(xy[i], xy[k], sig); } else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall.\n"); m->setControl_pressed(true); } if (sig < cutoff) { out << currentLabels[i] << '\t' << currentLabels[k] << '\t' << coef << '\t' << sig << endl; } } } }else { //compare otus to metadata for (int i = 0; i < xy.size(); i++) { for (int k = 0; k < metadata.size(); k++) { if (m->getControl_pressed()) { out.close(); return; } double coef = 0.0; double sig = 0.0; if (method == "spearman") { coef = linear.calcSpearman(xy[i], metadata[k], sig); } else if (method == "pearson") { coef = linear.calcPearson(xy[i], metadata[k], sig); } else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall.\n"); m->setControl_pressed(true); } if (sig < cutoff) { out << currentLabels[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; } } } } out.close(); } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "process"); exit(1); } } /*****************************************************************/ void OTUAssociationCommand::readMetadata(){ try { ifstream in; util.openInputFile(metadatafile, in); string headerLine = util.getline(in); gobble(in); metadataLabels = util.splitWhiteSpace(headerLine); metadataLabels.erase(metadataLabels.begin()); int count = metadataLabels.size(); SharedRAbundFloatVectors* metadataLookup = new SharedRAbundFloatVectors(); metadataLookup->setLabels("1"); //read rest of file while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return; } string group = ""; in >> group; gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: metadata group = " + group + "\n"); } SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector(); tempLookup->setLabel("1"); tempLookup->setGroup(group); for (int i = 0; i < count; i++) { float temp = 0.0; in >> temp; if (m->getDebug()) { m->mothurOut("[DEBUG]: metadata value = " + toString(temp) + "\n"); } tempLookup->push_back(temp); } if (Groups.size() == 0) { metadataLookup->push_back(tempLookup); } else if (util.inUsersGroups(group, Groups)) { metadataLookup->push_back(tempLookup); } gobble(in); } in.close(); 
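//metadataLookup now holds one row per sample; the loop below transposes it so that metadata[i] is a single metadata
//variable's values across all samples, matching the per-OTU abundance vectors it is correlated against in process()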
metadataLookup->setOTUNames(metadataLabels); metadataLookup->eliminateZeroOTUS(); metadata.resize(metadataLookup->getNumBins()); vector sampleNames = metadataLookup->getNamesGroups(); for (int i = 0; i < metadataLookup->getNumBins(); i++) { for (int j = 0; j < sampleNames.size(); j++) { metadata[i].push_back(metadataLookup->get(i, sampleNames[j])); } } delete metadataLookup; } catch(exception& e) { m->errorOut(e, "OTUAssociationCommand", "readMetadata"); exit(1); } } /*****************************************************************/ mothur-1.48.0/source/commands/otuassociationcommand.h000077500000000000000000000024231424121717000230020ustar00rootroot00000000000000#ifndef OTUASSOCIATIONCOMMAND_H #define OTUASSOCIATIONCOMMAND_H /* * otuassociationcommand.h * Mothur * * Created by westcott on 1/19/12. * Copyright 2012 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" class OTUAssociationCommand : public Command { public: OTUAssociationCommand(string); ~OTUAssociationCommand(){} vector setParameters(); string getCommandName() { return "otu.association"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Otu.association"; } string getDescription() { return "calculate the correlation coefficient for the otus in a shared/relabund file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string sharedfile, relabundfile, metadatafile, groups, label, inputFileName, method; bool abort, pickedGroups, allLines; double cutoff; set labels; vector< vector< double> > metadata; vector outputNames, Groups, metadataLabels; void processShared(); void process(SharedRAbundVectors*&); void processRelabund(); void process(SharedRAbundFloatVectors*&); void readMetadata(); }; #endif mothur-1.48.0/source/commands/otuhierarchycommand.cpp000077500000000000000000000476451424121717000230160ustar00rootroot00000000000000/* * otuhierarchycommand.cpp * Mothur * * Created by westcott on 1/19/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "otuhierarchycommand.h" #include "inputdata.h" //********************************************************************************************************************** vector OtuHierarchyCommand::setParameters(){ try { CommandParameter poutput("output", "Multiple", "name-otulabel", "name", "", "", "","",false,false); parameters.push_back(poutput); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","otuheirarchy",false,true,true); parameters.push_back(plist); CommandParameter palist("asvlist", "InputTypes", "", "", "none", "none", "none","otuheirarchy",false,true,true); parameters.push_back(palist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","constaxonomy",false,true,true); parameters.push_back(ptaxonomy); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; asv = false; vector tempOutNames; outputTypes["otuheirarchy"] = tempOutNames; outputTypes["asvconstaxonomy"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string OtuHierarchyCommand::getHelpString(){ try { string helpString = ""; helpString += "The otu.hierarchy command is used to see how otus relate at two distances, or to see how ASVs relate to OTUs. \n"; helpString += "The otu.hierarchy command parameters are list, asvlist, count, taxonomy, label and output. list and label parameters are required for relating OTUs at different distances. asvlist, list, taxonomy and count are required for ASV to OTU relation. \n"; helpString += "The output parameter allows you to output the names of the sequence in the OTUs or the OTU labels. Options are name and otulabel, default is name. \n"; helpString += "The otu.hierarchy command should be in the following format: \n"; helpString += "otu.hierarchy(list=yourListFile, asvlist=yourAsvListFile, taxonomy=yourTaxonomyFile, count=yourCountFile, label=yourLabels).\n"; helpString += "otu.hierarchy(list=yourListFile, label=yourLabels).\n"; helpString += "Example otu.hierarchy(list=amazon.fn.list, label=0.01-0.03).\n"; helpString += "The otu.hierarchy command outputs a .otu.hierarchy file which is described on the wiki.\n"; getCommonQuestions(); return helpString; } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string OtuHierarchyCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; // string issue = "...template is not aligned, aborting. 
What do I do?"; issues.push_back(issue); //string ianswer = "\tMothur requires the reference file to be aligned to generate aligned sequences. You can download mothur's aligned silva references here, https://mothur.org/wiki/Silva_reference_files. For ITS sequences, see 'how to' below.\n"; ianswers.push_back(ianswer); //issue = "...xxx of your sequences generated alignments that eliminated too many bases... What does this mean?"; issues.push_back(issue); //ianswer = "\tBy default, mothur will align the reverse compliment of your sequences when the alignment process removes more than 50% of the bases indicating the read may be flipped. This process assembles the best possible alignment, and downstream analysis will remove any poor quality reads remaining.\n"; ianswers.push_back(ianswer); string howto = "How do I find the OTUs and taxonomies my ASVs are clustered in?"; howtos.push_back(howto); string hanswer = "\tYou can use the otu.hierarchy command to create a *.cons.taxonomy file. The first column is the ASVLabel, the second column is the abundance of the ASV, and the third column is the ASVs taxonomy with the OTULabel appended.\n\nmothur > otu.hierarchy(list=final.opti_mcc.list, asvlist=final.asv.list, taxonomy=final.taxonomy, count=final.count_table)\n"; hanswers.push_back(hanswer); // howto = "How do I create a custom reference for the region I am studying?"; howtos.push_back(howto); // hanswer = "\tYou can tailor your reference using this method: http://blog.mothur.org/2016/07/07/Customization-for-your-region/.\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string OtuHierarchyCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "otuheirarchy") { pattern = "[filename],[distance1],[tag],[distance2],otu.hierarchy"; } else if (type == "asvconstaxonomy") { pattern = "[filename],[tag],asv.cons.taxonomy"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** OtuHierarchyCommand::OtuHierarchyCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listFile = validParameter.validFile(parameters, "list"); if (listFile == "not found") { listFile = current->getListFile(); if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter.\n"); } else { m->mothurOut("No valid current list file. 
You must provide a list file.\n"); abort = true; } }else if (listFile == "not open") { abort = true; } else { current->setListFile(listFile); } asvlistFile = validParameter.validFile(parameters, "asvlist"); if (asvlistFile == "not found") { asvlistFile = ""; } else if (asvlistFile == "not open") { asvlistFile = ""; abort = true; } else { asv = true; } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not found") { taxfile = ""; } else if (taxfile == "not open") { taxfile = ""; abort = true; } else { current->setTaxonomyFile(taxfile); } if (outputdir == ""){ outputdir += util.hasPath(listFile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { if (!asv) { m->mothurOut("[ERROR]: label is a required parameter for the otu.hierarchy command, please correct.\n"); abort = true; } else { m->mothurOut("\nNo label provided, I will use the first label in the list file.\n"); } }else { util.splitAtDash(label, mylabels); if (!asv) { if (mylabels.size() != 2) { m->mothurOut("You must provide 2 labels.\n"); abort = true; } } } output = validParameter.valid(parameters, "output"); if (output == "not found") { output = "name"; } if ((output != "name") && (output != "otulabel")) { m->mothurOut("output options are name and otulabel. I will use name.\n"); output = "name"; } } } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "OtuHierarchyCommand"); exit(1); } } //********************************************************************************************************************** int OtuHierarchyCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (asv) { processASV(); } else { processHierarchy(); } if (m->getControl_pressed()) { outputTypes.clear(); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "execute"); exit(1); } } //********************************************************************************************************************** void OtuHierarchyCommand::processASV() { try { set labels; if (mylabels.size() != 0) { labels.insert(*mylabels.begin()); } set processedLabels; set userLabels = labels; string lastLabel = ""; //read otu list file InputData inputOTU(listFile, "list", nullVector); ListVector* list = util.getNextList(inputOTU, false, userLabels, processedLabels, lastLabel); string otuListLable = list->getLabel(); //read taxonomy file map taxMap; map::iterator itTax; util.readTax(taxfile, taxMap, true); //append OTU label to taxonomy for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { return; } string binnames = list->get(i); string otuLabel = list->getOTUName(i); //parse names in bin vector names; util.splitAtComma(binnames, names); for (int j = 0; j < names.size(); j++) { itTax = taxMap.find(names[j]); if (itTax != taxMap.end()) { itTax->second += otuLabel + ";"; }else{ m->mothurOut("\n[ERROR]: " + names[j] + " is missing from your taxonomy file, please 
correct.\n"); m->setControl_pressed(true); } } } delete list; //add redundant counts CountTable ct; bool hasCount = false; if (countfile != "") { ct.readTable(countfile, true, false); hasCount = true; } if (m->getControl_pressed()) { return; } //read asvlist file labels.clear(); processedLabels.clear(); lastLabel = ""; userLabels = labels; InputData input(asvlistFile, "list", nullVector); ListVector* asvlist = util.getNextList(input, false, userLabels, processedLabels, lastLabel); string asvLabel = asvlist->getLabel(); if (m->getControl_pressed()) { return; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listFile)); variables["[tag]"] = asvLabel + "-" + otuListLable; string outputFileName = getOutputFileName("asvconstaxonomy",variables); outputNames.push_back(outputFileName); outputTypes["asvconstaxonomy"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); out << "ASVLabel\tASV_Abundance\tTaxonomy_OTULabel\n"; for (int i = 0; i < asvlist->getNumBins(); i++) { if (m->getControl_pressed()) { break; } string binnames = asvlist->get(i); string asvOtuLabel = asvlist->getOTUName(i); //parse names in bin vector names; util.splitAtComma(binnames, names); for (int j = 0; j < names.size(); j++) { itTax = taxMap.find(names[j]); int abund = 1; if (itTax != taxMap.end()) { if (hasCount) { abund = ct.getNumSeqs(names[j]); } out << asvOtuLabel << '\t' << abund << '\t' << itTax->second << endl; }else{ m->mothurOut("\n[ERROR]: " + names[j] + " is missing from your taxonomy file, please correct.\n"); m->setControl_pressed(true); } } } out.close(); delete asvlist; } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "processASV"); exit(1); } } //********************************************************************************************************************** void OtuHierarchyCommand::processHierarchy() { try { //get listvectors that correspond to labels requested, (or use smart distancing to get closest listvector) vector< vector > lists = getListVectors(); if (m->getControl_pressed()) { return; } //determine which is little and which is big, putting little first if (lists.size() == 4) { //if big is first swap them if (lists[0].size() < lists[2].size()) { vector< vector > tempLists; tempLists.push_back(lists[2]); tempLists.push_back(lists[3]); tempLists.push_back(lists[0]); tempLists.push_back(lists[1]); lists = tempLists; string tempLabel = list2Label; list2Label = list1Label; list1Label = tempLabel; } }else{ m->mothurOut("[ERROR]: error getting listvectors, unable to read 2 different vectors, check your label inputs.\n"); return; } //map sequences to bin number in the "little" otu map littleBins; vector binLabels0 = lists[0]; for (int i = 0; i < lists[0].size(); i++) { if (m->getControl_pressed()) { return; } string bin = lists[1][i]; vector names; util.splitAtComma(bin, names); for (int j = 0; j < names.size(); j++) { littleBins[names[j]] = i; } } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listFile)); variables["[distance1]"] = list1Label; variables["[tag]"] = "-"; variables["[distance2]"] = list2Label; string outputFileName = getOutputFileName("otuheirarchy",variables); outputNames.push_back(outputFileName); outputTypes["otuheirarchy"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); //go through each bin in "big" otu and output the bins in "little" otu which created it vector binLabels1 = lists[2]; for (int i = 0; i < lists[2].size(); 
i++) { if (m->getControl_pressed()) { break; } string binnames = lists[3][i]; vector names; util.splitAtComma(binnames, names); //output column 1 if (output == "name") { out << binnames << '\t'; } else { out << binLabels1[i] << '\t'; } map bins; //bin numbers in little that are in this bin in big map::iterator it; //parse bin for (int j = 0; j < names.size(); j++) { bins[littleBins[names[j]]] = littleBins[names[j]]; } string col2 = ""; for (it = bins.begin(); it != bins.end(); it++) { if (output == "name") { col2 += lists[1][it->first] + "\t"; } else { col2 += binLabels0[it->first] + "\t"; } } //output column 2 out << col2 << endl; } out.close(); } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "processHierarchy"); exit(1); } } //********************************************************************************************************************** //returns a vector of listVectors where "little" vector is first vector< vector > OtuHierarchyCommand::getListVectors() { //return value [0] -> otulabelsFirstLabel [1] -> binsFirstLabel [2] -> otulabelsSecondLabel [3] -> binsSecondLabel try { vector< vector > lists; int count = 0; for (set::iterator it = mylabels.begin(); it != mylabels.end(); it++) { string realLabel; vector< vector > thisList = getListVector(*it, realLabel); if (m->getControl_pressed()) { return lists; } for (int i = 0; i < thisList.size(); i++) { lists.push_back(thisList[i]); } if (count == 0) { list1Label = realLabel; count++; } else { list2Label = realLabel; } } return lists; } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "getListVectors"); exit(1); } } //********************************************************************************************************************** vector< vector > OtuHierarchyCommand::getListVector(string label, string& realLabel){ //return value [0] -> otulabels [1] -> bins try { vector< vector > myList; InputData input(listFile, "list", nullVector); set labels; labels.insert(label); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, false, userLabels, processedLabels, lastLabel); if (list != nullptr) { //at this point the list vector has the right distance vector bins, listlabels; for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { return myList; } bins.push_back(list->get(i)); listlabels.push_back(list->getOTUName(i)); } myList.push_back(listlabels); myList.push_back(bins); realLabel = list->getLabel(); delete list; } return myList; } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "getListVector"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/otuhierarchycommand.h000077500000000000000000000026571424121717000224550ustar00rootroot00000000000000#ifndef OTUHIERARCHYCOMMAND_H #define OTUHIERARCHYCOMMAND_H /* * otuhierarchycommand.h * Mothur * * Created by westcott on 1/19/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" #include "listvector.hpp" //********************************************************************************************************************** class OtuHierarchyCommand : public Command { public: OtuHierarchyCommand(string); ~OtuHierarchyCommand(){} vector setParameters(); string getCommandName() { return "otu.hierarchy"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getCommonQuestions(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Otu.hierarchy"; } string getDescription() { return "relates OTUs at different distances"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, asv; set mylabels; //holds labels to be used string label, listFile, asvlistFile, taxfile, countfile, output, list1Label, list2Label; vector outputNames; void processHierarchy(); void processASV(); vector< vector > getListVectors(); vector< vector > getListVector(string, string&); }; //********************************************************************************************************************** #endif mothur-1.48.0/source/commands/pairwiseseqscommand.cpp000066400000000000000000001354571424121717000230230ustar00rootroot00000000000000/* * pairwiseseqscommand.cpp * Mothur * * Created by westcott on 10/20/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "pairwiseseqscommand.h" #include "kmerdist.hpp" //********************************************************************************************************************** vector PairwiseSeqsCommand::setParameters(){ try { CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "OldFastaColumn","column",false,false); parameters.push_back(pcolumn); CommandParameter poldfasta("oldfasta", "InputTypes", "", "", "none", "none", "OldFastaColumn","",false,false); parameters.push_back(poldfasta); CommandParameter pfitcalc("fitcalc", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfitcalc); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","phylip-column",false,true,true); parameters.push_back(pfasta); CommandParameter palign("align", "Multiple", "needleman-gotoh-noalign", "needleman", "", "", "","",false,false); parameters.push_back(palign); CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter poutput("output", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false,true); parameters.push_back(poutput); CommandParameter pcalc("calc", "Multiple", "nogaps-eachgap-onegap", "onegap", "", "", "","",false,false); parameters.push_back(pcalc); CommandParameter pcountends("countends", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pcountends); CommandParameter pcompress("compress", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress); CommandParameter pcutoff("cutoff", "Number", "", "1.0", "", "", 
"","",false,false,true); parameters.push_back(pcutoff); CommandParameter pkcutoff("kmercutoff", "Number", "", "-1.0", "", "", "","",false,false,true); parameters.push_back(pkcutoff); CommandParameter pksize("ksize", "Number", "", "8", "", "", "","",false,false); parameters.push_back(pksize); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["phylip"] = tempOutNames; outputTypes["column"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "PairwiseSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string PairwiseSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The pairwise.seqs command reads a fasta file and creates distance matrix.\n"; helpString += "The pairwise.seqs command parameters are fasta, align, match, mismatch, gapopen, gapextend, calc, output, cutoff, oldfasta, column, processors.\n"; helpString += "The fasta parameter is required.\n"; helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman and noalign. The default is needleman.\n"; helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n"; helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n"; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; helpString += "The calc parameter allows you to specify the method of calculating the distances. Your options are: nogaps, onegap or eachgap. The default is onegap.\n"; helpString += "The countends parameter allows you to specify whether to include terminal gaps in distance. Your options are: T or F. The default is T.\n"; helpString += "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0.\n"; helpString += "The kmercutoff parameter allows you to specify maximum kmer distance. The kmercutoff is used to reduce the processing time by avoiding the aligning and distance calculations for sequences with a kmer distance above the cutoff. Kmer distance are calculated using methods described here, Edgar, R. C. (2004). Muscle: a multiple sequence alignment method with reduced time and space complexity. BMC Bioinformatics, 5:113. The defaults vary based on the cutoff selected. Cutoff <= 0.05 -> kmerCutoff = -1.0, cutoff 0.05 - 0.15 -> kmerCutoff = -0.50, cutoff 0.15-0.25 -> kmerCutoff = -0.25, cutoff > 0.25 -> kmerCutoff = -0.10.\n"; helpString += "The ksize parameter allows you to specify the kmer size for calculating the kmer distance. The default is 7.\n"; helpString += "The output parameter allows you to specify format of your distance matrix. Options are column, lt, and square. 
The default is column.\n"; helpString += "The oldfasta and column parameters allow you to append the distances calculated to the column file.\n"; helpString += "The compress parameter allows you to indicate that you want the resulting distance file compressed. The default is false.\n"; helpString += "The pairwise.seqs command should be in the following format: \n"; helpString += "pairwise.seqs(fasta=yourfastaFile, align=yourAlignmentMethod) \n"; helpString += "Example pairwise.seqs(fasta=candidate.fasta, align=gotoh)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "PairwiseSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string PairwiseSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "phylip") { pattern = "[filename],[outputtag],dist"; } else if (type == "column") { pattern = "[filename],dist"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "PairwiseSeqsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** PairwiseSeqsCommand::PairwiseSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastaFileName = validParameter.validFile(parameters, "fasta"); if (fastaFileName == "not found") { fastaFileName = current->getFastaFile(); if (fastaFileName != "") { m->mothurOut("Using " + fastaFileName + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else if (fastaFileName == "not open") { abort = true; } else{ current->setFastaFile(fastaFileName); } if (outputdir == "") { outputdir += util.hasPath(fastaFileName); } oldfastafile = validParameter.validFile(parameters, "oldfasta"); if (oldfastafile == "not found") { oldfastafile = ""; } else if (oldfastafile == "not open") { abort = true; } column = validParameter.validFile(parameters, "column"); if (column == "not found") { column = ""; } else if (column == "not open") { abort = true; } else { current->setColumnFile(column); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
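//match is scored as a bonus while mismatch, gapopen and gapextend are penalties; the checks below abort the command
//if a positive value is supplied for any of the penalties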
string temp; temp = validParameter.valid(parameters, "match"); if (temp == "not found"){ temp = "1.0"; } util.mothurConvert(temp, match); temp = validParameter.valid(parameters, "mismatch"); if (temp == "not found"){ temp = "-1.0"; } util.mothurConvert(temp, misMatch); if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; } temp = validParameter.valid(parameters, "gapopen"); if (temp == "not found"){ temp = "-2.0"; } util.mothurConvert(temp, gapOpen); if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; } temp = validParameter.valid(parameters, "gapextend"); if (temp == "not found"){ temp = "-1.0"; } util.mothurConvert(temp, gapExtend); if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; } temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "cutoff"); if(temp == "not found"){ temp = "1.0"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "kmercutoff"); if(temp == "not found"){ if (cutoff <= 0.05) { kmerCutoff = -1.0; } else if ((cutoff > 0.05) && (cutoff <= 0.15)) { kmerCutoff = -0.50; } else if ((cutoff > 0.15) && (cutoff <= 0.25)) { kmerCutoff = -0.25; } else { kmerCutoff = -0.1; } }else { util.mothurConvert(temp, kmerCutoff); } temp = validParameter.valid(parameters, "ksize"); if (temp == "not found"){ temp = "7"; } util.mothurConvert(temp, kmerSize); temp = validParameter.valid(parameters, "countends"); if(temp == "not found"){ temp = "T"; } countends = util.isTrue(temp); temp = validParameter.valid(parameters, "compress"); if(temp == "not found"){ temp = "F"; } compress = util.isTrue(temp); align = validParameter.valid(parameters, "align"); if (align == "not found"){ align = "needleman"; } temp = validParameter.valid(parameters, "fitcalc"); if(temp == "not found"){ temp = "F"; } fitCalc = util.isTrue(temp); output = validParameter.valid(parameters, "output"); if(output == "not found"){ output = "column"; } if (output=="phylip") { output = "lt"; } if ((output != "column") && (output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are column, lt and square. I will use column.\n"); output = "column"; } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "onegap"; } else { if (calc == "default") { calc = "onegap"; } } if ((calc != "nogaps") && (calc != "eachgap") && (calc != "onegap")) { m->mothurOut(calc + " is not a valid calculator for pairwise.seqs. Options are onegap, eachgap and nogaps. 
I will use onegap.\n"); calc = "onegap"; } } } catch(exception& e) { m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand"); exit(1); } } //********************************************************************************************************************** PairwiseSeqsCommand::PairwiseSeqsCommand(StorageDatabase*& storageDB, vector< vector< int > > kDB, vector< int > lths, string outputFileRoot, double cut, string outputformat, int proc) { try { abort = false; calledHelp = false; vector tempOutNames; outputTypes["phylip"] = tempOutNames; outputTypes["column"] = tempOutNames; //defaults calc = "onegap"; countends = true; fitCalc = false; cutoff = cut; processors = proc; compress = false; output = outputformat; match = 1.0; misMatch = -1.0; gapOpen = -2.0; gapExtend = -1.0; align = "needleman"; kmerSize = 7; kmerDB = kDB; lengths = lths; if (cutoff <= 0.05) { kmerCutoff = -1.0; } else if ((cutoff > 0.05) && (cutoff <= 0.15)) { kmerCutoff = -0.50; } else if ((cutoff > 0.15) && (cutoff <= 0.25)) { kmerCutoff = -0.25; } else { kmerCutoff = -0.1; } longestBase = 2000; //will need to update this in driver if we find sequences with more bases. hardcoded so we don't have the pre-read user fasta file. numDistsBelowCutoff = 0; alignDB = storageDB; long long numSeqs = alignDB->getNumSeqs(); if (numSeqs < 2) { m->mothurOut("[ERROR]: you must have at least 2 sequences to calculate the distances, aborting.\n"); return; } string outputFile; map variables; variables["[filename]"] = outputFileRoot; if (output == "lt") { //does the user want lower triangle phylip formatted file variables["[outputtag]"] = "phylip"; outputFile = getOutputFileName("phylip", variables); util.mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); }else if (output == "column") { //user wants column format if (fitCalc) { variables["[outputtag]"] = "fit"; } outputFile = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFile); util.mothurRemove(outputFile); } time_t start, end; time(&start); m->mothurOut("\nSequence\tTime\tNum_Dists_Below_Cutoff\n"); createProcesses(outputFile); time(&end); m->mothurOut("\nIt took " + toString(difftime(end, start)) + " secs to find distances for " + toString(numSeqs) + " sequences. " + toString(numDistsBelowCutoff) + " distances below cutoff " + toString(cutoff) + ".\n\n"); m->mothurOut("\nOutput File Names:\n"); m->mothurOut(outputFile+"\n\n"); } catch(exception& e) { m->errorOut(e, "PairwiseSeqsCommand", "PairwiseSeqsCommand"); exit(1); } } //********************************************************************************************************************** int PairwiseSeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } time_t start, end; time(&start); longestBase = 2000; //will need to update this in driver if we find sequences with more bases. hardcoded so we don't have the pre-read user fasta file. 
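//the fasta file is read into a SequenceDB along with a kmer profile per sequence; the kmer distance is then used as a
//cheap prefilter so that only pairs whose kmer distance is at or below kmercutoff are aligned and scored, and only
//pairwise distances at or below the cutoff are written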
numDistsBelowCutoff = 0; ifstream inFASTA; util.openInputFile(fastaFileName, inFASTA); alignDB = new SequenceDB(inFASTA, kmerSize, kmerDB, lengths); inFASTA.close(); //sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB if ((oldfastafile != "") && (column != "")) { if (!(sanityCheck())) { return 0; } } if (m->getControl_pressed()) { delete alignDB; return 0; } long long numSeqs = alignDB->getNumSeqs(); string outputFile = ""; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastaFileName)); if ((oldfastafile != "") && (column != "")) { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(oldfastafile)); } if (output == "lt") { //does the user want lower triangle phylip formatted file variables["[outputtag]"] = "phylip"; outputFile = getOutputFileName("phylip", variables); util.mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); }else if (output == "column") { //user wants column format if (fitCalc) { variables["[outputtag]"] = "fit"; } outputFile = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFile); util.mothurRemove(outputFile); }else { //assume square variables["[outputtag]"] = "square"; outputFile = getOutputFileName("phylip", variables); util.mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); } m->mothurOut("\nSequence\tTime\tNum_Dists_Below_Cutoff\n"); createProcesses(outputFile); delete alignDB; if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outputFile); return 0; } if(util.isBlank(outputFile)) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.\n"); } //append the old column file to the new one if ((oldfastafile != "") && (column != "")) { //we had to rename the column file so we didnt overwrite above, but we want to keep old name if (outputFile == column) { string tempcolumn = column + ".old"; util.appendFiles(tempcolumn, outputFile); util.mothurRemove(tempcolumn); }else{ util.appendFiles(outputFile, column); util.mothurRemove(outputFile); outputFile = column; } outputTypes["column"].clear(); outputTypes["column"].push_back(outputFile); } if (compress) { m->mothurOut("Compressing...\n"); m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)\n"); system(("gzip -v " + outputFile).c_str()); outputNames.push_back(outputFile + ".gz"); }else { outputNames.push_back(outputFile); } time(&end); m->mothurOut("\nIt took " + toString(difftime(end, start)) + " secs to find distances for " + toString(numSeqs) + " sequences. 
" + toString(numDistsBelowCutoff) + " distances below cutoff " + toString(cutoff) + ".\n\n"); if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outputFile); return 0; } //set phylip file as new current phylipfile string currentName = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setPhylipFile(currentName); } } //set column file as new current columnfile itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setColumnFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "PairwiseSeqsCommand", "execute"); exit(1); } } /**************************************************************************************************/ struct pairwiseData { string align, distcalcType, outputFileName; unsigned long long start, end; long long count; float match, misMatch, gapOpen, gapExtend, cutoff, kmerCutoff; int longestBase, kmerSize; bool countends; vector< vector< int > > kmerDB; //kmerDB[0] = vector maxKmers long, contains kmer counts vector< vector< int > > oldkmerDB; //kmerDB[0] = vector maxKmers long, contains kmer counts vector< int > lengths; vector< int > oldlengths; StorageDatabase* alignDB; SequenceDB oldFastaDB; OutputWriter* threadWriter; MothurOut* m; Utils util; pairwiseData(){} pairwiseData(OutputWriter* ofn) { threadWriter = ofn; m = MothurOut::getInstance(); } pairwiseData(string ofn) { outputFileName = ofn; m = MothurOut::getInstance(); } void setVariables(string al, string di, bool co, string op, StorageDatabase* DB, SequenceDB oldDB, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int thr, float cu, float kcut, int ksz, vector< vector< int > > kdb, vector< int > le, vector< vector< int > > okdb, vector< int > ole) { align = al; distcalcType = di; countends = co; alignDB = DB; oldFastaDB = oldDB; cutoff = cu; start = st; end = en; match = ma; misMatch = misMa; gapOpen = gapO; gapExtend = gapE; longestBase = thr; kmerDB = kdb; oldkmerDB = okdb; lengths = le; oldlengths = ole; kmerSize = ksz; kmerCutoff = kcut; count = 0; } }; /***********************************************************************/ vector getUniqueKmers(vector seqsKmers, int i){ vector uniques; for (int k = 0; k < seqsKmers.size(); k++) { if (seqsKmers[k] != 0) { kmerCount thisKmer(k, seqsKmers[k]); uniques.push_back(thisKmer); } } return uniques; } /**************************************************************************************************/ //the higher the kmercutoff the higher the aligned dist. As kmercutoff approaches 0, aligned dist aproaches 1. int driverColumn(pairwiseData* params){ try { int startTime = time(nullptr); Alignment* alignment; if(params->align == "gotoh") { alignment = new GotohOverlap(params->gapOpen, params->gapExtend, params->match, params->misMatch, params->longestBase); } else if(params->align == "needleman") { alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, params->longestBase); } else if(params->align == "noalign") { alignment = new NoAlign(); } else { params->m->mothurOut(params->align + " is not a valid alignment option. 
I will run the command using needleman.\n"); alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, params->longestBase); }

        ValidCalculators validCalculator;
        DistCalc* distCalculator;
        if (params->countends) {
            if (validCalculator.isValidCalculator("distance", params->distcalcType) ) {
                if (params->distcalcType == "nogaps")         { distCalculator = new ignoreGaps(params->cutoff); }
                else if (params->distcalcType == "eachgap")   { distCalculator = new eachGapDist(params->cutoff); }
                else if (params->distcalcType == "onegap")    { distCalculator = new oneGapDist(params->cutoff); }
                //else if (params->distcalcType == "onegap") { distCalculator = new oneGapDist(1.0); }
            }
        }else {
            if (validCalculator.isValidCalculator("distance", params->distcalcType) ) {
                if (params->distcalcType == "nogaps")         { distCalculator = new ignoreGaps(params->cutoff); }
                else if (params->distcalcType == "eachgap")   { distCalculator = new eachGapIgnoreTermGapDist(params->cutoff); }
                else if (params->distcalcType == "onegap")    { distCalculator = new oneGapIgnoreTermGapDist(params->cutoff); }
            }
        }

        KmerDist kmerDistCalculator(params->kmerSize);
        double kmerCutoff = params->kmerCutoff;

        for(int i=params->start;i<params->end;i++){

            Sequence seq = params->alignDB->getSeq(i);
            vector<kmerCount> seqA = getUniqueKmers(params->kmerDB[i], i);

            for(int j=0;j<i;j++){

                if (params->m->getControl_pressed()) { break; }

                vector<int> seqB = params->kmerDB[j];
                int length = min(params->lengths[i], params->lengths[j]);

                auto kmerDist = kmerDistCalculator.calcDist(seqA, seqB, length);

                if (kmerDist[0] <= kmerCutoff) {
                    Sequence seqI = seq;
                    Sequence seqJ = params->alignDB->getSeq(j);

                    if (seq.getUnaligned().length() > alignment->getnRows()) { alignment->resize(seq.getUnaligned().length()+1); }
                    if (seqJ.getUnaligned().length() > alignment->getnRows()) { alignment->resize(seqJ.getUnaligned().length()+1); }

                    alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
                    seqI.setAligned(alignment->getSeqAAln());
                    seqJ.setAligned(alignment->getSeqBAln());

                    double dist = distCalculator->calcDist(seqI, seqJ);

                    if(dist <= params->cutoff){ params->count++; params->threadWriter->write(seqI.getName() + ' ' + seqJ.getName() + ' ' + toString(dist) + "\n"); }
                }
            }
            if((i+1) % 100 == 0){ params->m->mothurOutJustToScreen(toString(i+1) + "\t" + toString(time(nullptr) - startTime)+ "\t" + toString(params->count) +"\n"); }
        }
        params->m->mothurOutJustToScreen(toString(params->end-1) + "\t" + toString(time(nullptr) - startTime)+ "\t" + toString(params->count) +"\n");

        delete alignment;
        delete distCalculator;

        return 0;
    }
    catch(exception& e) { params->m->errorOut(e, "PairwiseSeqsCommand", "driver"); exit(1); }
}
/**************************************************************************************************/
int driverFitCalc(pairwiseData* params){
    try {
        int startTime = time(nullptr);

        Alignment* alignment;
        if(params->align == "gotoh")            { alignment = new GotohOverlap(params->gapOpen, params->gapExtend, params->match, params->misMatch, params->longestBase); }
        else if(params->align == "needleman")   { alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, params->longestBase); }
        else if(params->align == "noalign")     { alignment = new NoAlign(); }
        else { params->m->mothurOut(params->align + " is not a valid alignment option. I will run the command using needleman.\n"); alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, params->longestBase); }

        ValidCalculators validCalculator;
        DistCalc* distCalculator;
        if (params->countends) {
            if (validCalculator.isValidCalculator("distance", params->distcalcType) ) {
                if (params->distcalcType == "nogaps")         { distCalculator = new ignoreGaps(params->cutoff); }
                else if (params->distcalcType == "eachgap")   { distCalculator = new eachGapDist(params->cutoff); }
                else if (params->distcalcType == "onegap")    { distCalculator = new oneGapDist(params->cutoff); }
            }
        }else {
            if (validCalculator.isValidCalculator("distance", params->distcalcType) ) {
                if (params->distcalcType == "nogaps")         { distCalculator = new ignoreGaps(params->cutoff); }
                else if (params->distcalcType == "eachgap")   { distCalculator = new eachGapIgnoreTermGapDist(params->cutoff); }
                else if (params->distcalcType == "onegap")    { distCalculator = new oneGapIgnoreTermGapDist(params->cutoff); }
            }
        }

        KmerDist kmerDistCalculator(params->kmerSize);
        double kmerCutoff = params->kmerCutoff;

        for(int i=params->start;i<params->end;i++){ //for each oldDB fasta seq calc the distance to every new seq in alignDB

            Sequence seq = params->oldFastaDB.getSeq(i);
            vector<kmerCount> seqA = getUniqueKmers(params->oldkmerDB[i], i);

            for(int j = 0; j < params->alignDB->getNumSeqs(); j++){

                if (params->m->getControl_pressed()) { break; }

                vector<int> seqB = params->kmerDB[j];
                int length = min(params->oldlengths[i], params->lengths[j]);

                auto kmerDist = kmerDistCalculator.calcDist(seqA, seqB, length);

                if (kmerDist[0] <= kmerCutoff) {
                    Sequence seqI = seq;
                    Sequence seqJ = params->alignDB->getSeq(j);

                    if (seq.getUnaligned().length() > alignment->getnRows()) { alignment->resize(seq.getUnaligned().length()+1); }
                    if (seqJ.getUnaligned().length() > alignment->getnRows()) { alignment->resize(seqJ.getUnaligned().length()+1); }

                    alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
                    seqI.setAligned(alignment->getSeqAAln());
                    seqJ.setAligned(alignment->getSeqBAln());

                    double dist = distCalculator->calcDist(seqI, seqJ);

                    if(dist <= params->cutoff){ params->count++; params->threadWriter->write(seqI.getName() + ' ' + seqJ.getName() + ' ' + toString(dist) + "\n"); }
                }
            }
            if((i+1) % 100 == 0){ params->m->mothurOutJustToScreen(toString(i+1) + "\t" + toString(time(nullptr) - startTime)+ "\t" + toString(params->count) +"\n"); }
        }
        params->m->mothurOutJustToScreen(toString(params->end-1) + "\t" + toString(time(nullptr) - startTime)+ "\t" + toString(params->count) +"\n");

        delete alignment;
        delete distCalculator;

        return 0;
    }
    catch(exception& e) { params->m->errorOut(e, "PairwiseSeqsCommand", "driverFitCalc"); exit(1); }
}
/**************************************************************************************************/
int driverLt(pairwiseData* params){
    try {
        int startTime = time(nullptr);

        Alignment* alignment;
        if(params->align == "gotoh")            { alignment = new GotohOverlap(params->gapOpen, params->gapExtend, params->match, params->misMatch, params->longestBase); }
        else if(params->align == "needleman")   { alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, params->longestBase); }
        else if(params->align == "noalign")     { alignment = new NoAlign(); }
        else { params->m->mothurOut(params->align + " is not a valid alignment option. I will run the command using needleman.\n"); alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, params->longestBase); }

        ValidCalculators validCalculator;
        DistCalc* distCalculator;
        double cutoff = 1.0;
        if (params->countends) {
            if (validCalculator.isValidCalculator("distance", params->distcalcType) ) {
                if (params->distcalcType == "nogaps")         { distCalculator = new ignoreGaps(cutoff); }
                else if (params->distcalcType == "eachgap")   { distCalculator = new eachGapDist(cutoff); }
                else if (params->distcalcType == "onegap")    { distCalculator = new oneGapDist(cutoff); }
            }
        }else {
            if (validCalculator.isValidCalculator("distance", params->distcalcType) ) {
                if (params->distcalcType == "nogaps")         { distCalculator = new ignoreGaps(cutoff); }
                else if (params->distcalcType == "eachgap")   { distCalculator = new eachGapIgnoreTermGapDist(cutoff); }
                else if (params->distcalcType == "onegap")    { distCalculator = new oneGapIgnoreTermGapDist(cutoff); }
            }
        }

        //column file
        ofstream outFile;
        params->util.openOutputFile(params->outputFileName, outFile);
        outFile.setf(ios::fixed, ios::showpoint);
        outFile << setprecision(4);

        if(params->start == 0){ outFile << params->alignDB->getNumSeqs() << endl; }

        for(int i=params->start;i<params->end;i++){

            Sequence seq = params->alignDB->getSeq(i);
            if (seq.getUnaligned().length() > alignment->getnRows()) { alignment->resize(seq.getUnaligned().length()+1); }

            string name = seq.getName();
            if (name.length() < 10) { while (name.length() < 10) { name += " "; } seq.setName(name); } //pad with spaces to make compatible
            outFile << name;

            for(int j=0;j<i;j++){

                if (params->m->getControl_pressed()) { break; }

                Sequence seqI = seq;
                Sequence seqJ = params->alignDB->getSeq(j);

                if (seqJ.getUnaligned().length() > alignment->getnRows()) { alignment->resize(seqJ.getUnaligned().length()+1); }

                alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
                seqI.setAligned(alignment->getSeqAAln());
                seqJ.setAligned(alignment->getSeqBAln());

                double dist = distCalculator->calcDist(seqI, seqJ);

                if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + seqI.getName() + '\t' + alignment->getSeqAAln() + '\n' + seqJ.getName() + alignment->getSeqBAln() + '\n' + "distance = " + toString(dist) + "\n"); }

                if(dist <= params->cutoff){ params->count++; }

                outFile << '\t' << dist;
            }

            outFile << endl;

            if(i % 100 == 0){ params->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(nullptr) - startTime)+ "\t" + toString(params->count) +"\n"); }
        }
        params->m->mothurOutJustToScreen(toString(params->end-1) + "\t" + toString(time(nullptr) - startTime)+ "\t" + toString(params->count) +"\n");

        outFile.close();
        delete alignment;
        delete distCalculator;

        return 1;
    }
    catch(exception& e) { params->m->errorOut(e, "PairwiseSeqsCommand", "driver"); exit(1); }
}
/**************************************************************************************************/
int driverSquare(pairwiseData* params){
    try {
        int startTime = time(nullptr);

        Alignment* alignment;
        if(params->align == "gotoh")            { alignment = new GotohOverlap(params->gapOpen, params->gapExtend, params->match, params->misMatch, params->longestBase); }
        else if(params->align == "needleman")   { alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, params->longestBase); }
        else if(params->align == "noalign")     { alignment = new NoAlign(); }
        else { params->m->mothurOut(params->align + " is not a valid alignment option. I will run the command using needleman.\n"); alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, params->longestBase); }

        ValidCalculators validCalculator;
        DistCalc* distCalculator;
        double cutoff = 1.0;
        if (params->countends) {
            if (validCalculator.isValidCalculator("distance", params->distcalcType) ) {
                if (params->distcalcType == "nogaps")         { distCalculator = new ignoreGaps(cutoff); }
                else if (params->distcalcType == "eachgap")   { distCalculator = new eachGapDist(cutoff); }
                else if (params->distcalcType == "onegap")    { distCalculator = new oneGapDist(cutoff); }
            }
        }else {
            if (validCalculator.isValidCalculator("distance", params->distcalcType) ) {
                if (params->distcalcType == "nogaps")         { distCalculator = new ignoreGaps(cutoff); }
                else if (params->distcalcType == "eachgap")   { distCalculator = new eachGapIgnoreTermGapDist(cutoff); }
                else if (params->distcalcType == "onegap")    { distCalculator = new oneGapIgnoreTermGapDist(cutoff); }
            }
        }

        //column file
        ofstream outFile;
        params->util.openOutputFile(params->outputFileName, outFile);
        outFile.setf(ios::fixed, ios::showpoint);
        outFile << setprecision(4);

        long long numSeqs = params->alignDB->getNumSeqs();
        if(params->start == 0){ outFile << numSeqs << endl; }

        for(int i=params->start;i<params->end;i++){

            Sequence seq = params->alignDB->getSeq(i);
            if (seq.getUnaligned().length() > alignment->getnRows()) { alignment->resize(seq.getUnaligned().length()+1); }

            string name = seq.getName();
            if (name.length() < 10) { while (name.length() < 10) { name += " "; } seq.setName(name); } //pad with spaces to make compatible
            outFile << name;

            for(int j=0;j<numSeqs;j++){

                if (params->m->getControl_pressed()) { break; }

                Sequence seqI = seq;
                Sequence seqJ = params->alignDB->getSeq(j);

                if (seqJ.getUnaligned().length() > alignment->getnRows()) { alignment->resize(seqJ.getUnaligned().length()+1); }

                alignment->align(seqI.getUnaligned(), seqJ.getUnaligned());
                seqI.setAligned(alignment->getSeqAAln());
                seqJ.setAligned(alignment->getSeqBAln());

                double dist = distCalculator->calcDist(seqI, seqJ);

                if(dist <= params->cutoff){ params->count++; }

                outFile << '\t' << dist;

                if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + seqI.getName() + '\t' + alignment->getSeqAAln() + '\n' + seqJ.getName() + alignment->getSeqBAln() + '\n' + "distance = " + toString(dist) + "\n"); }
            }

            outFile << endl;

            if(i % 100 == 0){ params->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(nullptr) - startTime)+ "\t" + toString(params->count) +"\n"); }
        }
        params->m->mothurOutJustToScreen(toString(params->end-1) + "\t" + toString(time(nullptr) - startTime)+ "\t" + toString(params->count) +"\n");

        outFile.close();
        delete alignment;
        delete distCalculator;

        return 1;
    }
    catch(exception& e) { params->m->errorOut(e, "PairwiseSeqsCommand", "driver"); exit(1); }
}
/**************************************************************************************************/
void PairwiseSeqsCommand::createProcesses(string filename) {
    try {
        vector<linePair> lines;
        vector<std::thread*> workerThreads;
        vector<pairwiseData*> data;

        long long numSeqs = alignDB->getNumSeqs();
        long long numDists = 0;

        if (output == "square") { numDists = numSeqs * numSeqs; }
        else {
            for(int i=0;i<numSeqs;i++){
                for(int j=0;j<i;j++){
                    numDists++;
                    if (numDists > processors) { break; }
                }
            }
        }
        if (numDists < processors) { processors = numDists; }

        for (int i = 0; i < processors; i++) {
            linePair tempLine; lines.push_back(tempLine);
            if (output != "square") {
                lines[i].start = int (sqrt(float(i)/float(processors)) * numSeqs);
                lines[i].end = int (sqrt(float(i+1)/float(processors)) * numSeqs);
            }else{
                lines[i].start = int ((float(i)/float(processors)) * numSeqs);
lines[i].end = int ((float(i+1)/float(processors)) * numSeqs); } } auto synchronizedOutputFile = std::make_shared(filename); synchronizedOutputFile->setFixedShowPoint(); synchronizedOutputFile->setPrecision(4); SequenceDB oldFastaDB; vector< int > oldlengths; vector< vector< int > > oldkmerDB; if (fitCalc) { ifstream inFASTA; util.openInputFile(oldfastafile, inFASTA); oldFastaDB = SequenceDB(inFASTA, kmerSize, oldkmerDB, oldlengths); inFASTA.close(); lines.clear(); if (processors > oldFastaDB.getNumSeqs()) { processors = oldFastaDB.getNumSeqs(); } int remainingSeqs = oldFastaDB.getNumSeqs(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numSeqsToFit = remainingSeqs; //case for last processor if (remainingProcessors != 1) { numSeqsToFit = ceil(remainingSeqs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numSeqsToFit))); //startIndex, endIndex startIndex = startIndex + numSeqsToFit; remainingSeqs -= numSeqsToFit; } } //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadWriter = nullptr; pairwiseData* dataBundle = nullptr; string extension = toString(i+1) + ".temp"; if (output == "column") { threadWriter = new OutputWriter(synchronizedOutputFile); dataBundle = new pairwiseData(threadWriter); }else { dataBundle = new pairwiseData(filename+extension); } dataBundle->setVariables(align, calc, countends, output, alignDB, oldFastaDB, lines[i+1].start, lines[i+1].end, match, misMatch, gapOpen, gapExtend, longestBase, cutoff, kmerCutoff, kmerSize, kmerDB, lengths, oldkmerDB, oldlengths); data.push_back(dataBundle); std::thread* thisThread = nullptr; if (output == "column") { if (fitCalc) { thisThread = new std::thread(driverFitCalc, dataBundle); } else { thisThread = new std::thread(driverColumn, dataBundle); } } else if (output == "lt") { thisThread = new std::thread(driverLt, dataBundle); } else { thisThread = new std::thread(driverSquare, dataBundle); } workerThreads.push_back(thisThread); } OutputWriter* threadWriter = nullptr; pairwiseData* dataBundle = nullptr; if (output == "column") { threadWriter = new OutputWriter(synchronizedOutputFile); dataBundle = new pairwiseData(threadWriter); }else { dataBundle = new pairwiseData(filename); } dataBundle->setVariables(align, calc, countends, output, alignDB, oldFastaDB, lines[0].start, lines[0].end, match, misMatch, gapOpen, gapExtend, longestBase, cutoff, kmerCutoff, kmerSize, kmerDB, lengths, oldkmerDB, oldlengths); if (output == "column") { if (fitCalc) { driverFitCalc(dataBundle); } else { driverColumn(dataBundle); } delete threadWriter; } else if (output == "lt") { driverLt(dataBundle); } else { driverSquare(dataBundle); } numDistsBelowCutoff = dataBundle->count; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); numDistsBelowCutoff += data[i]->count; if (output == "column") { delete data[i]->threadWriter; } else { string extension = toString(i+1) + ".temp"; util.appendFiles((filename+extension), filename); util.mothurRemove(filename+extension); } delete data[i]; delete workerThreads[i]; } delete dataBundle; } catch(exception& e) { m->errorOut(e, "PairwiseSeqsCommand", "createProcesses"); exit(1); } } /**************************************************************************************************/ //its okay if the column file does not contain all the names in the fasta file, since some distance may have been above a cutoff, //but no sequences can be in the column file that are not in oldfasta. 
also, if a distance is above the cutoff given then remove it. bool PairwiseSeqsCommand::sanityCheck() { try{ bool good = true; //read fasta file and save names as well as adding them to the alignDB set namesOldFasta; ifstream inFasta; util.openInputFile(oldfastafile, inFasta); while (!inFasta.eof()) { if (m->getControl_pressed()) { inFasta.close(); return good; } Sequence temp(inFasta); gobble(inFasta); if (temp.getName() != "") { namesOldFasta.insert(temp.getName()); //save name if (!fitCalc) { alignDB->push_back(temp); }//add to DB } } inFasta.close(); //read through the column file checking names and removing distances above the cutoff ifstream inDist; util.openInputFile(column, inDist); ofstream outDist; string outputFile = column + ".temp"; util.openOutputFile(outputFile, outDist); string name1, name2; float dist; while (!inDist.eof()) { if (m->getControl_pressed()) { inDist.close(); outDist.close(); util.mothurRemove(outputFile); return good; } inDist >> name1; gobble(inDist); inDist >> name2; gobble(inDist); inDist >> dist; gobble(inDist); //both names are in fasta file and distance is below cutoff if ((namesOldFasta.count(name1) == 0) || (namesOldFasta.count(name2) == 0)) { good = false; break; } else{ if (dist <= cutoff) { numDistsBelowCutoff++; outDist << name1 << '\t' << name2 << '\t' << dist << endl; } } } inDist.close(); outDist.close(); if (good) { util.mothurRemove(column); rename(outputFile.c_str(), column.c_str()); }else{ util.mothurRemove(outputFile); //temp file is bad because file mismatch above } return good; } catch(exception& e) { m->errorOut(e, "PairwiseSeqsCommand", "sanityCheck"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/pairwiseseqscommand.h000077500000000000000000000042441424121717000224600ustar00rootroot00000000000000#ifndef PAIRWISESEQSCOMMAND_H #define PAIRWISESEQSCOMMAND_H /* * pairwiseseqscommand.h * Mothur * * Created by westcott on 10/20/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "command.hpp" #include "searchdatabase.hpp" #include "alignment.hpp" #include "validcalculator.h" #include "calculator.h" #include "sequencedb.h" #include "sequence.hpp" #include "gotohoverlap.hpp" #include "needlemanoverlap.hpp" #include "noalign.hpp" #include "ignoregaps.h" #include "eachgapdist.h" #include "eachgapignore.h" #include "onegapdist.h" #include "onegapignore.h" #include "writer.h" class PairwiseSeqsCommand : public Command { public: PairwiseSeqsCommand(string); PairwiseSeqsCommand(StorageDatabase*&, vector< vector< int > > kmerDB, vector< int > lengths, string, double, string, int); //used by mothur's splitMatrix class to avoid rereading files ~PairwiseSeqsCommand() = default; vector setParameters(); string getCommandName() { return "pairwise.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Needleman SB, Wunsch CD (1970). A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 48: 443-53. [ for needleman ]\nGotoh O (1982). An improved algorithm for matching biological sequences. J Mol Biol 162: 705-8. 
[ for gotoh ] \nhttp://www.mothur.org/wiki/Pairwise.seqs"; } string getDescription() { return "calculates pairwise distances from an unaligned fasta file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: StorageDatabase* alignDB; void createProcesses(string); bool sanityCheck(); bool abort, countends, compress, fitCalc; string fastaFileName, align, calc, output, oldfastafile, column; float match, misMatch, gapOpen, gapExtend, cutoff, kmerCutoff; int processors, longestBase, numDistsBelowCutoff, kmerSize; vector outputNames; vector< vector< int > > kmerDB; //kmerDB[0] = vector maxKmers long, contains kmer counts vector< int > lengths; }; #endif mothur-1.48.0/source/commands/parsimonycommand.cpp000077500000000000000000000512341424121717000223160ustar00rootroot00000000000000/* * parsimonycommand.cpp * Mothur * * Created by Sarah Westcott on 1/26/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "parsimonycommand.h" //********************************************************************************************************************** vector ParsimonyCommand::setParameters(){ try { CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","parsimony-psummary",false,true,true); parameters.push_back(ptree); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter prandom("random", "String", "", "", "", "", "","",false,false); parameters.push_back(prandom); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["parsimony"] = tempOutNames; outputTypes["psummary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ParsimonyCommand::getHelpString(){ try { string helpString = ""; helpString += "The parsimony command parameters are tree, group, name, count, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n"; helpString += "The group names are separated by dashes. 
The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"; helpString += "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n"; helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n"; helpString += "Example parsimony(random=out, iters=500).\n"; helpString += "The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n"; helpString += "and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ParsimonyCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "parsimony") { pattern = "[filename],parsimony"; } else if (type == "psummary") { pattern = "[filename],psummary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "getOutputPattern"); exit(1); } } /***********************************************************/ ParsimonyCommand::ParsimonyCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; randomtree = validParameter.valid(parameters, "random"); if (randomtree == "not found") { randomtree = ""; } //are you trying to use parsimony without reading a tree or saying you want random distribution if (randomtree == "") { //check for required parameters treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { treefile = ""; abort = true; } else if (treefile == "not found") { //if there is a current design file, use it treefile = current->getTreeFile(); if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter.\n"); } else { m->mothurOut("You have no current tree file and the tree parameter is required.\n"); abort = true; } }else { current->setTreeFile(treefile); } //check for required parameters groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you 
may only use one of the following: group or count.\n"); abort=true; } } if (outputdir == ""){ if (randomtree == "") { outputdir += util.hasPath(treefile); } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } itersString = validParameter.valid(parameters, "iters"); if (itersString == "not found") { itersString = "1000"; } util.mothurConvert(itersString, iters); string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); } } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand"); exit(1); } } /***********************************************************/ int ParsimonyCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } Treenames = util.parseTreeFile(treefile); //extract treenames //randomtree will tell us if user had their own treefile or if they just want the random distribution //user has entered their own tree if (randomtree == "") { current->setTreeFile(treefile); TreeReader* reader; if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); } else { reader = new TreeReader(treefile, countfile); } T = reader->getTrees(); ct = T[0]->getCountTable(); delete reader; if(outputdir == "") { outputdir += util.hasPath(treefile); } map variables; variables["[filename]"] = outputdir + util.getSimpleName(treefile) + "."; output = new ColumnFile(getOutputFileName("parsimony",variables), itersString); outputNames.push_back(getOutputFileName("parsimony",variables)); outputTypes["parsimony"].push_back(getOutputFileName("parsimony",variables)); sumFile = getOutputFileName("psummary",variables); util.openOutputFile(sumFile, outSum); outputNames.push_back(sumFile); outputTypes["psummary"].push_back(sumFile); }else { //user wants random distribution getUserInput(); if(outputdir == "") { outputdir += util.hasPath(randomtree); } output = new ColumnFile(outputdir+ util.getSimpleName(randomtree), itersString); outputNames.push_back(outputdir+ util.getSimpleName(randomtree)); outputTypes["parsimony"].push_back(outputdir+ util.getSimpleName(randomtree)); } //set users groups to analyze vector tGroups = ct->getNamesOfGroups(); //check that groups are valid for (int i = 0; i < Groups.size(); i++) { if (!util.inUsersGroups(Groups[i], tGroups)) { m->mothurOut(Groups[i] + " is not a valid group, and will be disregarded.\n"); // erase the invalid group from userGroups Groups.erase(Groups.begin()+i); i--; } } if (Groups.size() == 0) { Groups = tGroups; } util.getCombos(groupComb, Groups, numComp); if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } if (numComp < processors) { m->mothurOut("Reducing processors to " + toString(numComp) + ".\n"); } Parsimony pars(Groups); counter = 0; if (m->getControl_pressed()) { delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } //get pscore for users tree userData.resize(numComp,0); //data = AB, AC, BC, ABC. randomData.resize(numComp,0); //data = AB, AC, BC, ABC. 
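        // Illustrative note (not part of the original source): for Groups = {A, B, C},
        // util.getCombos fills groupComb with the pairwise combinations AB, AC and BC, so
        // numComp = 3; the vectors resized in this block hold one slot per combination,
        // matching the "AB, AC, BC, ABC" layout noted in the comments above.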
rscoreFreq.resize(numComp); uscoreFreq.resize(numComp); rCumul.resize(numComp); uCumul.resize(numComp); userTreeScores.resize(numComp); UScoreSig.resize(numComp); if (randomtree == "") { //get pscores for users trees for (int i = 0; i < T.size(); i++) { userData = pars.getValues(T[i], processors, outputdir); //data = AB, AC, BC, ABC. if (m->getControl_pressed()) { delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } //output scores for each combination for(int k = 0; k < numComp; k++) { //update uscoreFreq map::iterator it = uscoreFreq[k].find(userData[k]); if (it == uscoreFreq[k].end()) {//new score uscoreFreq[k][userData[k]] = 1; }else{ uscoreFreq[k][userData[k]]++; } //add users score to valid scores validScores[userData[k]] = userData[k]; //save score for summary file userTreeScores[k].push_back(userData[k]); } } Utils* stableRandom = new Utils(); //get pscores for random trees for (int j = 0; j < iters; j++) { //create new tree with same num nodes and leaves as users randT = new Tree(ct, Treenames); //create random relationships between nodes randT->assembleRandomTree(stableRandom); //get pscore of random tree randomData = pars.getValues(randT, processors, outputdir); if (m->getControl_pressed()) { delete output; delete randT; delete stableRandom; if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } for(int r = 0; r < numComp; r++) { //add trees pscore to map of scores map::iterator it = rscoreFreq[r].find(randomData[r]); if (it != rscoreFreq[r].end()) {//already have that score rscoreFreq[r][randomData[r]]++; }else{//first time we have seen this score rscoreFreq[r][randomData[r]] = 1; } //add randoms score to validscores validScores[randomData[r]] = randomData[r]; } delete randT; } delete stableRandom; }else { Utils* stableRandom = new Utils(); //get pscores for random trees for (int j = 0; j < iters; j++) { //create new tree with same num nodes and leaves as users randT = new Tree(ct, Treenames); //create random relationships between nodes randT->assembleRandomTree(stableRandom); if (m->getControl_pressed()) { delete output; delete randT; delete ct; delete stableRandom; for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } //get pscore of random tree randomData = pars.getValues(randT, processors, outputdir); if (m->getControl_pressed()) { delete output; delete randT; delete ct; for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } for(int r = 0; r < numComp; r++) { //add trees pscore to map of scores map::iterator it = rscoreFreq[r].find(randomData[r]); if (it != rscoreFreq[r].end()) {//already have that score rscoreFreq[r][randomData[r]]++; }else{//first time we have seen this score rscoreFreq[r][randomData[r]] = 1; } //add randoms score to validscores validScores[randomData[r]] = randomData[r]; } delete randT; } delete stableRandom; } for(int a = 0; a < numComp; a++) { float rcumul = 0.0000; float ucumul = 0.0000; //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. 
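        // Worked example (illustrative): with iters = 3 random trees scoring 10, 10 and 12,
        // rscoreFreq[a] becomes {10: 0.67, 12: 0.33} after dividing by iters, and rCumul[a]
        // becomes {10: 0.67, 12: 1.0}. A user tree scoring 10 is then assigned a parsimony
        // significance of 0.67, i.e. the fraction of random trees scoring 10 or less.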
for (map::iterator it = validScores.begin(); it != validScores.end(); it++) { if (randomtree == "") { map::iterator it2 = uscoreFreq[a].find(it->first); //user data has that score if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; } else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score //make uCumul map uCumul[a][it->first] = ucumul; } //make rscoreFreq map and rCumul map::iterator it2 = rscoreFreq[a].find(it->first); //get percentage of random trees with that info if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; } else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score rCumul[a][it->first] = rcumul; } //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file for (int h = 0; h < userTreeScores[a].size(); h++) { UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]); } } if (m->getControl_pressed()) { delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } printParsimonyFile(); if (randomtree == "") { printUSummaryFile(); } delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "execute"); exit(1); } } /***********************************************************/ void ParsimonyCommand::printParsimonyFile() { try { vector data; vector tags; if (randomtree == "") { tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul"); } else { tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul"); } for(int a = 0; a < numComp; a++) { output->setLabelName(groupComb[a], tags); //print each line for (map::iterator it = validScores.begin(); it != validScores.end(); it++) { if (randomtree == "") { data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); }else{ data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); } output->updateOutput(data); data.clear(); } output->resetFile(); } } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "printParsimonyFile"); exit(1); } } /***********************************************************/ int ParsimonyCommand::printUSummaryFile() { try { //column headers outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl; m->mothurOut("Tree#\tGroups\tParsScore\tParsSig\n"); //format output outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); //print each line for (int i = 0; i< T.size(); i++) { for(int a = 0; a < numComp; a++) { if (m->getControl_pressed()) { outSum.close(); return 0; } if (UScoreSig[a][i] > (1/(float)iters)) { outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl; 
cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl; m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine(); }else { outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl; cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl; m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine(); } } } outSum.close(); return 0; } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "printUSummaryFile"); exit(1); } } /***********************************************************/ void ParsimonyCommand::getUserInput() { try { //create treemap ct = new CountTable(); m->mothurOut("Please enter the number of groups you would like to analyze: "); cin >> numGroups; m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine(); int num, count; count = 1; numEachGroup.resize(numGroups, 0); set nameMap; map groupMap; set gps; for (int i = 1; i <= numGroups; i++) { m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": "); cin >> num; m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine(); gps.insert(toString(i)); //set tmaps namesOfSeqs for (int j = 0; j < num; j++) { groupMap[toString(count)] = toString(i); nameMap.insert(toString(count)); count++; } } ct->createTable(nameMap, groupMap, gps); //clears buffer so next command doesn't have error string s; getline(cin, s); Treenames = ct->getNamesOfSeqs(); } catch(exception& e) { m->errorOut(e, "ParsimonyCommand", "getUserInput"); exit(1); } } /***********************************************************/ mothur-1.48.0/source/commands/parsimonycommand.h000077500000000000000000000062701424121717000217630ustar00rootroot00000000000000#ifndef PARSIMONYCOMMAND_H #define PARSIMONYCOMMAND_H /* * parsimonycommand.h * Mothur * * Created by Sarah Westcott on 1/26/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "treereader.h" #include "parsimony.h" #include "counttable.h" #include "fileoutput.h" #include "readtree.h" class ParsimonyCommand : public Command { public: ParsimonyCommand(string); ~ParsimonyCommand(){} vector setParameters(); string getCommandName() { return "parsimony"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Slatkin M, Maddison WP (1989). A cladistic measure of gene flow inferred from the phylogenies of alleles. Genetics 123: 603-13. \nSlatkin M, Maddison WP (1990). Detecting isolation by distance using phylogenies of genes. Genetics 126: 249-60. \nMartin AP (2002). Phylogenetic approaches for describing and comparing the diversity of microbial communities. Appl Environ Microbiol 68: 3673-82. \nSchloss PD, Handelsman J (2006). Introducing TreeClimber, a test to compare microbial community structure. 
Appl Environ Microbiol 72: 2379-84.\nhttp://www.mothur.org/wiki/Parsimony"; } string getDescription() { return "generic test that describes whether two or more communities have the same structure"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: FileOutput* output; vector T; //user trees Tree* randT; //random tree Tree* copyUserTree; CountTable* ct; CountTable* savect; vector groupComb; // AB. AC, BC... string sumFile, randomtree, allGroups, treefile, groupfile, namefile, countfile; int iters, numGroups, numComp, counter, processors, numUniquesInName; vector numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib. vector< vector > userTreeScores; //scores for users trees for each comb. vector< vector > UScoreSig; //tree score signifigance when compared to random trees - percentage of random trees with that score or lower. EstOutput userData; //pscore info for user tree EstOutput randomData; //pscore info for random trees map validScores; //map contains scores from both user and random vector< map > rscoreFreq; //map -vector entry for each combination. vector< map > uscoreFreq; //map -vector entry for each combination. vector< map > rCumul; //map -vector entry for each combination. vector< map > uCumul; //map -vector entry for each combination. ofstream outSum; bool abort; string groups, itersString; vector Groups, outputNames; //holds groups to be used map nameMap; vector Treenames; void printParsimonyFile(); int printUSummaryFile(); void getUserInput(); int readNamesFile(); }; #endif mothur-1.48.0/source/commands/pcacommand.cpp000077500000000000000000000361171424121717000210430ustar00rootroot00000000000000/* * pcacommand.cpp * mothur * * Created by westcott on 1/7/11. * Copyright 2011 Schloss Lab. All rights reserved. 
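 *
 *  Reader's sketch of the flow in process() below (added for orientation, not part of the
 *  original header): the shared/relabund abundances for one label are loaded into a
 *  samples x OTUs matrix, each OTU column is mean-centered, the OTU x OTU cross-product of
 *  the centered matrix is eigen-decomposed with linearCalc.tred2/qtli, and the centered data
 *  are projected onto the eigenvectors to produce the .pca.axes scores; the .pca.loadings
 *  file reports each axis' share of the summed eigenvalues.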
* */ #include "pcacommand.h" #include "inputdata.h" //********************************************************************************************************************** vector PCACommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","pca-loadings",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none","pca-loadings",false,false,true); parameters.push_back(prelabund); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pmetric("metric", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmetric); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["pca"] = tempOutNames; outputTypes["loadings"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "PCACommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string PCACommand::getHelpString(){ try { string helpString = ""; helpString += "The pca command parameters are shared, relabund, label, groups and metric. shared or relabund is required unless you have a valid current file."; helpString += "The label parameter is used to analyze specific labels in your input. Default is the first label in your shared or relabund file. Multiple labels may be separated by dashes.\n"; helpString += "The groups parameter allows you to specify which groups you would like analyzed. Groupnames are separated by dashes.\n"; helpString += "The metric parameter allows you to indicate if would like the pearson correlation coefficient calculated. 
Default=True"; helpString += "Example pca(groups=yourGroups).\n"; helpString += "Example pca(groups=A-B-C).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "PCACommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string PCACommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "pca") { pattern = "[filename],[distance],pca.axes"; } else if (type == "loadings") { pattern = "[filename],[distance],pca.loadings"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "PCACommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** PCACommand::PCACommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser. getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { mode = "sharedfile"; inputFile = sharedfile; current->setSharedFile(sharedfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { relabundfile = ""; abort = true; } else if (relabundfile == "not found") { relabundfile = ""; } else { mode = "relabund"; inputFile = relabundfile; current->setRelAbundFile(relabundfile); } if ((sharedfile == "") && (relabundfile == "")) { //is there are current file available for any of these? //give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputFile = sharedfile; mode = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { relabundfile = current->getRelAbundFile(); if (relabundfile != "") { inputFile = relabundfile; mode = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a relabund or shared file.\n"); abort = true; } } } if (outputdir == ""){ outputdir += util.hasPath(inputFile); } string temp = validParameter.valid(parameters, "metric"); if (temp == "not found"){ temp = "T"; } metric = util.isTrue(temp); label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; if(labels.size() == 0) { m->mothurOut("You did not provide a label, I will use the first label in your inputfile.\n"); } } else { util.splitAtDash(label, labels); } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } } } catch(exception& e) { m->errorOut(e, "PCACommand", "PCACommand"); exit(1); } } //********************************************************************************************************************** int PCACommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); cerr.setf(ios::fixed, ios::floatfield); cerr.setf(ios::showpoint); //get first line of shared file vector< vector > matrix; InputData* input; if (mode == "sharedfile") { input = new InputData(inputFile, "sharedfile", Groups); }else if (mode == "relabund") { input = new InputData(inputFile, "relabund", Groups); }else { m->mothurOut("[ERROR]: filetype not recognized.\n"); return 0; } SharedRAbundFloatVectors* lookupFloat = input->getSharedRAbundFloatVectors(); string lastLabel = lookupFloat->getLabel(); Groups = lookupFloat->getNamesGroups(); set processedLabels; set userLabels = labels; //if the user gave no labels, then use the first one read if (labels.size() == 0) { label = lastLabel; process(lookupFloat); } //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } delete input; delete lookupFloat; return 0; } if(labels.count(lookupFloat->getLabel()) == 1){ processedLabels.insert(lookupFloat->getLabel()); userLabels.erase(lookupFloat->getLabel()); process(lookupFloat); } if ((util.anyLabelsToProcess(lookupFloat->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookupFloat->getLabel(); delete lookupFloat; lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); process(lookupFloat); processedLabels.insert(lookupFloat->getLabel()); userLabels.erase(lookupFloat->getLabel()); //restore real lastlabel to save below lookupFloat->setLabels(saveLabel); } lastLabel = lookupFloat->getLabel(); //get next line to process //prevent memory leak delete lookupFloat; lookupFloat = input->getSharedRAbundFloatVectors(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } delete input; delete lookupFloat; return 0; } //output error messages about any remaining user labels bool needToRun = false; for (set::iterator it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; } else { m->mothurOut(". 
Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete lookupFloat; lookupFloat = input->getSharedRAbundFloatVectors(lastLabel); process(lookupFloat); delete lookupFloat; } delete input; if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "PCACommand", "execute"); exit(1); } } /********************************************************************************************************************** vector< vector > PCACommand::createMatrix(vector lookupFloat){ try { vector< vector > matrix; matrix.resize(lookupFloat.size()); //fill matrix with shared files relative abundances for (int i = 0; i < lookupFloat.size(); i++) { for (int j = 0; j < lookupFloat[i]->getNumBins(); j++) { matrix[i].push_back(lookupFloat[i]->getAbundance(j)); } } vector< vector > transposeMatrix; transposeMatrix.resize(matrix[0].size()); for (int i = 0; i < transposeMatrix.size(); i++) { for (int j = 0; j < matrix.size(); j++) { transposeMatrix[i].push_back(matrix[j][i]); } } matrix = linearCalc.matrix_mult(matrix, transposeMatrix); return matrix; } catch(exception& e) { m->errorOut(e, "PCACommand", "createMatrix"); exit(1); } }*/ //********************************************************************************************************************** int PCACommand::process(SharedRAbundFloatVectors*& lookupFloat){ try { m->mothurOut("\nProcessing " + lookupFloat->getLabel()); m->mothurOutEndLine(); int numOTUs = lookupFloat->getNumBins(); int numSamples = lookupFloat->getNumGroups(); vector< vector > matrix(numSamples); vector colMeans(numOTUs); //fill matrix with shared relative abundances, re-center vector data = lookupFloat->getSharedRAbundFloatVectors(); for (int i = 0; i < numSamples; i++) { matrix[i].resize(numOTUs, 0); for (int j = 0; j < numOTUs; j++) { matrix[i][j] = data[i]->get(j); colMeans[j] += matrix[i][j]; } delete data[i]; } data.clear(); for(int j=0;j > centered = matrix; for(int i=0;i > transpose(numOTUs); for (int i = 0; i < numOTUs; i++) { transpose[i].resize(numSamples, 0); for (int j = 0; j < numSamples; j++) { transpose[i][j] = centered[j][i]; } } vector > crossProduct = linearCalc.matrix_mult(transpose, centered); vector d; vector e; linearCalc.tred2(crossProduct, d, e); if (m->getControl_pressed()) { return 0; } linearCalc.qtli(d, e, crossProduct); if (m->getControl_pressed()) { return 0; } vector > X = linearCalc.matrix_mult(centered, crossProduct); if (m->getControl_pressed()) { return 0; } string fbase = outputdir + util.getRootName(util.getSimpleName(inputFile)); //string outputFileName = fbase + lookupFloat[0]->getLabel(); output(fbase, lookupFloat->getLabel(), Groups, X, d); if (metric) { vector > observedEuclideanDistance = linearCalc.getObservedEuclideanDistance(centered); for (int i = 1; i < 4; i++) { vector< vector > PCAEuclidDists = linearCalc.calculateEuclidianDistance(X, i); //G is the pca file if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } double corr = linearCalc.calcPearson(PCAEuclidDists, observedEuclideanDistance); m->mothurOut("Rsq " + toString(i) + " axis: " + toString(corr * corr)); m->mothurOutEndLine(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { 
util.mothurRemove(outputNames[i]); } return 0; } } } return 0; } catch(exception& e) { m->errorOut(e, "PCACommand", "process"); exit(1); } } /*********************************************************************************************************************************/ void PCACommand::output(string fbase, string label, vector name_list, vector >& G, vector d) { try { int numEigenValues = d.size(); double dsum = 0.0000; for(int i=0;i variables; variables["[filename]"] = fbase; variables["[distance]"] = label; string pcaFileName = getOutputFileName("pca",variables); util.openOutputFile(pcaFileName, pcaData); pcaData.setf(ios::fixed, ios::floatfield); pcaData.setf(ios::showpoint); outputNames.push_back(pcaFileName); outputTypes["pca"].push_back(pcaFileName); ofstream pcaLoadings; string loadingsFilename = getOutputFileName("loadings",variables); util.openOutputFile(loadingsFilename, pcaLoadings); pcaLoadings.setf(ios::fixed, ios::floatfield); pcaLoadings.setf(ios::showpoint); outputNames.push_back(loadingsFilename); outputTypes["loadings"].push_back(loadingsFilename); pcaLoadings << "axis\tloading\n"; for(int i=0;ierrorOut(e, "PCACommand", "output"); exit(1); } } /*********************************************************************************************************************************/ mothur-1.48.0/source/commands/pcacommand.h000077500000000000000000000027151424121717000205050ustar00rootroot00000000000000#ifndef PCACOMMAND_H #define PCACOMMAND_H /* * pcacommand.h * mothur * * Created by westcott on 1/7/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "linearalgebra.h" #include "sharedrabundfloatvectors.hpp" /*****************************************************************/ class PCACommand : public Command { public: PCACommand(string); ~PCACommand() = default; vector setParameters(); string getCommandName() { return "pca"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. \nhttp://www.mothur.org/wiki/Pca"; } string getDescription() { return "pca"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, metric; string mode, inputFile, label, groups, sharedfile, relabundfile; vector outputNames, Groups; set labels; LinearAlgebra linearCalc; //vector< vector > createMatrix(vector); int process(SharedRAbundFloatVectors*&); void output(string, string, vector, vector >&, vector); }; /*****************************************************************/ #endif mothur-1.48.0/source/commands/pcoacommand.cpp000077500000000000000000000206161424121717000212170ustar00rootroot00000000000000 /* * pcacommand.cpp * Mothur * * Created by westcott on 1/4/10. * Copyright 2010 Schloss Lab. All rights reserved. 
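 *
 * The output() routines in pcacommand.cpp above and in this file both scale the
 * eigenvectors returned by tred2()/qtli() by the square root of their eigenvalues and
 * report each eigenvalue as a percentage of the eigenvalue sum in the .loadings file.
 * The block below is a minimal, standalone sketch of that arithmetic only; it does not
 * use mothur's LinearAlgebra class, and the numbers and names are illustrative rather
 * than taken from mothur.
 *
 *   #include <cmath>
 *   #include <iostream>
 *   #include <vector>
 *
 *   int main() {
 *       std::vector<double> d = {5.0, 2.0, 0.5, -0.1};   // toy eigenvalues
 *       std::vector<std::vector<double>> G = {           // toy eigenvector rows, one per sample
 *           {0.5, 0.1, 0.2, 0.3},
 *           {0.4, -0.2, 0.1, 0.0} };
 *
 *       double dsum = 0;
 *       for (double v : d) { dsum += v; }
 *
 *       // axis coordinates are eigenvectors scaled by sqrt(eigenvalue);
 *       // axes with negative eigenvalues are zeroed, as in output() above
 *       for (auto& row : G) {
 *           for (size_t j = 0; j < row.size(); j++) {
 *               row[j] = (d[j] >= 0) ? row[j] * std::sqrt(d[j]) : 0.0;
 *           }
 *       }
 *
 *       // the .loadings file reports each eigenvalue as a percent of the eigenvalue sum
 *       for (size_t j = 0; j < d.size(); j++) {
 *           std::cout << "axis " << j + 1 << "\t" << d[j] * 100.0 / dsum << "\n";
 *       }
 *       std::cout << "sample 1, axis 1 coordinate: " << G[0][0] << "\n";
 *       return 0;
 *   }
 *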
* */ #include "pcoacommand.h" #include "readphylipvector.h" //********************************************************************************************************************** vector PCOACommand::setParameters(){ try { CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","pcoa-loadings",false,true,true); parameters.push_back(pphylip); CommandParameter pmetric("metric", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmetric); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["pcoa"] = tempOutNames; outputTypes["loadings"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "PCOACommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string PCOACommand::getHelpString(){ try { string helpString = ""; helpString += "The pcoa command parameters are phylip and metric"; helpString += "The phylip parameter allows you to enter your distance file."; helpString += "The metric parameter allows indicate you if would like the pearson correlation coefficient calculated. Default=True"; helpString += "Example pcoa(phylip=yourDistanceFile).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "PCOACommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string PCOACommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "pcoa") { pattern = "[filename],pcoa.axes"; } else if (type == "loadings") { pattern = "[filename],pcoa.loadings"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "PCOACommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** PCOACommand::PCOACommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser. 
getParameters(); ValidParameters validParameter; phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { abort = true; } else if (phylipfile == "not found") { //if there is a current phylip file, use it phylipfile = current->getPhylipFile(); if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("You have no current phylip file and the phylip parameter is required.\n"); abort = true; } }else { current->setPhylipFile(phylipfile); } filename = phylipfile; if (outputdir == ""){ outputdir += util.hasPath(phylipfile); } string temp = validParameter.valid(parameters, "metric"); if (temp == "not found"){ temp = "T"; } metric = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "PCOACommand", "PCOACommand"); exit(1); } } //********************************************************************************************************************** int PCOACommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); cerr.setf(ios::fixed, ios::floatfield); cerr.setf(ios::showpoint); vector names; vector > D; fbase = outputdir + util.getRootName(util.getSimpleName(filename)); ReadPhylipVector readFile(filename); names = readFile.read(D); if (m->getControl_pressed()) { return 0; } double offset = 0.0000; vector d; vector e; vector > G = D; //vector > copy_G; m->mothurOut("\nProcessing...\n"); for(int count=0;count<2;count++){ linearCalc.recenter(offset, D, G); if (m->getControl_pressed()) { return 0; } linearCalc.tred2(G, d, e); if (m->getControl_pressed()) { return 0; } linearCalc.qtli(d, e, G); if (m->getControl_pressed()) { return 0; } offset = d[d.size()-1]; if(offset > 0.0) break; } if (m->getControl_pressed()) { return 0; } output(fbase, names, G, d); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (metric) { for (int i = 1; i < 4; i++) { vector< vector > EuclidDists = linearCalc.calculateEuclidianDistance(G, i); //G is the pcoa file if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } double corr = linearCalc.calcPearson(EuclidDists, D); //G is the pcoa file, D is the users distance matrix m->mothurOut("Rsq " + toString(i) + " axis: " + toString(corr * corr)); m->mothurOutEndLine(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "PCOACommand", "execute"); exit(1); } } /*********************************************************************************************************************************/ void PCOACommand::get_comment(istream& f, char begin, char end){ try { char d=f.get(); while(d != end){ d = f.get(); } d = f.peek(); } catch(exception& e) { m->errorOut(e, "PCOACommand", "get_comment"); exit(1); } } /*********************************************************************************************************************************/ void PCOACommand::output(string fnameRoot, vector name_list, vector >& G, vector d) { try { int rank = name_list.size(); double dsum = 0.0000; for(int i=0;i= 0) { G[i][j] *= pow(d[j],0.5); } else { G[i][j] = 0.00000; } } } ofstream 
pcaData; map variables; variables["[filename]"] = fnameRoot; string pcoaDataFile = getOutputFileName("pcoa",variables); util.openOutputFile(pcoaDataFile, pcaData); pcaData.setf(ios::fixed, ios::floatfield); pcaData.setf(ios::showpoint); outputNames.push_back(pcoaDataFile); outputTypes["pcoa"].push_back(pcoaDataFile); ofstream pcaLoadings; string loadingsFile = getOutputFileName("loadings",variables); util.openOutputFile(loadingsFile, pcaLoadings); pcaLoadings.setf(ios::fixed, ios::floatfield); pcaLoadings.setf(ios::showpoint); outputNames.push_back(loadingsFile); outputTypes["loadings"].push_back(loadingsFile); pcaLoadings << "axis\tloading\n"; for(int i=0;ierrorOut(e, "PCOACommand", "output"); exit(1); } } /*********************************************************************************************************************************/ mothur-1.48.0/source/commands/pcoacommand.h000077500000000000000000000024241424121717000206610ustar00rootroot00000000000000#ifndef PCOACOMMAND_H #define PCOACOMMAND_H /* * pcoacommand.h * Mothur * * Created by westcott on 1/4/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "linearalgebra.h" /*****************************************************************/ class PCOACommand : public Command { public: PCOACommand(string); ~PCOACommand(){} vector setParameters(); string getCommandName() { return "pcoa"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. \nhttp://www.mothur.org/wiki/Pcoa"; } string getDescription() { return "pcoa"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, metric; string phylipfile, filename, fbase; vector outputNames; LinearAlgebra linearCalc; void get_comment(istream&, char, char); void output(string, vector, vector >&, vector); }; /*****************************************************************/ #endif mothur-1.48.0/source/commands/pcrseqscommand.cpp000066400000000000000000001474011424121717000217540ustar00rootroot00000000000000// // prcseqscommand.cpp // Mothur // // Created by Sarah Westcott on 3/14/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
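//
//  The sketch below (guarded with #if 0 so it is never compiled) is a minimal,
//  standalone illustration of the keepprimer/nomatch behaviour that the pcr.seqs
//  help text further down describes. It only does an exact forward-primer match;
//  the real work in this file is done by TrimOligos, which also allows pdiffs/rdiffs
//  mismatches, reverse primers, barcodes and reorientation. Helper and variable
//  names here are illustrative only, not mothur APIs.
//
#if 0
#include <iostream>
#include <string>

// hypothetical helper, not part of mothur
static bool trimForwardPrimer(std::string& read, const std::string& primer,
                              bool keepprimer, const std::string& nomatch) {
    size_t pos = read.find(primer);
    if (pos == std::string::npos) {
        // primer not found: nomatch=reject drops the read, nomatch=keep leaves it untouched
        return (nomatch != "reject");
    }
    read = keepprimer ? read.substr(pos) : read.substr(pos + primer.size());
    return true;
}

int main() {
    std::string read   = "TTACGTGCCAGCAAGGTTCC";
    std::string primer = "GTGCCAGC";                     // toy primer
    if (trimForwardPrimer(read, primer, false, "reject")) {
        std::cout << "kept: " << read << "\n";           // prints the bases after the primer
    } else {
        std::cout << "read rejected\n";
    }
    return 0;
}
#endif
//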
// #include "pcrseqscommand.h" //********************************************************************************************************************** vector PcrSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false,true); parameters.push_back(poligos); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup); CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false,true); parameters.push_back(ptax); CommandParameter preorient("checkorient", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false); parameters.push_back(pecoli); CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart); CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend); CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter prdiffs("rdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(prdiffs); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer); CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string PcrSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The pcr.seqs command reads a fasta file.\n"; helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, rdiffs, processors, keepprimer and keepdots.\n"; helpString += "The ecoli 
parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n"; helpString += "The start parameter allows you to provide a starting position to trim to.\n"; helpString += "The end parameter allows you to provide a ending position to trim from.\n"; helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file. if nomatch=true, then do nothing to sequence.\n"; helpString += "The checkorient parameter will look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is true.\n"; helpString += "The processors parameter allows you to use multiple processors.\n"; helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n"; helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the forward primer. The default is 0.\n"; helpString += "The rdiffs parameter is used to specify the number of differences allowed in the reverse primer. The default is 0.\n"; ; helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n"; return helpString; } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string PcrSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],pcr,[extension]-[filename],[tag],pcr,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],pcr,[extension]"; } else if (type == "name") { pattern = "[filename],pcr,[extension]"; } else if (type == "group") { pattern = "[filename],pcr,[extension]"; } else if (type == "count") { pattern = "[filename],pcr,[extension]"; } else if (type == "accnos") { pattern = "[filename],bad.accnos"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** PcrSeqsCommand::PcrSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else if (fastafile == "not open") { fastafile = ""; abort = true; } else { current->setFastaFile(fastafile); } if (outputdir == ""){ outputdir = util.hasPath(fastafile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
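// Hedged examples of how the options parsed below are typically combined, based on the
// help text above; file names and alignment coordinates are illustrative, not defaults:
//   pcr.seqs(fasta=final.fasta, oligos=primers.oligos, pdiffs=2, rdiffs=2, keepprimer=F)
//   pcr.seqs(fasta=final.align, ecoli=ecoli.v4.align, keepdots=F)
//   pcr.seqs(fasta=final.align, start=11894, end=25319, keepdots=F)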
string temp; temp = validParameter.valid(parameters, "keepprimer"); if (temp == "not found") { temp = "f"; } keepprimer = util.isTrue(temp); temp = validParameter.valid(parameters, "keepdots"); if (temp == "not found") { temp = "t"; } keepdots = util.isTrue(temp); temp = validParameter.validFile(parameters, "oligos"); if (temp == "not found"){ oligosfile = ""; } else if(temp == "not open"){ oligosfile = ""; abort = true; } else { oligosfile = temp; current->setOligosFile(oligosfile); } ecolifile = validParameter.validFile(parameters, "ecoli"); if (ecolifile == "not found"){ ecolifile = ""; } else if(ecolifile == "not open"){ ecolifile = ""; abort = true; } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not found"){ namefile = ""; } else if(namefile == "not open"){ namefile = ""; abort = true; } else { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not found"){ groupfile = ""; } else if(groupfile == "not open"){ groupfile = ""; abort = true; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort = true; } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not found"){ taxfile = ""; } else if(taxfile == "not open"){ taxfile = ""; abort = true; } else { current->setTaxonomyFile(taxfile); } temp = validParameter.valid(parameters, "start"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, start); temp = validParameter.valid(parameters, "end"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, end); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "pdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, pdiffs); temp = validParameter.valid(parameters, "rdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, rdiffs); temp = validParameter.valid(parameters, "checkorient"); if (temp == "not found") { temp = "T"; } reorient = util.isTrue(temp); nomatch = validParameter.valid(parameters, "nomatch"); if (nomatch == "not found") { nomatch = "reject"; } if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n"); abort = true; } //didnt set anything if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) { m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true; } if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; } if ((ecolifile != "") && (start != -1) && (end != -1)) { m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend. 
When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true; } if ((oligosfile != "") && (ecolifile != "")) { m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand"); exit(1); } } //*************************************************************************************************************** int PcrSeqsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); fileAligned = true; pairedOligos = false; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[extension]"] = util.getExtension(fastafile); string trimSeqFile = getOutputFileName("fasta",variables); outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); variables["[tag]"] = "scrap"; string badSeqFile = getOutputFileName("fasta",variables); length = 0; if (m->getControl_pressed()) { return 0; } set badNames; long long numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames); if (m->getControl_pressed()) { return 0; } thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); string outputFileName = getOutputFileName("accnos",variables); //don't write or keep if blank bool wroteAccnos = false; if (badNames.size() != 0) { writeAccnos(badNames, outputFileName); wroteAccnos = true; outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); } else { m->mothurOut("[NOTE]: no sequences were bad, removing " + outputFileName + "\n\n"); } if (util.isBlank(badSeqFile)) { util.mothurRemove(badSeqFile); } else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); } if (wroteAccnos) { string inputStringTemp = ""; if (countfile != "") { inputStringTemp += ", count=" + countfile; } else{ if (namefile != "") { inputStringTemp += ", name=" + namefile; } if (groupfile != "") { inputStringTemp += ", group=" + groupfile; } } if (taxfile != "") { inputStringTemp += ", taxonomy=" + taxfile; } string inputString = "accnos=" + outputFileName + inputStringTemp; if (inputStringTemp != "") { m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); if (groupfile != "") { thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[extension]"] = util.getExtension(groupfile); string outGroup = getOutputFileName("group", variables); util.renameFile(filenames["group"][0], outGroup); outputNames.push_back(outGroup); outputTypes["group"].push_back(outGroup); } if (namefile != "") { thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(namefile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[extension]"] = 
util.getExtension(namefile); string outName = getOutputFileName("name", variables); util.renameFile(filenames["name"][0], outName); outputNames.push_back(outName); outputTypes["name"].push_back(outName); } if (countfile != "") { thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string outCount = getOutputFileName("count", variables); util.renameFile(filenames["count"][0], outCount); outputNames.push_back(outCount); outputTypes["count"].push_back(outCount); } if (taxfile != "") { thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile)); variables["[extension]"] = util.getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); util.renameFile(filenames["taxonomy"][0], outputFileName); outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName); } m->mothurOut("/******************************************/\n"); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", "execute"); exit(1); } } //*************************************************************************************************************** bool readOligos(Oligos& oligos, string oligosfile, bool& pairedOligos, int& numFPrimers, int& numRPrimers, MothurOut* m){ try { oligos.read(oligosfile); if (m->getControl_pressed()) { return false; } //error in reading oligos if (oligos.hasPairedPrimers()) { pairedOligos = true; numFPrimers = oligos.getPairedPrimers().size(); numRPrimers = numFPrimers; }else { pairedOligos = false; numFPrimers = oligos.getPrimers().size(); numRPrimers = oligos.getReversePrimers().size(); } if (oligos.getLinkers().size() != 0) { 
m->mothurOut("[WARNING]: pcr.seqs is not setup to remove linkers, ignoring.\n"); } if (oligos.getSpacers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove spacers, ignoring.\n"); } return true; } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", "readOligos"); exit(1); } } //********************************************************************/ bool isAligned(string seq, map& aligned){ aligned.clear(); bool isAligned = false; int countBases = 0; for (int i = 0; i < seq.length(); i++) { if (!isalpha(seq[i])) { isAligned = true; } else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned. } //ie. the 3rd base may be at spot 10 in the alignment //later when we trim we want to trim from spot 10. return isAligned; } /**************************************************************************************************/ //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct pcrData { string filename; string oligosfile, nomatch; OutputWriter* goodFasta; OutputWriter* badFasta; unsigned long long fstart; unsigned long long fend; int count, start, end, length, pdiffs, pstart, pend, rdiffs; int numFPrimers, numRPrimers; MothurOut* m; set badSeqNames; bool keepprimer, keepdots, fileAligned, adjustNeeded, reorient; Utils util; map > locations; Sequence ecoli; pcrData(){} pcrData(string f, string ol, OutputWriter* gf, OutputWriter* bfn, Sequence ec, string nm, bool kp, bool kd, int pd, int rd, bool re, unsigned long long fst, unsigned long long fen, int st, int en) { filename = f; goodFasta = gf; badFasta = bfn; m = MothurOut::getInstance(); oligosfile = ol; start = st; end = en; ecoli = ec; reorient = re; if (ecoli.getName() != "filler") { length = ecoli.getAligned().length(); start = ecoli.getStartPos(); end = ecoli.getEndPos(); } nomatch = nm; keepprimer = kp; keepdots = kd; fstart = fst; fend = fen; pdiffs = pd; rdiffs = rd; count = 0; fileAligned = true; adjustNeeded = false; pstart = -1; pend = -1; } }; /**************************************************************************************************/ vector fillTrims(pcrData* params, bool& pairedOligos) { try { vector trims; params->numFPrimers = 0; params->numRPrimers = 0; if (params->oligosfile != "") { Oligos oligos; //params->numFPrimers = 0; params->numRPrimers = 0; map barcodes; //not used readOligos(oligos, params->oligosfile, pairedOligos, params->numFPrimers, params->numRPrimers, params->m); if (pairedOligos) { map primers; vector revPrimer; map primerPairs = oligos.getPairedPrimers(); for (map::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) { primers[(it->second).forward] = it->first; revPrimer.push_back((it->second).reverse); } //standard trims.push_back(new TrimOligos(params->pdiffs, params->rdiffs, 0, primers, barcodes, revPrimer)); }else{ map primers; vector revPrimer; primers = oligos.getPrimers(); revPrimer = oligos.getReversePrimers(); //standard trims.push_back(new TrimOligos(params->pdiffs, params->rdiffs, 0, primers, barcodes, revPrimer)); } if (params->reorient) { //reoriented if (pairedOligos) { map primers; vector revPrimer; map rprimerPairs = oligos.getReorientedPairedPrimers(); for (map::iterator it = rprimerPairs.begin(); it != rprimerPairs.end(); it++) { primers[(it->second).forward] = it->first; revPrimer.push_back((it->second).reverse); } //reoriented trims.push_back(new TrimOligos(params->pdiffs, params->rdiffs, 0, 
primers, barcodes, revPrimer)); primers.clear(); revPrimer.clear(); map revprimerPairs = oligos.getReversedPairedPrimers(); for (map::iterator it = revprimerPairs.begin(); it != revprimerPairs.end(); it++) { primers[(it->second).forward] = it->first; revPrimer.push_back((it->second).reverse); } //reversed trims.push_back(new TrimOligos(params->pdiffs, params->rdiffs, 0, primers, barcodes, revPrimer)); }else{ map primers; vector revPrimer; primers = oligos.getReorientedPrimers(); revPrimer = oligos.getReorientedReversePrimers(); //reoriented trims.push_back(new TrimOligos(params->pdiffs, params->rdiffs, 0, primers, barcodes, revPrimer)); primers.clear(); revPrimer.clear(); primers = oligos.getReversedPrimers(); revPrimer = oligos.getReversedReversePrimers(); //reversed trims.push_back(new TrimOligos(params->pdiffs, params->rdiffs, 0, primers, barcodes, revPrimer)); } } } return trims; }catch(exception& e) { params->m->errorOut(e, "PcrSeqsCommand", "fillTrims"); exit(1); } } /**************************************************************************************************/ bool trimStartEnd(Sequence& seq, pcrData* params) { try { bool good = true; //make sure the seqs are aligned if (!params->fileAligned) { params->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); params->m->setControl_pressed(true); good = false; } else { string alignedString = seq.getAligned(); if ((seq.getStartPos() > params->start) || (seq.getEndPos() < params->end)) { good = false; if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + seq.getName()+ " values at locations (" + toString(params->start) + "," + toString(params->end) + ") = (" + alignedString[params->start] + "," + alignedString[params->end] + ")\n"); } } else { if (params->end != -1) { if (params->end > seq.getAligned().length()) { params->m->mothurOut("[ERROR]: end of " +toString(params->end) + " is longer than " + seq.getName() + " length of " +toString(seq.getAligned().length()) + ", aborting.\n"); params->m->setControl_pressed(true); good = false; } else { if (params->keepdots) { seq.filterFromPos(params->end); } else { string seqString = seq.getAligned().substr(0, (params->end)); seq.setAligned(seqString); } } } if (params->start != -1) { if (params->keepdots) { seq.filterToPos(params->start-1); } else { string seqString = seq.getAligned().substr(params->start-1); seq.setAligned(seqString); } } } } return good; }catch(exception& e) { params->m->errorOut(e, "PcrSeqsCommand", "trimStartEnd"); exit(1); } } /**************************************************************************************************/ vector trimPrimers(Sequence& seq, vector trims, vector& thisSeqsLocations, int& thisPStart, int& thisPEnd, pcrData* params) { try { vector codes; codes.resize(2, ""); for (int i = 0; i < trims.size(); i++) { Sequence savedSeq(seq.getName(), seq.getAligned()); map mapAligned; bool aligned = isAligned(savedSeq.getAligned(), mapAligned); string trashCode = ""; string commentString = ""; int currentSeqsDiffs = 0; int reverseIndex = 0; int primerIndex = 0; bool goodSeq = true; if(params->numFPrimers != 0){ int primerStart = 0; int primerEnd = 0; vector results = trims[i]->findForward(savedSeq, primerStart, primerEnd); bool good = true; if (results[0] > params->pdiffs) { good = false; } currentSeqsDiffs += results[0]; commentString += "fpdiffs=" + toString(results[0]) + "(" + trims[i]->getCodeValue(results[1], params->pdiffs) + ") "; if(!good){ if (params->nomatch == "reject") { goodSeq = 
false; } trashCode += "f"; } else{ //are you aligned if (aligned) { if (!params->keepprimer) { if (params->keepdots) { savedSeq.filterToPos(mapAligned[primerEnd-1]+1); } //mapAligned[primerEnd-1] is the location of the last base in the primer. we want to trim to the space just after that. The -1 & +1 ensures if the primer is followed by gaps they are not trimmed causing an aligned sequence dataset to become unaligned. else{ savedSeq.setAligned(savedSeq.getAligned().substr(mapAligned[primerEnd-1]+1)); if (params->fileAligned) { thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1); thisSeqsLocations.push_back(thisPStart); } } }else { if (params->keepdots) { savedSeq.filterToPos(mapAligned[primerStart]); } else { savedSeq.setAligned(savedSeq.getAligned().substr(mapAligned[primerStart])); if (params->fileAligned) { thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]); thisSeqsLocations.push_back(thisPStart); } } } isAligned(savedSeq.getAligned(), mapAligned); }else { if (!params->keepprimer) { savedSeq.setAligned(savedSeq.getUnaligned().substr(primerEnd)); } else { savedSeq.setAligned(savedSeq.getUnaligned().substr(primerStart)); } } } } if(params->numRPrimers != 0){ int primerStart = 0; int primerEnd = 0; vector results = trims[i]->findReverse(savedSeq, primerStart, primerEnd); bool good = true; if (results[0] > params->rdiffs) { good = false; } currentSeqsDiffs += results[0]; commentString += "rpdiffs=" + toString(results[0]) + "(" + trims[i]->getCodeValue(results[1], params->rdiffs) + ") "; if(!good){ if (params->nomatch == "reject") { goodSeq = false; } trashCode += "r"; } else{ //are you aligned if (aligned) { if (!params->keepprimer) { if (params->keepdots) { savedSeq.filterFromPos(mapAligned[primerStart]); } else { savedSeq.setAligned(savedSeq.getAligned().substr(0, mapAligned[primerStart])); if (params->fileAligned) { thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]); thisSeqsLocations.push_back(thisPEnd); } } } else { if (params->keepdots) { savedSeq.filterFromPos(mapAligned[primerEnd-1]+1); } else { savedSeq.setAligned(savedSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1)); if (params->fileAligned) { thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1); thisSeqsLocations.push_back(thisPEnd); } } } } else { if (!params->keepprimer) { savedSeq.setAligned(savedSeq.getUnaligned().substr(0, primerStart)); } else { savedSeq.setAligned(savedSeq.getUnaligned().substr(0, primerEnd)); } } } } if (currentSeqsDiffs > (params->pdiffs + params->rdiffs)) { trashCode += 't'; } if (trashCode == "") { codes[0] = ""; codes[1] = commentString; if (i > 0) { //checkOrient trimOligos - reoriented and reversed savedSeq.reverseComplement(); } seq.setAligned(savedSeq.getAligned()); break; }else { if (codes[0] == "") { codes[0] = trashCode; } else { codes[0] += "(" + trashCode + ")"; } codes[1] = commentString; } } return codes; }catch(exception& e) { params->m->errorOut(e, "PcrSeqsCommand", "trimPrimers"); exit(1); } } //********************************************************************************************************************** int driverPcr(pcrData* params){ try { ifstream inFASTA; params->util.openInputFile(params->filename, inFASTA); inFASTA.seekg(params->fstart); bool done = false; params->count = 0; set lengths; set startLocations; set endLocations; bool pairedOligos = false; vector trims = fillTrims(params, pairedOligos); //standard, if reorient parameter 
then (reorient & reverse) as well while (!done) { if (params->m->getControl_pressed()) { break; } Sequence currSeq(inFASTA); gobble(inFASTA); if (params->fileAligned) { //assume aligned until proven otherwise lengths.insert(currSeq.getAligned().length()); if (lengths.size() > 1) { params->fileAligned = false; } } if (params->m->getControl_pressed()) { break; } string trashCode = ""; string commentString = ""; int thisPStart = -1; int thisPEnd = -1; if (currSeq.getName() != "") { if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); } bool goodSeq = true; vector thisSeqsLocations; if (params->oligosfile != "") { //removing primers vector results = trimPrimers(currSeq, trims, thisSeqsLocations, thisPStart, thisPEnd, params); trashCode = results[0]; commentString = results[1]; if (commentString != "") { string seqComment = currSeq.getComment(); currSeq.setComment("\t" + commentString + "\t" + seqComment); } if (trashCode != "") { goodSeq = false; } }else if (params->ecoli.getName() != "filler") { //make sure the seqs are aligned if (!params->fileAligned) { params->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); params->m->setControl_pressed(true); break; } else if (currSeq.getAligned().length() != params->length) { params->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); params->m->setControl_pressed(true); break; }else { if (params->keepdots) { currSeq.filterFromPos(params->end); currSeq.filterToPos(params->start-1); }else { string seqString = currSeq.getAligned().substr(0, params->end); seqString = seqString.substr(params->start); currSeq.setAligned(seqString); } } }else{ //using start and end to trim goodSeq = trimStartEnd(currSeq, params); //error message if seqs unaligned } //remove super short reads if (currSeq.getUnaligned() == "") { goodSeq = false; currSeq.setAligned("NNNNNNN"); } if(goodSeq) { currSeq.printSequence(params->goodFasta); if (thisPStart != -1) { startLocations.insert(thisPStart); } if (thisPEnd != -1) { endLocations.insert(thisPEnd); } if (thisSeqsLocations.size() != 0) { params->locations[currSeq.getName()] = thisSeqsLocations; } } else { params->badSeqNames.insert(currSeq.getName()); currSeq.setName(currSeq.getName() + '|' + trashCode); currSeq.printSequence(params->badFasta); } params->count++; } #if defined NON_WINDOWS unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= params->fend)) { break; } #else if ((params->count == params->fend) || (inFASTA.eof())) { break; } #endif //report progress if((params->count) % 1000 == 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } } //report progress if((params->count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } inFASTA.close(); for (int i = 0; i < trims.size(); i++) { delete trims[i]; } if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: fileAligned = " + toString(params->fileAligned) +'\n'); } if (params->fileAligned && !params->keepdots) { //print out smallest start value and largest end value if (startLocations.size() > 1) { params->adjustNeeded = true; } if (endLocations.size() > 1) { params->adjustNeeded = true; } if (params->numFPrimers != 0) { set::iterator it = startLocations.begin(); params->pstart = *it; if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + params->util.getStringFromSet(startLocations, " 
")+"\n"); } } if (params->numRPrimers != 0) { set::reverse_iterator it = endLocations.rbegin(); params->pend = *it; if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + params->util.getStringFromSet(endLocations, " ")+"\n"); } } } return params->count; } catch(exception& e) { params->m->errorOut(e, "PcrSeqsCommand", "driverPcr"); exit(1); } } /**************************************************************************************************/ long long PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set& badSeqNames) { try { Sequence ecoliSeq("filler","NNNN"); if(ecolifile != "") { ecoliSeq = readEcoli(); } if (m->getControl_pressed()) { return 0; } vector positions; vector lines; long long numFastaSeqs = 0; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else positions = util.setFilePosFasta(fastafile, numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector workerThreads; vector data; auto synchronizedGoodFastaFile = make_shared(goodFileName); auto synchronizedBadFastaFile = make_shared(badFileName); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadFastaWriter = new OutputWriter(synchronizedGoodFastaFile); OutputWriter* threadFastaScrapWriter = new OutputWriter(synchronizedBadFastaFile); pcrData* dataBundle = new pcrData(filename, oligosfile, threadFastaWriter, threadFastaScrapWriter, ecoliSeq, nomatch, keepprimer, keepdots, pdiffs, rdiffs, reorient, lines[i+1].start, lines[i+1].end, start, end); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverPcr, dataBundle)); } OutputWriter* threadFastaWriter = new OutputWriter(synchronizedGoodFastaFile); OutputWriter* threadFastaScrapWriter = new OutputWriter(synchronizedBadFastaFile); pcrData* dataBundle = new pcrData(filename, oligosfile, threadFastaWriter, threadFastaScrapWriter, ecoliSeq, nomatch, keepprimer, keepdots, pdiffs, rdiffs, reorient, lines[0].start, lines[0].end, start, end); driverPcr(dataBundle); numFastaSeqs = dataBundle->count; badSeqNames = dataBundle->badSeqNames; map > locations = dataBundle->locations; bool adjustNeeded = dataBundle->adjustNeeded; int pstart = -1; int pend = -1; pstart = dataBundle->pstart; pend = dataBundle->pend; bool hasFPrimers = false; if (dataBundle->numFPrimers != 0) { hasFPrimers = true; } bool hasRPrimers = false; if (dataBundle->numRPrimers != 0) { hasRPrimers = true; } if (m->getDebug()) { m->mothurOut("[DEBUG]: pstart = " + toString(pstart) + ", pend = " + toString(pend) + "\n"); } for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); numFastaSeqs += data[i]->count; delete data[i]->goodFasta; delete data[i]->badFasta; if (data[i]->adjustNeeded) { adjustNeeded = true; } if (data[i]->pstart != -1) { if (data[i]->pstart != pstart) { adjustNeeded = true; } if (data[i]->pstart < pstart) { pstart = data[i]->pstart; } } //smallest start if (data[i]->pend != -1) { if (data[i]->pend != pend) { adjustNeeded = true; } if (data[i]->pend > 
pend) { pend = data[i]->pend; } }//largest end if (m->getDebug()) { m->mothurOut("[DEBUG]: process " + toString(i) + " pstart = " + toString(data[i]->pstart) + ", pend = " + toString(data[i]->pend) + "\n"); } badSeqNames.insert(data[i]->badSeqNames.begin(), data[i]->badSeqNames.end()); locations.insert(data[i]->locations.begin(), data[i]->locations.end()); delete data[i]; delete workerThreads[i]; } synchronizedGoodFastaFile->close(); //must explicitly close or file may still be open when reading and writing in adjustDots synchronizedBadFastaFile->close(); //must explicitly close or file may still be open when reading and writing in adjustDots delete threadFastaWriter; delete threadFastaScrapWriter; delete dataBundle; if (m->getDebug()) { m->mothurOut("[DEBUG]: pstart = " + toString(pstart) + ", pend = " + toString(pend) + "\n"); } if (fileAligned && adjustNeeded) { //find pend - pend is the biggest ending value, but we must account for when we adjust the start. That adjustment may make the "new" end larger then the largest end. So lets find out what that "new" end will be. for (map >::iterator it = locations.begin(); it != locations.end(); it++) { if (m->getControl_pressed()) { break; } string name = it->first; int thisStart = -1; int thisEnd = -1; if (hasFPrimers) { thisStart = it->second[0]; } if (hasRPrimers) { thisEnd = it->second[1]; } else { pend = -1; break; } int myDiff = 0; if (thisStart != -1) { //my start if (thisStart != pstart) { //my start is after where the first start occurs, so I need to pad in the front myDiff += (thisStart - pstart); //size of my pad } } int myEnd = thisEnd + myDiff; if (thisEnd != -1) { if (myEnd > pend) { pend = myEnd; } } } if (m->getDebug()) { m->mothurOut("[DEBUG]: pstart = " + toString(pstart) + ", pend = " + toString(pend) + "\n"); } adjustDots(goodFileName, locations, pstart, pend, hasFPrimers, hasRPrimers); } return numFastaSeqs; } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** int PcrSeqsCommand::adjustDots(string goodFasta, map > locations, int pstart, int pend, bool hasFPrimers, bool hasRPrimers){ try { ifstream inFasta; util.openInputFile(goodFasta, inFasta); ofstream out; util.openOutputFile(goodFasta+".temp", out); set lengths; while(!inFasta.eof()) { if(m->getControl_pressed()) { break; } Sequence seq(inFasta); gobble(inFasta); string name = seq.getName(); int thisStart = -1; int thisEnd = -1; map >::iterator it = locations.find(name); if (it != locations.end()) { if (hasFPrimers) { thisStart = it->second[0]; } if (hasRPrimers) { thisEnd = it->second[1]; } }else { m->mothurOut("[ERROR]: should never get here in pcr.seqs.\n"); } if (name != seq.getName()) { m->mothurOut("[ERROR]: name mismatch in pcr.seqs.\n"); } else { string forwardPad = ""; string reversePad = ""; if ((pstart != -1) && (thisStart != -1) && (thisStart != pstart)) { for (int i = pstart; i < thisStart; i++) { forwardPad += "."; } thisEnd += forwardPad.length(); } if ((pend != -1) && (thisEnd != -1) && (thisEnd != pend)) { for (int i = thisEnd; i < pend; i++) { reversePad += "."; } } string aligned = forwardPad + seq.getAligned() + reversePad; seq.setAligned(aligned); lengths.insert(seq.getAligned().length()); } seq.printSequence(out); } inFasta.close(); out.close(); util.mothurRemove(goodFasta); util.renameFile(goodFasta+".temp", goodFasta); return 0; } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", 
"adjustDots"); exit(1); } } //*************************************************************************************************************** Sequence PcrSeqsCommand::readEcoli(){ try { ifstream in; util.openInputFile(ecolifile, in); //read seq Sequence result; if (!in.eof()){ Sequence ecoli(in); result.setName(ecoli.getName()); result.setAligned(ecoli.getAligned()); }else { m->setControl_pressed(true); } in.close(); return result; } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", "readEcoli"); exit(1); } } //*************************************************************************************************************** int PcrSeqsCommand::writeAccnos(set badNames, string outputFileName){ try { ofstream out; util.openOutputFile(outputFileName, out); for (set::iterator it = badNames.begin(); it != badNames.end(); it++) { if (m->getControl_pressed()) { break; } out << (*it) << endl; } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "PcrSeqsCommand", "writeAccnos"); exit(1); } } /**************************************************************************************/ mothur-1.48.0/source/commands/pcrseqscommand.h000077500000000000000000000030021424121717000214100ustar00rootroot00000000000000#ifndef Mothur_pcrseqscommand_h #define Mothur_pcrseqscommand_h // // pcrseqscommand.h // Mothur // // Created by Sarah Westcott on 3/14/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "command.hpp" #include "sequence.hpp" #include "trimoligos.h" #include "alignment.hpp" #include "needlemanoverlap.hpp" #include "counttable.h" #include "oligos.h" #include "removeseqscommand.h" class PcrSeqsCommand : public Command { public: PcrSeqsCommand(string); ~PcrSeqsCommand(){} vector setParameters(); string getCommandName() { return "pcr.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; } string getDescription() { return "pcr.seqs"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, keepprimer, keepdots, fileAligned, pairedOligos, reorient; string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, nomatch; int start, end, processors, length, pdiffs, rdiffs; vector outputNames; int writeAccnos(set, string); Sequence readEcoli(); long long createProcesses(string, string, string, set&); int adjustDots(string goodFasta, map > locations, int pstart, int pend, bool, bool); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/phylodiversitycommand.cpp000077500000000000000000000764671424121717000234120ustar00rootroot00000000000000/* * phylodiversitycommand.cpp * Mothur * * Created by westcott on 4/30/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "phylodiversitycommand.h" #include "treereader.h" //********************************************************************************************************************** vector PhyloDiversityCommand::setParameters(){ try { CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","phylodiv",false,true,true); parameters.push_back(ptree); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "","rarefy",false,false); parameters.push_back(prarefy); CommandParameter psubsample("sampledepth", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter psummary("summary", "Boolean", "", "T", "", "", "","summary",false,false); parameters.push_back(psummary); CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcollect); CommandParameter pscale("scale", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pscale); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["phylodiv"] = tempOutNames; outputTypes["rarefy"] = tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string PhyloDiversityCommand::getHelpString(){ try { string helpString = ""; helpString += "The phylo.diversity command parameters are tree, group, name, count, groups, iters, freq, processors, scale, rarefy, collect and summary. tree and group are required, unless you have valid current files.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n"; helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n"; helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. 
\n"; helpString += "The sampledepth parameter allows you to enter the number of sequences you want to sample.\n"; helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n"; helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n"; helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n"; helpString += "The summary parameter allows you to create a .summary file. The default is true.\n"; helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n"; helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n"; helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n"; helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string PhyloDiversityCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "phylodiv") { pattern = "[filename],[tag],phylodiv"; } else if (type == "rarefy") { pattern = "[filename],[tag],phylodiv.rarefaction"; } else if (type == "summary") { pattern = "[filename],[tag],phylodiv.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** PhyloDiversityCommand::PhyloDiversityCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { treefile = ""; abort = true; } else if (treefile == "not found") { //if there is a current design file, use it treefile = current->getTreeFile(); if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter.\n"); } else { m->mothurOut("You have no current tree file and the tree parameter is required.\n"); abort = true; } }else { current->setTreeFile(treefile); } //check for required parameters groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && 
(countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if (outputdir == ""){ outputdir = util.hasPath(treefile); } string temp; temp = validParameter.valid(parameters, "freq"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, freq); temp = validParameter.valid(parameters, "rarefy"); if (temp == "not found") { temp = "F"; } rarefy = util.isTrue(temp); temp = validParameter.valid(parameters, "sampledepth"); if (temp == "not found") { temp = "0"; } if (util.isNumeric1(temp)) { util.mothurConvert(temp, subsampleSize); if (subsampleSize == 0) { subsample = false; } else { subsample = true; } }else { subsample = false; m->mothurOut("[ERROR]: sampledepth must be numeric, aborting.\n\n"); abort=true; } if (subsample) { rarefy = true; } temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); if (!rarefy) { iters = 1; processors = 1; } temp = validParameter.valid(parameters, "summary"); if (temp == "not found") { temp = "T"; } summary = util.isTrue(temp); temp = validParameter.valid(parameters, "scale"); if (temp == "not found") { temp = "F"; } scale = util.isTrue(temp); temp = validParameter.valid(parameters, "collect"); if (temp == "not found") { temp = "F"; } collect = util.isTrue(temp); groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. 
You must set either collect, rarefy or summary to true, summary=T by default.\n"); abort=true; } } } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand"); exit(1); } } //********************************************************************************************************************** void printSumData(map< string, vector >& div, ofstream& out, int numIters, vector Groups, int subsampleSize, bool subsample, bool scale){ out << "Groups\tnumSampled\tphyloDiversity" << endl; out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); int numSampled = 0; for (int j = 0; j < Groups.size(); j++) { if (subsample) { numSampled = subsampleSize; } else { numSampled = (div[Groups[j]].size()-1); } out << Groups[j] << '\t' << numSampled << '\t'; float score; if (scale) { score = (div[Groups[j]][numSampled] / (float)numIters) / (float)numSampled; } else { score = div[Groups[j]][numSampled] / (float)numIters; } out << setprecision(4) << score << endl; } out.close(); } //********************************************************************************************************************** void printData(set& num, map< string, vector >& div, ofstream& out, int numIters, vector Groups, bool scale){ out << "numSampled"; for (int i = 0; i < Groups.size(); i++) { out << '\t' << Groups[i]; } out << endl; out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); for (set::iterator it = num.begin(); it != num.end(); it++) { int numSampled = *it; out << numSampled; for (int j = 0; j < Groups.size(); j++) { if (numSampled < div[Groups[j]].size()) { float score; if (scale) { score = (div[Groups[j]][numSampled] / (float)numIters) / (float)numSampled; } else { score = div[Groups[j]][numSampled] / (float)numIters; } out << '\t' << setprecision(4) << score ; }else { out << "\tNA" ; } } out << endl; } out.close(); } //********************************************************************************************************************** int PhyloDiversityCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); current->setTreeFile(treefile); TreeReader* reader; if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); } else { reader = new TreeReader(treefile, countfile); } vector trees = reader->getTrees(); CountTable* ct; ct = trees[0]->getCountTable(); delete reader; vector tGroups = ct->getNamesOfGroups(); if (Groups.size() == 0) { Groups = tGroups; } else { //check that groups are valid for (int i = 0; i < Groups.size(); i++) { if (!util.inUsersGroups(Groups[i], tGroups)) { m->mothurOut(Groups[i] + " is not a valid group, and will be disregarded.\n"); // erase the invalid group from userGroups Groups.erase(Groups.begin()+i); i--; } } } //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed for (int i = 0; i < Groups.size(); i++) { if (Groups[i] == "xxx") { Groups.erase(Groups.begin()+i); break; } } vector outputNames; //for each of the users trees for(int i = 0; i < trees.size(); i++) { if (m->getControl_pressed()) { delete ct; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } ofstream outRare; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(treefile)); variables["[tag]"] = toString(i+1); string outSumFile = getOutputFileName("summary",variables); string outRareFile = 
getOutputFileName("rarefy",variables); string outCollectFile = getOutputFileName("phylodiv",variables); if (summary) { outputNames.push_back(outSumFile); outputTypes["summary"].push_back(outSumFile); } if (rarefy) { util.openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile); outputTypes["rarefy"].push_back(outRareFile); } if (collect) { outputNames.push_back(outCollectFile); outputTypes["phylodiv"].push_back(outCollectFile); } int numLeafNodes = trees[i]->getNumLeaves(); //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator vector randomLeaf; for (int j = 0; j < numLeafNodes; j++) { if (util.inUsersGroups(trees[i]->tree[j].getGroup(), Groups) ) { //is this a node from the group the user selected. randomLeaf.push_back(j); } } numLeafNodes = randomLeaf.size(); //reset the number of leaf nodes you are using //each group, each sampling, if no rarefy iters = 1; map > diversity; //each group, each sampling, if no rarefy iters = 1; map > sumDiversity; //find largest group total int largestGroup = 0; for (int j = 0; j < Groups.size(); j++) { int numSeqsThisGroup = ct->getGroupCount(Groups[j]); if (numSeqsThisGroup > largestGroup) { largestGroup = numSeqsThisGroup; } //initialize diversity diversity[Groups[j]].resize(numSeqsThisGroup+1, 0.0); //numSampled //groupA 0.0 0.0 //initialize sumDiversity sumDiversity[Groups[j]].resize(numSeqsThisGroup+1, 0.0); } //convert freq percentage to number if (subsample) { largestGroup = subsampleSize; } int increment = 100; if (freq < 1.0) { increment = largestGroup * freq; }else { increment = freq; } //initialize sampling spots set numSampledList; for(int k = 1; k <= largestGroup; k++){ if((k == 1) || (k % increment == 0)){ numSampledList.insert(k); } } if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); } //add other groups ending points if (!subsample) { for (int j = 0; j < Groups.size(); j++) { if (numSampledList.count(diversity[Groups[j]].size()-1) == 0) { numSampledList.insert(diversity[Groups[j]].size()-1); } } } createProcesses(trees[i], ct, diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollectFile, outSumFile); if (rarefy) { printData(numSampledList, sumDiversity, outRare, iters, Groups, scale); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to run phylo.diversity.\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "execute"); exit(1); } } //********************************************************************************************************************** //need a vector of floats one branch length for every group the node represents. 
vector<float> calcBranchLength(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots, MothurOut* m){ try { //calc the branch length //while you aren't at root vector<float> sums; int index = leaf; vector<string> groups = t->tree[leaf].getGroup(); sums.resize(groups.size(), 0.0); Utils util; //you are a leaf if(!util.isEqual(t->tree[index].getBranchLength(), -1)){ for (int k = 0; k < groups.size(); k++) { sums[k] += abs(t->tree[index].getBranchLength()); } } index = t->tree[index].getParent(); //while you aren't at root while(t->tree[index].getParent() != -1){ if (m->getControl_pressed()) { return sums; } for (int k = 0; k < groups.size(); k++) { if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this group's "root", then say we are done if (!counted[index][groups[k]]){ //if counted[index][groups[k]] is true this group has already added all branch lengths from here to the root, so quit early if (!util.isEqual(t->tree[index].getBranchLength(), -1)) { sums[k] += abs(t->tree[index].getBranchLength()); } counted[index][groups[k]] = true; } } index = t->tree[index].getParent(); } return sums; } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength"); exit(1); } } //********************************************************************************************************************** map<string, int> getRootForGroups(Tree* t, MothurOut* m){ try { map<string, int> roots; //maps group to root for group, may not be root of tree map<string, bool> done; //initialize done for all groups to false for (int k = 0; k < (t->getCountTable())->getNamesOfGroups().size(); k++) { done[(t->getCountTable())->getNamesOfGroups()[k]] = false; } for (int i = 0; i < t->getNumLeaves(); i++) { vector<string> groups = t->tree[i].getGroup(); int index = t->tree[i].getParent(); for (int j = 0; j < groups.size(); j++) { if (!done[groups[j]]) { //we haven't found the root for this group yet, initialize it done[groups[j]] = true; roots[groups[j]] = i; //set root to self to start } //while you aren't at root while(t->tree[index].getParent() != -1){ if (m->getControl_pressed()) { return roots; } //do both of your children have descendants from the user's groups?
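//a node only qualifies as a group's root if members of that group appear in both of its subtrees; the largest qualifying node index is kept, and calcBranchLength stops charging a group once it reaches a node at or above that index, so branches beyond the group's own subtree are excluded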
int lc = t->tree[index].getLChild(); int rc = t->tree[index].getRChild(); int LpcountSize = 0; map:: iterator itGroup = t->tree[lc].pcount.find(groups[j]); if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } int RpcountSize = 0; itGroup = t->tree[rc].pcount.find(groups[j]); if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root if (index > roots[groups[j]]) { roots[groups[j]] = index; } }else { ;} index = t->tree[index].getParent(); } } } return roots; } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups"); exit(1); } } /***********************************************************************/ struct phylodivData { int numIters; MothurOut* m; map< string, vector > div; map > sumDiv; vector< vector > randomLeaf; //each iters randomized nodes set numSampledList; int increment, subsampleSize; string collectName, sumName; Tree* t; CountTable* ct; bool includeRoot, subsample, rarefy, collect, summary, doCollect, doSum, scale; Utils util; vector Groups; phylodivData(){} phylodivData(int ni, map< string, vector > cd, map< string, vector > csd, Tree* tree, CountTable* count, int incre, vector< vector > crl, set nsl, bool su, int suS, vector gps, bool ds, bool dc, bool rar, bool sc, string coln, string sumn) { m = MothurOut::getInstance(); t = tree; ct = count; div = cd; numIters = ni; sumDiv = csd; increment = incre; randomLeaf = crl; numSampledList = nsl; subsample = su; subsampleSize = suS; Groups = gps; doSum = ds; doCollect = dc; collect = false; collectName = coln; if (coln != "") { collect = true; } summary = false; sumName = sumn; if (sumn != "") { summary = true; } rarefy = rar; scale = sc; } }; //********************************************************************************************************************** int driverPhylo(phylodivData* params){ try { int numLeafNodes = params->randomLeaf[0].size(); map rootForGroup = getRootForGroups(params->t, params->m); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches. 
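//each iteration below adds the randomized leaves one at a time, recording every group's cumulative branch length after each addition; when rarefy=T the caller averages these totals over all iterations to produce the rarefaction curve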
for (int l = 0; l < params->numIters; l++) { vector thisItersRandomLeaves = params->randomLeaf[l]; //initialize counts map counts; vector< map > countedBranch; for (int i = 0; i < params->t->getNumNodes(); i++) { map temp; for (int j = 0; j < params->Groups.size(); j++) { temp[params->Groups[j]] = false; } countedBranch.push_back(temp); } for (int j = 0; j < params->Groups.size(); j++) { counts[params->Groups[j]] = false; } map metCount; bool allDone = false; for(int k = 0; k < numLeafNodes; k++){ if (params->m->getControl_pressed()) { return 0; } //calc branch length of randomLeaf k vector br = calcBranchLength(params->t, thisItersRandomLeaves[k], countedBranch, rootForGroup, params->m); //for each group in the groups update the total branch length accounting for the names file vector groups = params->t->tree[thisItersRandomLeaves[k]].getGroup(); for (int j = 0; j < groups.size(); j++) { if (params->util.inUsersGroups(groups[j], params->Groups)) { int numSeqsInGroupJ = 0; map::iterator it; it = params->t->tree[thisItersRandomLeaves[k]].pcount.find(groups[j]); if (it != params->t->tree[thisItersRandomLeaves[k]].pcount.end()) { //this leaf node contains seqs from group j numSeqsInGroupJ = it->second; } if (numSeqsInGroupJ != 0) { params->div[groups[j]][(counts[groups[j]]+1)] = params->div[groups[j]][counts[groups[j]]] + br[j]; } for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { params->div[groups[j]][s] = params->div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths } counts[groups[j]] += numSeqsInGroupJ; if (params->subsample) { if (counts[groups[j]] >= params->subsampleSize) { metCount[groups[j]] = true; } bool allTrue = true; for (int h = 0; h < params->Groups.size(); h++) { if (!metCount[params->Groups[h]]) { allTrue = false; } } if (allTrue) { allDone = true; } } if (allDone) { j+=groups.size(); k+=numLeafNodes; } } } } //if you subsample then rarefy=t if (params->rarefy) { //add this diversity to the sum for (int j = 0; j < params->Groups.size(); j++) { for (int g = 0; g < params->div[params->Groups[j]].size(); g++) { params->sumDiv[params->Groups[j]][g] += params->div[params->Groups[j]][g]; } } } if ((params->collect) && params->doCollect) { ofstream outCollect; params->util.openOutputFile(params->collectName, outCollect); printData(params->numSampledList, params->div, outCollect, 1, params->Groups, params->scale); params->doCollect = false; } if ((params->summary) && params->doSum) { ofstream outSum; params->util.openOutputFile(params->sumName, outSum); printSumData(params->div, outSum, 1, params->Groups, params->subsampleSize, params->subsample, params->scale); params->doSum = false; } if((l+1) % 100 == 0){ params->m->mothurOutJustToScreen(toString(l+1)+"\n"); } } if((params->numIters) % 100 != 0){ params->m->mothurOutJustToScreen(toString(params->numIters)+"\n"); } return 0; } catch(exception& e) { params->m->errorOut(e, "PhyloDiversityCommand", "driverPhylo"); exit(1); } } //********************************************************************************************************************** int PhyloDiversityCommand::createProcesses(Tree* t, CountTable* ct, map< string, vector >& div, map >& sumDiv, int numIters, int increment, vector& randomLeaf, set& numSampledList, string outCollect, string outSum){ try { vector Treenames = t->getTreeNames(); vector procIters; if (iters < processors) { iters = processors; } int numItersPerProcessor = iters / processors; //divide iters between processes for (int h = 0; h < 
processors; h++) { if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; } procIters.push_back(numItersPerProcessor); } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads vector origRandomLeaf = randomLeaf; for (int i = 0; i < processors-1; i++) { //create randomize randomLeaf for each iter vector< vector > thisRandomLeaf; for (int j = 0; j < procIters[i+1]; j++) { randomLeaf = origRandomLeaf; util.mothurRandomShuffle(randomLeaf); thisRandomLeaf.push_back(randomLeaf); } CountTable* copyCount = new CountTable(); copyCount->copy(ct); Tree* copyTree = new Tree(copyCount, Treenames); copyTree->getCopy(t); phylodivData* dataBundle = new phylodivData(procIters[i+1], div, sumDiv, copyTree, copyCount, increment, thisRandomLeaf, numSampledList, subsample, subsampleSize, Groups, false, false, rarefy, scale, "", ""); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverPhylo, dataBundle)); } vector< vector > thisRandomLeaf; for (int j = 0; j < procIters[0]; j++) { randomLeaf = origRandomLeaf; util.mothurRandomShuffle(randomLeaf); thisRandomLeaf.push_back(randomLeaf); } CountTable* copyCount = new CountTable(); copyCount->copy(ct); Tree* copyTree = new Tree(copyCount, Treenames); copyTree->getCopy(t); phylodivData* dataBundle = new phylodivData(procIters[0], div, sumDiv, copyTree, copyCount, increment, thisRandomLeaf, numSampledList, subsample, subsampleSize, Groups, true, true, rarefy, scale, outCollect, outSum); driverPhylo(dataBundle); sumDiv = dataBundle->sumDiv; delete copyTree; delete copyCount; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); for (map >::iterator itSum = data[i]->sumDiv.begin(); itSum != data[i]->sumDiv.end(); itSum++) { for (int k = 0; k < (itSum->second).size(); k++) { sumDiv[itSum->first][k] += (itSum->second)[k]; } } delete data[i]->t; delete data[i]->ct; delete data[i]; delete workerThreads[i]; } delete dataBundle; return 0; } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/phylodiversitycommand.h000077500000000000000000000036671424121717000230470ustar00rootroot00000000000000#ifndef PHYLODIVERSITYCOMMAND_H #define PHYLODIVERSITYCOMMAND_H /* * phylodiversitycommand.h * Mothur * * Created by westcott on 4/30/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "counttable.h" #include "tree.h" class PhyloDiversityCommand : public Command { public: PhyloDiversityCommand(string); ~PhyloDiversityCommand(){} vector setParameters(); string getCommandName() { return "phylo.diversity"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Faith DP (1994). Phylogenetic pattern and the quantification of organismal biodiversity. Philos Trans R Soc Lond B Biol Sci 345: 45-58. 
\nhttp://www.mothur.org/wiki/Phylo.diversity"; } string getDescription() { return "phylo.diversity"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: float freq; int iters, processors, numUniquesInName, subsampleSize; bool abort, rarefy, summary, collect, scale, subsample; string groups, treefile, groupfile, namefile, countfile; vector Groups, outputNames; //holds groups to be used, and outputFile names //map getRootForGroups(Tree* t); //void printData(set&, map< string, vector >&, ofstream&, int); //void printSumData(map< string, vector >&, ofstream&, int); //vector calcBranchLength(Tree*, int, vector< map >&, map); //int driver(Tree*, map< string, vector >&, map >&, int, int, vector&, set&, ofstream&, ofstream&, bool); int createProcesses(Tree*, CountTable*, map< string, vector >&, map >&, int, int, vector&, set&, string, string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/phylotypecommand.cpp000077500000000000000000000331231424121717000223270ustar00rootroot00000000000000/* * phylotypecommand.cpp * Mothur * * Created by westcott on 11/20/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "phylotypecommand.h" #include "phylotree.h" #include "listvector.hpp" #include "rabundvector.hpp" #include "sabundvector.hpp" #include "counttable.h" //********************************************************************************************************************** vector PhylotypeCommand::setParameters(){ try { CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","list-rabund-sabund",false,true,true); parameters.push_back(ptaxonomy); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pcutoff("cutoff", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pcutoff); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["sabund"] = tempOutNames; outputTypes["rabund"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "PhylotypeCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string PhylotypeCommand::getHelpString(){ try { string helpString = ""; helpString += "The phylotype command reads a taxonomy file and outputs a .list, .rabund and .sabund file. \n"; helpString += "The phylotype command parameter options are taxonomy, name, count, cutoff and label. The taxonomy parameter is required.\n"; helpString += "The cutoff parameter allows you to specify the level you want to stop at. The default is the highest level in your taxonomy file. 
\n"; helpString += "For example: taxonomy = Bacteria;Bacteroidetes-Chlorobi;Bacteroidetes; - cutoff=2, would truncate the taxonomy to Bacteria;Bacteroidetes-Chlorobi; \n"; helpString += "For the cutoff parameter levels count up from the root of the phylotree. This enables you to look at the grouping down to a specific resolution, say the genus level.\n"; helpString += "The label parameter allows you to specify which level you would like, and are separated by dashes. The default all levels in your taxonomy file. \n"; helpString += "For the label parameter, levels count down from the root to keep the output similar to mothur's other commands which report information from finer resolution to coarser resolutions.\n"; helpString += "The phylotype command should be in the following format: \n"; helpString += "phylotype(taxonomy=yourTaxonomyFile, cutoff=yourCutoff, label=yourLabels) \n"; helpString += "Eaxample: phylotype(taxonomy=amazon.taxonomy, cutoff=5, label=1-3-5).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "PhylotypeCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string PhylotypeCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "list") { pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; } else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "PhylotypeCommand", "getOutputPattern"); exit(1); } } /**********************************************************************************************************************/ PhylotypeCommand::PhylotypeCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; taxonomyFileName = validParameter.validFile(parameters, "taxonomy"); if (taxonomyFileName == "not found") { taxonomyFileName = current->getTaxonomyFile(); if (taxonomyFileName != "") { m->mothurOut("Using " + taxonomyFileName + " as input file for the taxonomy parameter.\n"); } else { m->mothurOut("No valid current files. 
taxonomy is a required parameter.\n"); abort = true; } }else if (taxonomyFileName == "not open") { taxonomyFileName = ""; abort = true; } else { current->setTaxonomyFile(taxonomyFileName); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { util.readNames(namefile, namemap); current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if (outputdir == ""){ outputdir += util.hasPath(taxonomyFileName); } if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name.\n"); abort = true; } string temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, cutoff); label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; allLines = true; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } } } catch(exception& e) { m->errorOut(e, "PhylotypeCommand", "PhylotypeCommand"); exit(1); } } /**********************************************************************************************************************/ int PhylotypeCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //reads in taxonomy file and makes all the taxonomies the same length //by appending the last taxon to a given taxonomy as many times as needed to //make it as long as the longest taxonomy in the file TaxEqualizer* taxEqual = new TaxEqualizer(taxonomyFileName, cutoff, outputdir); if (m->getControl_pressed()) { delete taxEqual; return 0; } string equalizedTaxFile = taxEqual->getEqualizedTaxFile(); delete taxEqual; //build taxonomy tree from equalized file PhyloTree* tree = new PhyloTree(equalizedTaxFile); vector leaves = tree->getGenusNodes(); //store leaf nodes in current map for (int i = 0; i < leaves.size(); i++) { currentNodes[leaves[i]] = leaves[i]; } bool done = false; if (tree->get(leaves[0]).parent == -1) { m->mothurOut("Empty Tree\n"); done = true; } if (m->getControl_pressed()) { delete tree; return 0; } ofstream outList, outRabund, outSabund; map variables; string fileroot = outputdir + util.getRootName(util.getSimpleName(taxonomyFileName)); variables["[filename]"] = fileroot; variables["[clustertag]"] = "tx"; string sabundFileName = getOutputFileName("sabund", variables); string rabundFileName = getOutputFileName("rabund", variables); //if (countfile != "") { variables["[tag2]"] = "unique_list"; } string listFileName = getOutputFileName("list", variables); map counts; if (countfile == "") { util.openOutputFile(sabundFileName, outSabund); util.openOutputFile(rabundFileName, outRabund); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); }else { CountTable ct; ct.readTable(countfile, false, false); counts = ct.getNameMap(); } util.openOutputFile(listFileName, outList); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); int count = 1; bool printHeaders = true; //start at leaves of tree and work towards root, processing the labels the user wants while((!done) && ((allLines == 1) || (labels.size() != 0))) { string level = 
toString(count); count++; if (m->getControl_pressed()) { if (countfile == "") { outRabund.close(); outSabund.close(); } outList.close(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } delete tree; return 0; } //is this a level the user want output for if(allLines == 1 || labels.count(level) == 1){ //output level m->mothurOut(level); m->mothurOutEndLine(); ListVector list("Phylo"); list.setLabel(level); //go through nodes and build listvector for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) { //get parents TaxNode node = tree->get(itCurrent->first); parentNodes[node.parent] = node.parent; vector names = node.accessions; //make the names compatable with listvector string name = ""; for (int i = 0; i < names.size(); i++) { if (names[i] != "unknown") { if (namefile != "") { map::iterator itNames = namemap.find(names[i]); //make sure this name is in namefile if (itNames != namemap.end()) { name += namemap[names[i]] + ","; } //you found it in namefile else { m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct.\n"); m->setControl_pressed(true); } }else{ name += names[i] + ","; } } } if (m->getControl_pressed()) { break; } name = name.substr(0, name.length()-1); //rip off extra ',' //add bin to list vector if (name != "") { list.push_back(name); } //caused by unknown } if (printHeaders) { //only print headers the first time printHeaders = false; }else { list.setPrintedLabels(printHeaders); } //print listvector if (countfile == "") { list.print(outList); } else { list.print(outList, counts); } if (countfile == "") { //print rabund list.getRAbundVector().print(outRabund); //print sabund list.getSAbundVector().print(outSabund); } labels.erase(level); }else { //just get parents for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) { int parent = tree->get(itCurrent->first).parent; parentNodes[parent] = parent; } } //move up a level currentNodes = parentNodes; parentNodes.clear(); //have we reached the rootnode if (tree->get(currentNodes.begin()->first).parent == -1) { done = true; } } outList.close(); if (countfile == "") { outSabund.close(); outRabund.close(); } delete tree; if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set list file as new current listfile string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "PhylotypeCommand", "execute"); exit(1); } } /**********************************************************************************************************************/ 
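//phylotype works from the genus-level leaves of the taxonomy tree toward the root: each pass through the while loop above collapses the current nodes to their parents, and for every requested label it writes the corresponding list assignments (plus rabund/sabund when no count file is given)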
mothur-1.48.0/source/commands/phylotypecommand.h000077500000000000000000000023761424121717000220020ustar00rootroot00000000000000#ifndef PHYLOTYPECOMMAND_H #define PHYLOTYPECOMMAND_H /* * phylotypecommand.h * Mothur * * Created by westcott on 11/20/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "taxonomyequalizer.h" #include "command.hpp" /*************************************************************************/ class PhylotypeCommand : public Command { public: PhylotypeCommand(string); ~PhylotypeCommand(){} vector setParameters(); string getCommandName() { return "phylotype"; } string getCommandCategory() { return "Clustering"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Phylotype"; } string getDescription() { return "cluster your sequences into OTUs based on their classifications"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines; string taxonomyFileName, label, namefile, countfile; set labels; //holds labels to be used int cutoff; map namemap; vector outputNames; map currentNodes; map parentNodes; map::iterator itCurrent; }; /*************************************************************************/ #endif mothur-1.48.0/source/commands/preclustercommand.cpp000066400000000000000000001706511424121717000224670ustar00rootroot00000000000000/* * preclustercommand.cpp * Mothur * * Created by westcott on 12/21/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "splitgroupscommand.h" #include "preclustercommand.h" #include "uniqueseqscommand.h" #include "summary.hpp" //********************************************************************************************************************** vector PreClusterCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pdiffs("diffs", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pdiffs); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter palign("align", "Multiple", "needleman-gotoh-noalign", "needleman", "", "", "","",false,false); parameters.push_back(palign); CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch); CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch); CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen); CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend); CommandParameter palpha("alpha", "Number", "", "2.0", "", "", "","",false,false); parameters.push_back(palpha); CommandParameter pdelta("delta", "Number", "", "2.0", "", "", "","",false,false); parameters.push_back(pdelta); CommandParameter pmethod("method", "Multiple", "simple-unoise-tree-deblur", "simple", 
"", "", "","",false,false); parameters.push_back(pmethod); CommandParameter pclump("clump", "Multiple", "lessthan-lessthanequal", "lessthan", "", "", "","",false,false); parameters.push_back(pclump); CommandParameter perror_rate("error_rate", "Number", "", "0.005", "", "", "","",false,false); parameters.push_back(perror_rate); CommandParameter pindel_prob("indel_prob", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(pindel_prob); CommandParameter pmax_indels("max_indels", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pmax_indels); CommandParameter perror_dist("error_dist", "String", "", "1-0.06-0.02-0.02-0.01-0.005-0.005-0.005-0.001-0.001-0.001-0.0005", "", "", "","",false,false); parameters.push_back(perror_dist); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["map"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string PreClusterCommand::getHelpString(){ try { string helpString = ""; helpString += "The pre.cluster command groups sequences that are within a given number of base mismatches.\n"; helpString += "The pre.cluster command outputs a new fasta and count file.\n"; helpString += "The pre.cluster command parameters are fasta, count, method, processors and diffs. The fasta parameter is required. \n"; helpString += "The name parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n"; helpString += "The group parameter allows you to provide a group file so you can cluster by group. \n"; helpString += "The count parameter allows you to provide a count file so you can cluster by group. \n"; helpString += "The diffs parameter allows you to specify maximum number of mismatched bases allowed between sequences in a grouping. The default is 1.\n"; helpString += "The method parameter allows you to specify the algorithm to use to complete the preclusterign step. Possible methods include simple, tree, unoise, and deblur. Default=simple.\n"; helpString += "The clump parameter allows you to specify which reads can be combined. Possible options include lessthan and lessthanequal. lessthan -> merge reads with less abundance. lessthanequal -> merge reads with less than or equal abundance Default=lessthan.\n"; helpString += "The align parameter allows you to specify the alignment align_method to use. Your options are: gotoh, needleman and noalign. The default is needleman.\n"; helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n"; helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. 
The default is -1.0.\n"; helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"; helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; helpString += "The alpha parameter allows you to specify the alpha value for the beta formula, which is used in the unoise algorithm. The default is 2.0.\n"; helpString += "The delta parameter allows you to specify the delta value, which describes the amount of amplification between rounds of PCR. It is used in the tree algorithm. The default is 2.0.\n"; helpString += "The error_rate parameter is used with the deblur algorithm and is the expected mean error rate, as a fraction, of the data going into this command.\n"; helpString += "The indel_prob parameter is used with the deblur algorithm and is the expected fraction of sequences that have an insertion or deletion, of the data going into this command.\n"; helpString += "The max_indels parameter is used with the deblur algorithm and is the maximum number of insertions or deletions you expect to be in the data going into this command.\n"; helpString += "The error_dist parameter is used with the deblur algorithm and is the fraction of sequences you expect to have 0, 1, 2, 3, etc. errors. Should start with 1 and be separated by hyphens (e.g. 1-0.06-0.02-0.02-0.01-0.005-0.005-0.005-0.001-0.001-0.001-0.0005). Alternatively, you can use error_dist=binomial and the command will determine the distribution for you\n"; helpString += "The pre.cluster command should be in the following format: \n"; helpString += "pre.cluster(fasta=yourFastaFile, names=yourNamesFile, diffs=yourMaxDiffs) \n"; helpString += "Example pre.cluster(fasta=amazon.fasta, diffs=2).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string PreClusterCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],precluster,[extension]"; } //else if (type == "name") { pattern = "[filename],precluster.names"; } else if (type == "count") { pattern = "[filename],precluster.count_table"; } else if (type == "map") { pattern = "[filename],precluster.map"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "getOutputPattern"); exit(1); } } //************************************************************************************************** PreClusterCommand::PreClusterCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current fastafile and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort 
= true; } else { current->setFastaFile(fastafile); } if (outputdir == ""){ outputdir += util.hasPath(fastafile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... namefile = validParameter.validFile(parameters, "name"); if (namefile == "not found") { namefile = ""; } else if (namefile == "not open") { namefile = ""; abort = true; } else { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not found") { groupfile = ""; bygroup = false; } else if (groupfile == "not open") { abort = true; groupfile = ""; } else { current->setGroupFile(groupfile); bygroup = true; } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not found") { countfile = ""; } else if (countfile == "not open") { abort = true; countfile = ""; } else { current->setCountFile(countfile); CountTable ct; if (ct.testGroups(countfile)) { bygroup = true; } //check for groups without reading else { bygroup = false; } } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } string temp = validParameter.valid(parameters, "diffs"); if(temp == "not found"){ temp = "1"; } util.mothurConvert(temp, diffs); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "method"); if(temp == "not found"){ temp = "simple"; } pc_method = temp; if ((pc_method == "simple") || (pc_method == "tree") || (pc_method == "unoise") || (pc_method == "deblur")) { } else { m->mothurOut("[ERROR]: Not a valid precluster method. Valid preclustering algorithms include simple, tree, unoise, and deblur. Using simple.\n"); pc_method= "simple"; } temp = validParameter.valid(parameters, "clump"); if(temp == "not found"){ temp = "lessthan"; } clump = temp; if ((clump == "lessthan") || (clump == "lessthanequal")) { } else { m->mothurOut("[ERROR]: Not a valid clump method. Valid clumping options are lessthan and lessthanequal. 
Using lessthan.\n"); clump = "lessthan"; } temp = validParameter.valid(parameters, "match"); if(temp == "not found"){ temp = "1.0"; } util.mothurConvert(temp, match); temp = validParameter.valid(parameters, "mismatch"); if(temp == "not found"){ temp = "-1.0"; } util.mothurConvert(temp, misMatch); if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; } temp = validParameter.valid(parameters, "gapopen"); if(temp == "not found"){ temp = "-2.0"; } util.mothurConvert(temp, gapOpen); if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; } temp = validParameter.valid(parameters, "gapextend"); if (temp == "not found"){ temp = "-1.0"; } util.mothurConvert(temp, gapExtend); if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; } temp = validParameter.valid(parameters, "alpha"); if (temp == "not found"){ temp = "2.0"; } util.mothurConvert(temp, alpha); if (alpha < 0) { m->mothurOut("[ERROR]: alpha must be positive.\n"); abort=true; } temp = validParameter.valid(parameters, "delta"); if (temp == "not found"){ temp = "2.0"; } util.mothurConvert(temp, delta); if (delta < 0) { m->mothurOut("[ERROR]: delta must be positive.\n"); abort=true; } temp = validParameter.valid(parameters, "error_dist"); if (temp == "not found"){ error_dist = {1, 0.06, 0.02, 0.02, 0.01, 0.005, 0.005, 0.005, 0.001, 0.001, 0.001, 0.0005}; } else if(temp == "binomial"){ error_dist = {-100}; } else { string probability; istringstream probabilityStream(temp); while (getline(probabilityStream, probability, '-')){ error_dist.push_back(stof(probability)); } } temp = validParameter.valid(parameters, "error_rate"); if (temp == "not found"){ temp = "0.005"; } util.mothurConvert(temp, error_rate); if (error_rate < 0) { m->mothurOut("[ERROR]: error_rate must be positive.\n"); abort=true; } else if (error_rate > 1) { m->mothurOut("[ERROR]: error_rate is a fraction and should be less than 1.\n"); abort=true; } temp = validParameter.valid(parameters, "indel_prob"); if (temp == "not found"){ temp = "0.01"; } util.mothurConvert(temp, indel_prob); if (indel_prob < 0) { m->mothurOut("[ERROR]: indel_prob must be positive.\n"); abort=true; } else if (indel_prob > 1) { m->mothurOut("[ERROR]: indel_prob is a fraction and should be less than 1.\n"); abort=true; } temp = validParameter.valid(parameters, "max_indels"); if (temp == "not found"){ temp = "3"; } util.mothurConvert(temp, max_indels); if (indel_prob < 0) { m->mothurOut("[ERROR]: max_indels must be positive.\n"); abort=true; } align = validParameter.valid(parameters, "align"); if (align == "not found"){ align = "needleman"; } align_method = "unaligned"; if (!abort) { if ((namefile != "") || (groupfile != "")) { //convert to count string rootFileName = namefile; if (rootFileName == "") { rootFileName = groupfile; } if (outputdir == "") { outputdir = util.hasPath(rootFileName); } string outputFileName = outputdir + util.getRootName(util.getSimpleName(rootFileName)) + "count_table"; CountTable ct; ct.createTable(namefile, groupfile, nullVector); ct.printCompressedTable(outputFileName); outputNames.push_back(outputFileName); current->setCountFile(outputFileName); countfile = outputFileName; } } } } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "PreClusterCommand"); exit(1); } } /************************************************************/ inline bool comparePriorityAbundance(seqPNode* first, seqPNode* second) { if (first->numIdentical > second->numIdentical){ return true; } return false; } 
//************************************************************************************************** struct preClusterData { string fastafile, countfile, pc_method, align_method, align, newMName, clump; OutputWriter* newNName; MothurOut* m; int start, end, count, diffs, length, numGroups; vector groups; bool hasCount; float match, misMatch, gapOpen, gapExtend, alpha, delta, error_rate, indel_prob, max_indels; Utils util; vector error_dist; vector outputNames; map > outputTypes; vector alignSeqs; //maps the number of identical seqs to a sequence. filled and freed by functions Alignment* alignment; map > parsedFiles; // double error_rate = 0.005;error_rate // double indel_prob = 0.01;indel_prob // double max_indels = 3;max_indels ~preClusterData() { if (alignment != nullptr) { delete alignment; } } preClusterData(){} preClusterData(map > g2f, string f, string c, string pcm, string am, string cl, OutputWriter* nnf, string nmf, vector gr) { fastafile = f; pc_method = pcm; align_method = am; clump = cl; newNName = nnf; newMName = nmf; groups = gr; hasCount = false; countfile = c; if (countfile != "") { hasCount = true; } count=0; m = MothurOut::getInstance(); parsedFiles = g2f; } void setVariables(int d, string pcm, string am, string al, float ma, float misma, float gpOp, float gpEx, float a, float del, float me, float ip, float mi, vector ed) { numGroups = groups.size(); diffs = d; pc_method = pcm; align_method = am; align = al; match = ma; misMatch = misma; gapExtend = gpEx; gapOpen = gpOp; alpha = a; delta = del; error_rate = me; indel_prob = ip; max_indels = mi; error_dist = ed; length = 0; if (align_method == "unaligned") { if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 1000); } else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 1000); } else if(align == "noalign") { alignment = new NoAlign(); } else { m->mothurOut(align + " is not a valid alignment option. 
I will run the command using needleman."); m->mothurOutEndLine(); alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 1000); } } else { alignment = nullptr; } } }; /**************************************************************************************************/ int calcMisMatches(string seq1, string seq2, preClusterData* params){ try { int numBad = 0; if (params->align_method == "unaligned") { //align to eachother Sequence seqI("seq1", seq1); Sequence seqJ("seq2", seq2); //align seq2 to seq1 - less abundant to more abundant params->alignment->align(seqJ.getUnaligned(), seqI.getUnaligned()); seq2 = params->alignment->getSeqAAln(); seq1 = params->alignment->getSeqBAln(); //chop gap ends int startPos = 0; int endPos = seq2.length()-1; for (int i = 0; i < seq2.length(); i++) { if (isalpha(seq2[i])) { startPos = i; break; } } for (int i = seq2.length()-1; i >= 0; i--){ if (isalpha(seq2[i])) { endPos = i; break; } } //count number of diffs for (int i = startPos; i <= endPos; i++) { if (seq2[i] != seq1[i]) { numBad++; } if (numBad > params->diffs) { return params->length; } //too far to cluster } } else { //count diffs for (int i = 0; i < seq1.length(); i++) { //do they match if (seq1[i] != seq2[i]) { numBad++; } if (numBad > params->diffs) { return params->length; } //too far to cluster } } return numBad; } catch(exception& e) { params->m->errorOut(e, "PreClusterCommand", "calcMisMatches"); exit(1); } } /**************************************************************************************************/ vector calcMisMatchesIndels(string seq1, string seq2, preClusterData* params){ try { int numSubstitutions = 0; int numInDels = 0; if (params->align_method == "unaligned") { //align to eachother Sequence seqI("seq1", seq1); Sequence seqJ("seq2", seq2); //align seq2 to seq1 - less abundant to more abundant params->alignment->align(seqJ.getUnaligned(), seqI.getUnaligned()); seq2 = params->alignment->getSeqAAln(); seq1 = params->alignment->getSeqBAln(); //chop gap ends int startPos = 0; int endPos = seq2.length()-1; for (int i = 0; i < seq2.length(); i++) { if (isalpha(seq2[i])) { startPos = i; break; } } for (int i = seq2.length()-1; i >= 0; i--){ if (isalpha(seq2[i])) { endPos = i; break; } } //count number of diffs for (int i = startPos; i <= endPos; i++) { if (seq2[i] != seq1[i] && (isalpha(seq1[i]) && isalpha(seq2[i]))) { numSubstitutions++; } else if(seq2[i] != seq1[i] && (isalpha(seq1[i]) || isalpha(seq2[i]))){ numInDels++; } if (numSubstitutions > params->diffs) { numSubstitutions = params->length; break; } } } else { //count diffs for (int i = 0; i < seq1.length(); i++) { //do they match if (seq2[i] != seq1[i] && (isalpha(seq1[i]) && isalpha(seq2[i]))) { numSubstitutions++; } else if(seq2[i] != seq1[i] && (isalpha(seq1[i]) || isalpha(seq2[i]))){ numInDels++; } if (numSubstitutions > params->diffs) { numSubstitutions = params->length; break; } } } return vector{numSubstitutions, numInDels}; } catch(exception& e) { params->m->errorOut(e, "PreClusterCommand", "calcMisMatchesIndels"); exit(1); } } /**************************************************************************************************/ void mergeSeqs(seqPNode* representative, seqPNode* duplicate, string& chunk, int mismatch, int originalCount, preClusterData* params){ try { //merge if (params->hasCount) { representative->clusteredIndexes.clear(); duplicate->clusteredIndexes.clear(); } //we use numIdentical to build the count table, don't need the names. 
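//without a count file, the duplicate's member indexes are handed over to the representative below so the merged names can be written out later; in both cases the duplicate's abundance is folded into the representative and the merge is recorded for the .map file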
else { representative->clusteredIndexes.insert(representative->clusteredIndexes.end(), duplicate->clusteredIndexes.begin(), duplicate->clusteredIndexes.end()); duplicate->clusteredIndexes.clear(); } representative->numIdentical += duplicate->numIdentical; chunk += (representative->name + "\t" + duplicate->name + "\t" + toString(originalCount) + "\t" + toString(mismatch) + "\t" + duplicate->sequence + "\n"); duplicate->numIdentical = 0; duplicate->diffs = mismatch; } catch(exception& e) { params->m->errorOut(e, "PreClusterCommand", "mergeSeqs"); exit(1); } } /**************************************************************************************************/ int process(string group, string newMapFile, preClusterData* params){ try { ofstream out; if(params->pc_method != "deblur"){ params->util.openOutputFile(newMapFile, out); if (params->align_method == "unaligned") { out << "ideal_seq\terror_seq\tabundance\tdiffs\tsequence" << endl; }else { out << "ideal_seq\terror_seq\tabundance\tdiffs\tfiltered_sequence" << endl; } } int count = 0; long long numSeqs = params->alignSeqs.size(); vector originalCount(numSeqs); for (int i = 0; i < numSeqs; i++) { originalCount[i] = params->alignSeqs[i]->numIdentical; } bool lessThan = true; if (params->clump == "lessthanequal") { lessThan = false; } if(params->pc_method == "simple"){ for (int i = 0; i < numSeqs; i++) { if (params->alignSeqs[i]->numIdentical != 0) { //this sequence has not been merged yet string chunk = params->alignSeqs[i]->name + "\t" + params->alignSeqs[i]->name + "\t" + toString(originalCount[i]) + "\t" + toString(0) + "\t" + params->alignSeqs[i]->sequence + "\n"; //try to merge it with all smaller seqs for (int j = i+1; j < numSeqs; j++) { if (params->m->getControl_pressed()) { out.close(); return 0; } bool ableToMerge = false; if (lessThan) { //default if (originalCount[j] < originalCount[i]) { ableToMerge = true; } }else { //less than equal to if (originalCount[j] <= originalCount[i]) { ableToMerge = true; } } if ((params->alignSeqs[j]->numIdentical != 0) && (ableToMerge)) { //this sequence has not been merged yet // //are you within "diff" bases int mismatch = calcMisMatches(params->alignSeqs[i]->sequence, params->alignSeqs[j]->sequence, params); if (mismatch <= params->diffs) { mergeSeqs(params->alignSeqs[i], params->alignSeqs[j], chunk, mismatch, originalCount[j], params); count++; } } } out << chunk; } if(i % 1000 == 0) { params->m->mothurOutJustToScreen(group + toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)+"\n"); } } if(numSeqs % 1000 != 0) { params->m->mothurOut(group + toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count) + "\n"); } } else if(params->pc_method == "unoise") { vector beta(params->diffs+1, 0); for(int i=0;ialpha * i + 1.0); } for (int i = 0; i < numSeqs; i++) { if (params->alignSeqs[i]->numIdentical != 0) { //this sequence has not been merged yet string chunk = params->alignSeqs[i]->name + "\t" + params->alignSeqs[i]->name + "\t" + toString(originalCount[i]) + "\t" + toString(0) + "\t" + params->alignSeqs[i]->sequence + "\n"; //try to merge it with all smaller seqs for (int j = i+1; j < numSeqs; j++) { if (params->m->getControl_pressed()) { out.close(); return 0; } if (params->alignSeqs[j]->numIdentical != 0 && (originalCount[j] < originalCount[i])) { //this sequence has not been merged yet double skew = (double)originalCount[j]/(double)originalCount[i]; int mismatch = calcMisMatches(params->alignSeqs[i]->sequence, params->alignSeqs[j]->sequence, params); if 
(mismatch <= params->diffs) { if (skew <= beta[mismatch]) { mergeSeqs(params->alignSeqs[i], params->alignSeqs[j], chunk, mismatch, originalCount[j], params); count++; } } } } out << chunk; } if(i % 100 == 0) { params->m->mothurOutJustToScreen(group + toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)+"\n"); } } if(numSeqs % 100 != 0) { params->m->mothurOut(group + toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count) + "\n"); } } else if(params->pc_method == "tree") { vector cluster(numSeqs, -1); for(int i=0;im->getControl_pressed()) { out.close(); return 0; } for (int j=i+1;jm->getControl_pressed()) { out.close(); return 0; } if(originalCount[i] > originalCount[j] * params->delta){ int mismatches = calcMisMatches(params->alignSeqs[i]->sequence, params->alignSeqs[j]->sequence, params); if(mismatches == 1){ cluster[j] = cluster[i]; } } } if(cluster[i] == i) count++; if(i % 100 == 0) { params->m->mothurOutJustToScreen(group + toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)+"\n"); } } params->m->mothurOutJustToScreen(group + toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)+"\n"); count = 0; vector chunk(numSeqs, ""); for(int i=0;ialignSeqs[i]->name + "\t" + params->alignSeqs[i]->name + "\t" + toString(originalCount[i]) + "\t" + toString(0) + "\t" + params->alignSeqs[i]->sequence + "\n"; } else { params->alignSeqs[cluster[i]]->clusteredIndexes.insert(params->alignSeqs[i]->clusteredIndexes.begin(), params->alignSeqs[i]->clusteredIndexes.end(), params->alignSeqs[cluster[i]]->clusteredIndexes.end()); params->alignSeqs[i]->clusteredIndexes.clear(); params->alignSeqs[cluster[i]]->numIdentical += params->alignSeqs[i]->numIdentical; params->diffs = params->length; int mismatches = calcMisMatches(params->alignSeqs[i]->sequence, params->alignSeqs[cluster[i]]->sequence, params); chunk[cluster[i]] += params->alignSeqs[cluster[i]]->name + "\t" + params->alignSeqs[i]->name + "\t" + toString(originalCount[i]) + "\t" + toString(mismatches) + "\t" + params->alignSeqs[i]->sequence + "\n"; params->alignSeqs[i]->numIdentical = 0; count++; } } for(int i=0;ipc_method == "deblur") { vector weights(numSeqs, 0); for(int i=0;ialignSeqs[i]->numIdentical; } for(int i=0;im->getControl_pressed()) { out.close(); return 0; } if(i % 100 == 0){ cout << i << endl; } if(weights[i] <= 0){ continue; } int max_h_dist = params->error_dist.size(); vector expected_bad_reads(max_h_dist, 0); for(int j=0;jerror_dist[j] * weights[i]; } if(expected_bad_reads[1] < 0.1){ continue; } for(int j=0;jm->getControl_pressed()) { out.close(); return 0; } if(i == j) { continue; } if(weights[j] <= 0){ continue; } vector nSubsInDels(2, 0); nSubsInDels = calcMisMatchesIndels(params->alignSeqs[i]->sequence, params->alignSeqs[j]->sequence, params); if(nSubsInDels[0] >= max_h_dist) { continue; } double correction = expected_bad_reads[nSubsInDels[0]]; if(nSubsInDels[1] > params->max_indels){ correction = 0; } else if(nSubsInDels[1] > 0){ correction = correction * params->indel_prob; } weights[j] -= correction; } } for(int i=0;ialignSeqs[i]->numIdentical = round(weights[i]); if(weights[i] <= 0){ params->alignSeqs[i]->numIdentical = 0; count++; } } } else { cout << "fail!\n"; } if(params->pc_method != "deblur"){ out.close(); } return count; } catch(exception& e) { params->m->errorOut(e, "PreClusterCommand", "process"); exit(1); } } /**************************************************************************************************/ void filterSeqs(vector& 
alignSeqs, int length, MothurOut* m){ try { string filterString = ""; Filters F; F.setLength(length); F.initialize(); F.setFilter(string(length, '1')); for (int i = 0; i < alignSeqs.size(); i++) { F.getFreqs(alignSeqs[i]->sequence); } F.setNumSeqs(alignSeqs.size()); F.doVerticalAllBases(); filterString = F.getFilter(); //run filter for (int i = 0; i < alignSeqs.size(); i++) { if (m->getControl_pressed()) { break; } string filteredSeq = ""; string align = alignSeqs[i]->sequence; for(int j=0;jsequence = filteredSeq; } } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "filterSeqs"); exit(1); } } /**************************************************************************************************/ //seqPNode(string na, string seq, int n, string nm) : numIdentical(n), name(na), sequence(seq), clusteredNames(nm) { diffs = 0; active = true; } vector readFASTA(preClusterData* params, long long& num){ try { CountTable ct; if (params->hasCount) { ct.readTable(params->countfile, false, true); } //don't read groups to save space ifstream inFasta; params->util.openInputFile(params->fastafile, inFasta); set lengths; vector alignSeqs; while (!inFasta.eof()) { if (params->m->getControl_pressed()) { inFasta.close(); break; } Sequence seq(inFasta); gobble(inFasta); if (seq.getName() != "") { //can get "" if commented line is at end of fasta file //no names file, you are identical to yourself int numReps = 1; if (params->hasCount) { numReps = ct.getNumSeqs(seq.getName()); } vector clusteredIndexes; clusteredIndexes.push_back(alignSeqs.size()); seqPNode* tempNode = new seqPNode(seq.getName(), seq.getAligned(), numReps, clusteredIndexes); alignSeqs.push_back(tempNode); lengths.insert(seq.getAligned().length()); } } inFasta.close(); params->length = *(lengths.begin()); if (lengths.size() > 1) { params->align_method = "unaligned"; } else if (lengths.size() == 1) { params->align_method = "aligned"; filterSeqs(alignSeqs, params->length, params->m); } //sort seqs by number of identical seqs sort(alignSeqs.begin(), alignSeqs.end(), comparePriorityAbundance); num = alignSeqs.size(); return alignSeqs; } catch(exception& e) { params->m->errorOut(e, "PreClusterCommand", "readFASTA"); exit(1); } } /**************************************************************************************************/ void print(string newfasta, string newname, preClusterData* params){ try { CountTable ct; unordered_set accnosNames; if (params->countfile != "") { for (int i = 0; i < params->alignSeqs.size(); i++) { if (params->alignSeqs[i]->numIdentical != 0) { ct.push_back(params->alignSeqs[i]->name, params->alignSeqs[i]->numIdentical); } } } if (params->countfile != "") { ct.printTable(newname); } params->m->mothurOut("/******************************************/\n"); pair ffiles(params->fastafile, newfasta); Command* getCommand = new GetSeqsCommand(accnosNames, ffiles, nullStringPair, nullStringPair, ""); getCommand->execute(); delete getCommand; params->m->mothurOut("/******************************************/\nDone.\n"); } catch(exception& e) { params->m->errorOut(e, "PreClusterCommand", "print"); exit(1); } } /**************************************************************************************************/ int PreClusterCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if(pc_method == "tree"){ diffs = 1; }else if (pc_method == "simple") { ; } else { Summary summary_data(processors); if(countfile != ""){ summary_data.summarizeFasta(fastafile, countfile, ""); } else if(namefile != ""){ 
summary_data.summarizeFasta(fastafile, namefile, ""); } int median_length = summary_data.getLength()[3]; int max_abund = summary_data.getMaxAbundance(); if(pc_method == "unoise"){ diffs = int((log2(max_abund)-1) / alpha + 1); } else if(pc_method == "deblur"){ if(util.isEqual(error_dist[0], -100)){ //construct the binomial distribution error_dist.clear(); for(int i=0;i<100;i++){ double choose = 1; double k = 1; for(int j=1;j<=i;j++){ choose *= (median_length - j + 1); k *= j; } choose = choose/k; double a = pow(error_rate, i); double b = pow(1 - error_rate, median_length - i); if(!util.isEqual(a, 0) && !util.isEqual(b, 0)){ error_dist.push_back(choose * a * b); if(error_dist[i] < 0.1 / max_abund){ break; } } else { break; } } error_dist[0] = 1; } double mod_factor = pow((1-error_rate), median_length); for(int i=0;igetProcessors(); string fileroot = outputdir + util.getRootName(util.getSimpleName(fastafile)); map variables; variables["[filename]"] = fileroot; string newCountFile = getOutputFileName("count",variables); string newMapFile = getOutputFileName("map",variables); //add group name if by group variables["[extension]"] = util.getExtension(fastafile); string newFastaFile = getOutputFileName("fasta", variables); outputNames.push_back(newFastaFile); outputTypes["fasta"].push_back(newFastaFile); if (countfile != "") { outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); } if (bygroup) { //clear out old files ofstream outFasta; util.openOutputFile(newFastaFile, outFasta); outFasta.close(); ofstream outCount; util.openOutputFile(newCountFile, outCount); outCount.close(); newMapFile = fileroot + "precluster."; string convolutedNamesFile = newCountFile + ".temp"; vector groups; map > group2Files; current->setMothurCalling(true); SequenceCountParser cparser(countfile, fastafile, nullVector); current->setMothurCalling(false); //cout << " groups = "<< cparser.getNamesOfGroups().size() << endl; groups = cparser.getNamesOfGroups(); group2Files = cparser.getFiles(); createProcessesGroups(group2Files, groups, convolutedNamesFile, newMapFile); unordered_set accnos; if (countfile != "") { accnos = mergeGroupCounts(newCountFile, convolutedNamesFile); } printFasta(newFastaFile, accnos); util.mothurRemove(convolutedNamesFile); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to run pre.cluster.\n"); }else { if (processors != 1) { m->mothurOut("When using running without group information mothur can only use 1 processor, continuing.\n"); processors = 1; } vector groups; map > group2Files; preClusterData* params = new preClusterData(group2Files, fastafile, countfile, pc_method, align_method, clump, nullptr, newMapFile, nullVector); params->setVariables(diffs, pc_method, align_method, align, match, misMatch, gapOpen, gapExtend, alpha, delta, error_rate, indel_prob, max_indels, error_dist); //reads fasta file and return number of seqs long long numSeqs = 0; params->alignSeqs = readFASTA(params, numSeqs); //fills alignSeqs and makes all seqs active length = params->length; if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct.\n"); return 0; } if (diffs > length) { m->mothurOut("Error: diffs is set to " + toString(diffs) + " which is greater than your sequence length of " + toString(length) 
+ ".\n"); return 0; }

            int count = process("", newMapFile, params);

            if(params->pc_method != "deblur"){ outputNames.push_back(newMapFile); outputTypes["map"].push_back(newMapFile); }

            if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; }

            m->mothurOut("Total number of sequences before precluster was " + toString(params->alignSeqs.size()) + ".\n");
            m->mothurOut("pre.cluster removed " + toString(count) + " sequences.\n\n");

            print(newFastaFile, newCountFile, params);

            for (int i = 0; i < params->alignSeqs.size(); i++) { delete params->alignSeqs[i]; }  params->alignSeqs.clear();

            m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to cluster " + toString(numSeqs) + " sequences.\n");
        }

        current->setProcessors(numProcessors);

        if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; }

        m->mothurOut("\nOutput File Names: \n");
        for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
        m->mothurOutEndLine();

        //set fasta file as new current fastafile
        string currentName = "";
        itTypes = outputTypes.find("fasta");
        if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } }

        itTypes = outputTypes.find("name");
        if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } }

        itTypes = outputTypes.find("count");
        if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "PreClusterCommand", "execute");
        exit(1);
    }
}
/**************************************************************************************************/
void PreClusterCommand::printFasta(string newFastaFileName, unordered_set<string> accnos){
    try {
        pair<string, string> ffiles(fastafile, newFastaFileName);

        m->mothurOut("\n/******************************************/\n");
        m->mothurOut("Running get.seqs: \n");

        Command* getCommand = new GetSeqsCommand(accnos, ffiles, nullStringPair, nullStringPair, "");
        getCommand->execute(); //run get.seqs to select the remaining representative sequences, mirroring print() above
        delete getCommand;

        m->mothurOut("/******************************************/\n");
    }
    catch(exception& e) {
        m->errorOut(e, "PreClusterCommand", "printFasta");
        exit(1);
    }
}
/**************************************************************************************************/
void printData(string group, preClusterData* params, map<string, string>& optionalNameMap){
    try {
        if ((params->hasCount) && (group == "")) { params->newNName->write("Representative_Sequence\ttotal\n"); }

        if (params->hasCount) {
            if (group != "") {
                for (int i = 0; i < params->alignSeqs.size(); i++) { if (params->alignSeqs[i]->numIdentical != 0) { params->newNName->write(group + '\t' + params->alignSeqs[i]->name + '\t' + toString(params->alignSeqs[i]->numIdentical) + '\n'); } }
            }
            else {
                for (int i = 0; i < params->alignSeqs.size(); i++) { if (params->alignSeqs[i]->numIdentical != 0) { params->newNName->write(params->alignSeqs[i]->name + '\t' + toString(params->alignSeqs[i]->numIdentical) + '\n'); } }
            }
        }else {
            for (int i = 0; i < params->alignSeqs.size(); i++) {
                if (params->alignSeqs[i]->numIdentical != 0) {
                    string clusteredNames = "";
                    for (int j = 0; j < params->alignSeqs[i]->clusteredIndexes.size(); j++) {
                        int indexOfSeq = params->alignSeqs[i]->clusteredIndexes[j];
                        string repName = params->alignSeqs[indexOfSeq]->name;
                        string
dupNames = ""; map::iterator itDupName = optionalNameMap.find(repName); if (itDupName != optionalNameMap.end()) { dupNames = itDupName->second; clusteredNames += dupNames + ","; } } clusteredNames = clusteredNames.substr(0,clusteredNames.length()-1); //remove last comma params->newNName->write(params->alignSeqs[i]->name + '\t' + clusteredNames + '\n'); } } } } catch(exception& e) { params->m->errorOut(e, "PreClusterCommand", "printData"); exit(1); } } /**************************************************************************************************/ bool fillWeighted(preClusterData* params, string fastafileName, string groupOrCountFile){ try { set lengths; map counts; if (params->hasCount) { CountTable ct; ct.readTable(groupOrCountFile, false, true); //don't read groups because it only contains one group counts = ct.getNameMap(); params->util.mothurRemove(groupOrCountFile); } ifstream in; params->util.openInputFile(fastafileName, in); while (!in.eof()) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { map::iterator it = counts.find(seq.getName()); if (it != counts.end()) { vector clusteredIndexes; //don't use indexes for precluster with count file if (!params->hasCount) { clusteredIndexes.push_back(params->alignSeqs.size()); } seqPNode* tempNode = new seqPNode(seq.getName(), seq.getAligned(), it->second, clusteredIndexes); params->alignSeqs.push_back(tempNode); lengths.insert(seq.getAligned().length()); } } } in.close(); params->length = *(lengths.begin()); if (lengths.size() > 1) { params->align_method = "unaligned"; return false; } //unaligned else if (lengths.size() == 1) { params->align_method = "aligned"; filterSeqs(params->alignSeqs, params->length, params->m); return true; } //aligned return true; } catch(exception& e) { params->m->errorOut(e, "PreClusterCommand", "fillWeighted"); exit(1); } } /**************************************************************************************************/ long long driverGroups(preClusterData* params){ try { long long numSeqs = 0; //precluster each group for (map >::iterator it = params->parsedFiles.begin(); it != params->parsedFiles.end(); it++) { if (params->m->getControl_pressed()) { return numSeqs; } string thisGroup = it->first; params->m->mothurOut("\nProcessing group " + thisGroup + ":\n"); time_t start = time(nullptr); bool aligned = false; string thisGroupsFasta = it->second[0]; map thisGroupsNameMap; if (params->hasCount) { string thisGroupsCount = it->second[1]; aligned = fillWeighted(params, thisGroupsFasta, thisGroupsCount); params->util.mothurRemove(thisGroupsCount); } params->util.mothurRemove(thisGroupsFasta); //sort seqs by number of identical seqs sort(params->alignSeqs.begin(), params->alignSeqs.end(), comparePriorityAbundance); long long num = params->alignSeqs.size(); numSeqs += num; if (params->m->getControl_pressed()) { return 0; } if (params->align_method == "aligned") { if (params->diffs > params->length) { params->m->mothurOut("[ERROR]: diffs is greater than your sequence length.\n"); params->m->setControl_pressed(true); return 0; } } string extension = thisGroup+".map"; long long count = process(thisGroup+"\t", params->newMName+extension, params); if(params->pc_method != "deblur"){ params->outputNames.push_back(params->newMName+extension); params->outputTypes["map"].push_back(params->newMName+extension); } if (params->m->getControl_pressed()) { return 0; } params->m->mothurOut("Total number of sequences before pre.cluster was " + toString(num) + ".\n"); 
params->m->mothurOut("pre.cluster removed " + toString(count) + " sequences.\n\n"); printData(thisGroup, params, thisGroupsNameMap); for (int i = 0; i < params->alignSeqs.size(); i++) { delete params->alignSeqs[i]; } params->alignSeqs.clear(); params->m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to cluster " + toString(num) + " sequences.\n"); } return numSeqs; } catch(exception& e) { params->m->errorOut(e, "PreClusterCommand", "driverGroups"); exit(1); } } /**************************************************************************************************/ //only called with count table including groups unordered_set PreClusterCommand::mergeGroupCounts(string newcount, string newname){ try { m->mothurOut("\nDeconvoluting count table results...\n"); CountTable ct; vector groups; ct.testGroups(countfile, groups); ct.readTable(countfile, false, true); //read table no groups for (int i = 0; i < groups.size(); i++) { ct.addGroup(groups[i]); } //add groups ct.zeroOutTable(); ifstream inNames; util.openInputFile(newname, inNames); /* newname looks like: groupName seqName seqCountForGroup FDF6 seq1 35 seq1 has an abundance of 35 in group FDF6 */ time_t start = time(nullptr); long long count = 0; string group, unique_sequence; int numDups; //build table unordered_set namesOfSeqs; while (!inNames.eof()) { if (m->getControl_pressed()) { break; } inNames >> group; gobble(inNames); inNames >> unique_sequence; gobble(inNames); inNames >> numDups; gobble(inNames); ct.setAbund(unique_sequence, group, numDups); count++; namesOfSeqs.insert(unique_sequence); //report progress if((count) % 1000 == 0){ m->mothurOutJustToScreen(toString(count) + "\n"); } } //report progress if((count) % 1000 != 0){ m->mothurOutJustToScreen(toString(count) + "\n"); } inNames.close(); m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to merge " + toString(count) + " sequences group data."); start = time(nullptr); ct.printTable(newcount); util.mothurRemove(newname); return namesOfSeqs; } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "mergeGroupCounts"); exit(1); } } /**************************************************************************************************/ void PreClusterCommand::createProcessesGroups(map >& parsedFiles, vector groups, string newNName, string newMFile) { try { //sanity check if (groups.size() < processors) { processors = groups.size(); m->mothurOut("Reducing processors to " + toString(groups.size()) + ".\n"); } //divide the groups between the processors vector lines; int remainingPairs = groups.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } //create array of worker threads vector workerThreads; vector data; auto synchronizedNameFile = std::make_shared(newNName); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadNameWriter = new OutputWriter(synchronizedNameFile); vector thisGroups; map > thisGroupsParsedFiles; for (int j = lines[i+1].start; j < lines[i+1].end; j++) { map >::iterator it = parsedFiles.find(groups[j]); if (it != parsedFiles.end()) { thisGroupsParsedFiles[groups[j]] = (it->second); thisGroups.push_back(groups[j]); } else { 
m->mothurOut("[ERROR]: missing files for group " + groups[j] + ", skipping\n"); } } preClusterData* dataBundle = new preClusterData(thisGroupsParsedFiles, fastafile, countfile, pc_method, align_method, clump, threadNameWriter, newMFile, thisGroups); dataBundle->setVariables(diffs, pc_method, align_method, align, match, misMatch, gapOpen, gapExtend, alpha, delta, error_rate, indel_prob, max_indels, error_dist); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverGroups, dataBundle)); } OutputWriter* threadNameWriter = new OutputWriter(synchronizedNameFile); vector thisGroups; map > thisGroupsParsedFiles; for (int j = lines[0].start; j < lines[0].end; j++) { map >::iterator it = parsedFiles.find(groups[j]); if (it != parsedFiles.end()) { thisGroupsParsedFiles[groups[j]] = (it->second); thisGroups.push_back(groups[j]); } else { m->mothurOut("[ERROR]: missing files for group " + groups[j] + ", skipping\n"); } } preClusterData* dataBundle = new preClusterData(thisGroupsParsedFiles, fastafile, countfile, pc_method, align_method, clump, threadNameWriter, newMFile, thisGroups); dataBundle->setVariables(diffs, pc_method, align_method, align, match, misMatch, gapOpen, gapExtend, alpha, delta, error_rate, indel_prob, max_indels, error_dist); driverGroups(dataBundle); outputNames.insert(outputNames.end(), dataBundle->outputNames.begin(), dataBundle->outputNames.end()); for (itTypes = dataBundle->outputTypes.begin(); itTypes != dataBundle->outputTypes.end(); itTypes++) { outputTypes[itTypes->first].insert(outputTypes[itTypes->first].end(), itTypes->second.begin(), itTypes->second.end()); } for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); delete data[i]->newNName; outputNames.insert(outputNames.end(), data[i]->outputNames.begin(), data[i]->outputNames.end()); for (itTypes = data[i]->outputTypes.begin(); itTypes != data[i]->outputTypes.end(); itTypes++) { outputTypes[itTypes->first].insert(outputTypes[itTypes->first].end(), itTypes->second.begin(), itTypes->second.end()); } delete data[i]; delete workerThreads[i]; } delete threadNameWriter; delete dataBundle; } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "createProcessesGroups"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/preclustercommand.h000077500000000000000000000037111424121717000221270ustar00rootroot00000000000000#ifndef PRECLUSTERCOMMAND_H #define PRECLUSTERCOMMAND_H /* * preclustercommand.h * Mothur * * Created by westcott on 12/21/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "sequence.hpp" #include "sequenceparser.h" #include "sequencecountparser.h" #include "alignment.hpp" #include "gotohoverlap.hpp" #include "needlemanoverlap.hpp" #include "noalign.hpp" #include "filters.h" #include "getseqscommand.h" //************************************************************/ class PreClusterCommand : public Command { public: PreClusterCommand(string); ~PreClusterCommand(){} vector setParameters(); string getCommandName() { return "pre.cluster"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Gevers D, Westcott SL (2011). Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies. PLoS ONE. 
6:e27310.\nhttp://www.mothur.org/wiki/Pre.cluster"; } string getDescription() { return "implements a pseudo-single linkage algorithm with the goal of removing sequences that are likely due to pyrosequencing errors"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: int diffs, length, processors; float match, misMatch, gapOpen, gapExtend, alpha, delta, error_rate, indel_prob, max_indels; vector error_dist; bool abort, bygroup; string fastafile, namefile, groupfile, countfile, pc_method, align_method, align, clump; vector outputNames; void createProcessesGroups(map >&, vector, string, string); unordered_set mergeGroupCounts(string, string); void printFasta(string newFastaFileName, unordered_set accnosFile); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/primerdesigncommand.cpp000077500000000000000000001437511424121717000227730ustar00rootroot00000000000000// // primerdesigncommand.cpp // Mothur // // Created by Sarah Westcott on 1/18/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "primerdesigncommand.h" //********************************************************************************************************************** vector PrimerDesignCommand::setParameters(){ try { CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","summary-list",false,true,true); parameters.push_back(plist); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","",false,true, true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter plength("length", "Number", "", "18", "", "", "","",false,false); parameters.push_back(plength); CommandParameter pmintm("mintm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmintm); CommandParameter pmaxtm("maxtm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxtm); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pprocessors); CommandParameter potunumber("otulabel", "String", "", "", "", "", "","",false,true,true); parameters.push_back(potunumber); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pcutoff); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["summary"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["list"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "setParameters"); exit(1); } } 
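//**********************************************************************************************************************
// High-level sketch of the primer.design flow implemented in this file (a summary of the code below, for orientation):
//   1. execute() reads the list file at the requested label and builds a consensus sequence for every OTU
//      (createProcessesConSeqs / driverGetCounts / getBase).
//   2. getPrimer() extracts every candidate primer of length 'length' from the consensus of the OTU named by 'otulabel'.
//   3. findMeltingPoint() computes the min and max Tm for each candidate using Tm = 64.9 + 41*(#GC - 16.4)/N, and
//      execute() keeps only candidates that fall within mintm/maxtm. For instance, an 18-mer with 9 G/C bases would
//      give Tm = 64.9 + 41*(9 - 16.4)/18, or about 48.
//   4. createProcesses() / driverPDesign() then search the consensus of every other OTU for the surviving candidates
//      within 'pdiffs' mismatches; OTUs that are hit are reported in the summary file and dropped from the new list file.
//**********************************************************************************************************************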
//**********************************************************************************************************************
string PrimerDesignCommand::getHelpString(){
    try {
        string helpString = "";
        helpString += "The primer.design command allows you to identify sequence fragments that are specific to particular OTUs.\n";
        helpString += "The primer.design command parameters are: list, fasta, name, count, otulabel, cutoff, length, pdiffs, mintm, maxtm, processors and label.\n";
        helpString += "The list parameter allows you to provide a list file and is required.\n";
        helpString += "The fasta parameter allows you to provide a fasta file and is required.\n";
        helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
        helpString += "The count parameter allows you to provide a count file associated with your fasta file.\n";
        helpString += "The label parameter is used to indicate the label you want to use from your list file.\n";
        helpString += "The otulabel parameter is used to indicate the otu you want to use from your list file. It is required.\n";
        helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
        helpString += "The length parameter is used to indicate the length of the primer. The default is 18.\n";
        helpString += "The mintm parameter is used to indicate minimum melting temperature.\n";
        helpString += "The maxtm parameter is used to indicate maximum melting temperature.\n";
        helpString += "The processors parameter allows you to indicate the number of processors you want to use. Default=1.\n";
        helpString += "The cutoff parameter allows you to set a percentage of sequences that support the base. For example: cutoff=97 would only return a sequence that showed ambiguities for bases that were not supported by at least 97% of sequences.\n";
        helpString += "The primer.design command should be in the following format: primer.design(list=yourListFile, fasta=yourFastaFile, name=yourNameFile)\n";
        helpString += "primer.design(list=final.an.list, fasta=final.fasta, name=final.names, label=0.03)\n";
        return helpString;
    }
    catch(exception& e) {
        m->errorOut(e, "PrimerDesignCommand", "getHelpString");
        exit(1);
    }
}
//**********************************************************************************************************************
string PrimerDesignCommand::getOutputPattern(string type) {
    try {
        string pattern = "";

        if (type == "fasta")        { pattern = "[filename],[distance],otu.cons.fasta"; }
        else if (type == "summary") { pattern = "[filename],[distance],primer.summary"; }
        else if (type == "list")    { pattern = "[filename],pick,[extension]"; }
        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); }

        return pattern;
    }
    catch(exception& e) {
        m->errorOut(e, "PrimerDesignCommand", "getOutputPattern");
        exit(1);
    }
}
//**********************************************************************************************************************
PrimerDesignCommand::PrimerDesignCommand(string option) : Command()  {
    try {
        if(option == "help") { help(); abort = true; calledHelp = true; }
        else if(option == "citation") { citation(); abort = true; calledHelp = true; }
        else if(option == "category") { abort = true; calledHelp = true; }
        else {
            OptionParser parser(option, setParameters());
            map<string, string> parameters = parser.getParameters();

            ValidParameters validParameter;
            namefile = validParameter.validFile(parameters, "name");
            if (namefile == "not open") { abort = true; }
            else if (namefile
== "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } //get fastafile - it is required fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort=true; } else if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else { current->setFastaFile(fastafile); } //get listfile - it is required listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort=true; } else if (listfile == "not found") { listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { m->mothurOut("You have no current listfile and the list parameter is required.\n"); abort = true; } }else { current->setListFile(listfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if (outputdir == ""){ outputdir = util.hasPath(listfile); } string temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "pdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, pdiffs); temp = validParameter.valid(parameters, "length"); if (temp == "not found") { temp = "18"; } util.mothurConvert(temp, length); temp = validParameter.valid(parameters, "mintm"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, minTM); temp = validParameter.valid(parameters, "maxtm"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxTM); otulabel = validParameter.valid(parameters, "otulabel"); if (otulabel == "not found") { otulabel = ""; } if (otulabel == "") { m->mothurOut("[ERROR]: You must provide an OTU label, aborting.\n"); abort = true; } temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile.\n"); label=""; } } } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "PrimerDesignCommand"); exit(1); } } //********************************************************************************************************************** int PrimerDesignCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); ////////////////////////////////////////////////////////////////////////////// // get file inputs // ////////////////////////////////////////////////////////////////////////////// //reads list file and selects the label the users specified or the first label ListVector* list = getListVector(); vector binLabels = list->getLabels(); int binIndex = findIndex(otulabel, binLabels); if (binIndex == -1) { m->mothurOut("[ERROR]: You selected an OTU label that is not in your in your list file, quitting.\n"); return 0; } map nameMap; long long 
numSeqs; //used to sanity check the files. numSeqs = total seqs for namefile and uniques for count. //list file should have all seqs if namefile was used to create it and only uniques in count file was used. if (namefile != "") { unsigned long int temp; nameMap = util.readNames(namefile, temp); numSeqs = temp; } else if (countfile != "") { CountTable ct; ct.readTable(countfile, false, false); numSeqs = ct.getNumUniqueSeqs(); nameMap = ct.getNameMap(); }else { numSeqs = list->getNumSeqs(); } //sanity check if (numSeqs != list->getNumSeqs()) { if (namefile != "") { m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your name file contains " + toString(numSeqs) + " sequences, aborting. Do you have the correct files? Perhaps you forgot to include the name file when you clustered? \n"); } else if (countfile != "") { m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your count file contains " + toString(numSeqs) + " unique sequences, aborting. Do you have the correct files? Perhaps you forgot to include the count file when you clustered? \n"); } m->setControl_pressed(true); } if (m->getControl_pressed()) { delete list; return 0; } ////////////////////////////////////////////////////////////////////////////// // process data // ////////////////////////////////////////////////////////////////////////////// m->mothurOut("\nFinding consensus sequences for each otu...\n"); map seq2Bin = getSequenceBinAssignments(list, nameMap); vector conSeqs = createProcessesConSeqs(nameMap, seq2Bin, binLabels); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); variables["[distance]"] = list->getLabel(); string consFastaFile = getOutputFileName("fasta", variables); outputNames.push_back(consFastaFile); outputTypes["fasta"].push_back(consFastaFile); ofstream out; util.openOutputFile(consFastaFile, out); for (int i = 0; i < conSeqs.size(); i++) { conSeqs[i].printSequence(out); } out.close(); set primers = getPrimer(conSeqs[binIndex]); if (m->getControl_pressed()) { delete list; for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } string consSummaryFile = getOutputFileName("summary", variables); outputNames.push_back(consSummaryFile); outputTypes["summary"].push_back(consSummaryFile); ofstream outSum; util.openOutputFile(consSummaryFile, outSum); outSum << "PrimerOtu: " << otulabel << " Members: " << list->get(binIndex) << endl << "Primers\tminTm\tmaxTm" << endl; //find min and max melting points vector minTms; vector maxTms; string primerString = ""; for (set::iterator it = primers.begin(); it != primers.end();) { double minTm, maxTm; findMeltingPoint(*it, minTm, maxTm); if (util.isEqual(minTM, -1) && util.isEqual(maxTM, -1)) { //user did not set min or max Tm so save this primer minTms.push_back(minTm); maxTms.push_back(maxTm); outSum << *it << '\t' << minTm << '\t' << maxTm << endl; it++; }else if (util.isEqual(minTM, -1) && (maxTm <= maxTM)){ //user set max and no min, keep if below max minTms.push_back(minTm); maxTms.push_back(maxTm); outSum << *it << '\t' << minTm << '\t' << maxTm << endl; it++; }else if (util.isEqual(maxTM, -1) && (minTm >= minTM)){ //user set min and no max, keep if above min minTms.push_back(minTm); maxTms.push_back(maxTm); outSum << *it << '\t' << minTm << '\t' << maxTm << endl; it++; }else if ((maxTm <= maxTM) && (minTm >= minTM)) { //keep if above min and below max minTms.push_back(minTm); 
maxTms.push_back(maxTm); outSum << *it << '\t' << minTm << '\t' << maxTm << endl; it++; }else { primers.erase(it++); } //erase because it didn't qualify } outSum << "\nOTU\tPrimer\tStart\tEnd\tLength\tMismatches\tminTm\tmaxTm\n"; outSum.close(); m->mothurOut("\nProcessing OTUs...\n"); //check each otu's conseq for each primer in otunumber set otuToRemove = createProcesses(consSummaryFile, minTms, maxTms, primers, conSeqs, binIndex, binLabels); if (m->getControl_pressed()) { delete list; for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //print new list file map mvariables; mvariables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listfile)); mvariables["[extension]"] = util.getExtension(listfile); string newListFile = getOutputFileName("list", mvariables); ofstream outListTemp; util.openOutputFile(newListFile+".temp", outListTemp); outListTemp << list->getLabel() << '\t' << (list->getNumBins()-otuToRemove.size()); string headers = "label\tnumOtus"; for (int j = 0; j < list->getNumBins(); j++) { if (m->getControl_pressed()) { break; } //good otus if (otuToRemove.count(j) == 0) { string bin = list->get(j); if (bin != "") { outListTemp << '\t' << bin; headers += '\t' + binLabels[j]; } } } outListTemp << endl; outListTemp.close(); ofstream outList; util.openOutputFile(newListFile, outList); outList << headers << endl; outList.close(); util.appendFiles(newListFile+".temp", newListFile); util.mothurRemove(newListFile+".temp"); outputNames.push_back(newListFile); outputTypes["list"].push_back(newListFile); delete list; if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to process " + toString(list->getNumBins()) + " OTUs.\n"); //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "execute"); exit(1); } } //********************************************************************/ //used http://www.biophp.org/minitools/melting_temperature/ as a reference to substitute degenerate bases // in order to find the min and max Tm values. 
//Tm = 64.9°C + 41°C x (number of G’s and C’s in the primer – 16.4)/N /* A = adenine * C = cytosine * G = guanine * T = thymine * R = G A (purine) * Y = T C (pyrimidine) * K = G T (keto) * M = A C (amino) * S = G C (strong bonds) * W = A T (weak bonds) * B = G T C (all but A) * D = G A T (all but C) * H = A C T (all but G) * V = G C A (all but T) * N = A G C T (any) */ int PrimerDesignCommand::findMeltingPoint(string primer, double& minTm, double& maxTm){ try { string minTmprimer = primer; string maxTmprimer = primer; //find minimum Tm string substituting for degenerate bases for (int i = 0; i < minTmprimer.length(); i++) { minTmprimer[i] = toupper(minTmprimer[i]); if (minTmprimer[i] == 'Y') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'R') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'W') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'K') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'M') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'D') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'V') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'H') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'B') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'N') { minTmprimer[i] = 'A'; } else if (minTmprimer[i] == 'S') { minTmprimer[i] = 'G'; } } //find maximum Tm string substituting for degenerate bases for (int i = 0; i < maxTmprimer.length(); i++) { maxTmprimer[i] = toupper(maxTmprimer[i]); if (maxTmprimer[i] == 'Y') { maxTmprimer[i] = 'G'; } else if (maxTmprimer[i] == 'R') { maxTmprimer[i] = 'G'; } else if (maxTmprimer[i] == 'W') { maxTmprimer[i] = 'A'; } else if (maxTmprimer[i] == 'K') { maxTmprimer[i] = 'G'; } else if (maxTmprimer[i] == 'M') { maxTmprimer[i] = 'G'; } else if (maxTmprimer[i] == 'D') { maxTmprimer[i] = 'G'; } else if (maxTmprimer[i] == 'V') { maxTmprimer[i] = 'G'; } else if (maxTmprimer[i] == 'H') { maxTmprimer[i] = 'G'; } else if (maxTmprimer[i] == 'B') { maxTmprimer[i] = 'G'; } else if (maxTmprimer[i] == 'N') { maxTmprimer[i] = 'G'; } else if (maxTmprimer[i] == 'S') { maxTmprimer[i] = 'G'; } } int numGC = 0; for (int i = 0; i < minTmprimer.length(); i++) { if (minTmprimer[i] == 'G') { numGC++; } else if (minTmprimer[i] == 'C') { numGC++; } } minTm = 64.9 + 41 * (numGC - 16.4) / (double) minTmprimer.length(); numGC = 0; for (int i = 0; i < maxTmprimer.length(); i++) { if (maxTmprimer[i] == 'G') { numGC++; } else if (maxTmprimer[i] == 'C') { numGC++; } } maxTm = 64.9 + 41 * (numGC - 16.4) / (double) maxTmprimer.length(); return 0; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "findMeltingPoint"); exit(1); } } //********************************************************************/ //find all primers for the given sequence set PrimerDesignCommand::getPrimer(Sequence primerSeq){ try { set primers; string rawSequence = primerSeq.getUnaligned(); for (int j = 0; j < rawSequence.length()-length; j++){ if (m->getControl_pressed()) { break; } string primer = rawSequence.substr(j, length); primers.insert(primer); } return primers; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "getPrimer"); exit(1); } } //********************************************************************/ /* A = adenine * C = cytosine * G = guanine * T = thymine * R = G A (purine) * Y = T C (pyrimidine) * K = G T (keto) * M = A C (amino) * S = G C (strong bonds) * W = A T (weak bonds) * B = G T C (all but A) * D = G A T (all but C) * H = A C T (all but G) * V = G C A (all but T) * N = A G C T (any) */ int countDiffs(string oligo, string 
seq, MothurOut* m){ try { int length = oligo.length(); int countDiffs = 0; for(int i=0;ierrorOut(e, "PrimerDesignCommand", "countDiffs"); exit(1); } } //********************************************************************/ //search for a primer over the sequence string bool findPrimer(string rawSequence, string primer, vector& primerStart, vector& primerEnd, vector& mismatches, int length, int pdiffs, MothurOut* m){ try { bool foundAtLeastOne = false; //innocent til proven guilty //look for exact match if(rawSequence.length() < primer.length()) { return false; } //search for primer for (int j = 0; j < rawSequence.length()-length; j++){ if (m->getControl_pressed()) { return foundAtLeastOne; } string rawChunk = rawSequence.substr(j, length); int numDiff = countDiffs(primer, rawChunk, m); if(numDiff <= pdiffs){ primerStart.push_back(j); primerEnd.push_back(j+length); mismatches.push_back(numDiff); foundAtLeastOne = true; } } return foundAtLeastOne; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "findPrimer"); exit(1); } } /**************************************************************************************************/ struct primerDesignData { OutputWriter* summaryFile; MothurOut* m; int start; int end; int pdiffs, length, binIndex; set primers; vector minTms, maxTms; set otusToRemove; vector consSeqs; vector binLabels; int numBinsProcessed; Utils util; primerDesignData(){ delete summaryFile; } primerDesignData(OutputWriter* sf, int st, int en, vector min, vector max, set pri, vector seqs, int d, int otun, int l, vector bl) { summaryFile = sf; m = MothurOut::getInstance(); start = st; end = en; pdiffs = d; minTms = min; maxTms = max; primers = pri; consSeqs = seqs; binIndex = otun; length = l; binLabels = bl; numBinsProcessed = 0; } }; //********************************************************************************************************************** void driverPDesign(primerDesignData* params){ try { for (int i = params->start; i < params->end; i++) { if (params->m->getControl_pressed()) { break; } if (i != (params->binIndex)) { int primerIndex = 0; string output = ""; for (set::iterator it = params->primers.begin(); it != params->primers.end(); it++) { vector primerStarts; vector primerEnds; vector mismatches; bool found = findPrimer(params->consSeqs[i].getUnaligned(), (*it), primerStarts, primerEnds, mismatches, params->length, params->pdiffs, params->m); //if we found it report to the table if (found) { for (int j = 0; j < primerStarts.size(); j++) { output += params->binLabels[i] + '\t' + *it + '\t' + toString(primerStarts[j]) + '\t' + toString(primerEnds[j]) + '\t' + toString(params->length) + '\t' + toString(mismatches[j]) + '\t' + toString(params->minTms[primerIndex]) + '\t' + toString(params->maxTms[primerIndex]) + '\n'; } params->otusToRemove.insert(i); } primerIndex++; } params->summaryFile->write(output); } params->numBinsProcessed++; if((params->numBinsProcessed) % 100 == 0){ params->m->mothurOutJustToScreen(toString(params->numBinsProcessed)+"\n"); } } if((params->numBinsProcessed) % 100 != 0){ params->m->mothurOutJustToScreen(toString(params->numBinsProcessed)+"\n"); } } catch(exception& e) { params->m->errorOut(e, "PrimerDesignCommand", "driver"); exit(1); } } /**************************************************************************************************/ set PrimerDesignCommand::createProcesses(string newSummaryFile, vector& minTms, vector& maxTms, set& primers, vector& conSeqs, int binIndex, vector& binLabels) { try { //create array of worker 
threads vector workerThreads; vector data; //sanity check int numBins = conSeqs.size(); if (numBins < processors) { processors = numBins; } //divide the otus between the processors vector lines; int numOtusPerProcessor = numBins / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numOtusPerProcessor; int endIndex = (i+1) * numOtusPerProcessor; if(i == (processors - 1)){ endIndex = numBins; } lines.push_back(linePair(startIndex, endIndex)); } auto synchronizedOutputSummaryFile = std::make_shared(newSummaryFile, true); //open append //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadSummaryWriter = new OutputWriter(synchronizedOutputSummaryFile); primerDesignData* dataBundle = new primerDesignData(threadSummaryWriter, lines[i+1].start, lines[i+1].end, minTms, maxTms, primers, conSeqs, pdiffs, binIndex, length, binLabels); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverPDesign, dataBundle)); } OutputWriter* threadSummaryWriter = new OutputWriter(synchronizedOutputSummaryFile); primerDesignData* dataBundle = new primerDesignData(threadSummaryWriter, lines[0].start, lines[0].end, minTms, maxTms, primers, conSeqs, pdiffs, binIndex, length, binLabels); driverPDesign(dataBundle); set otusToRemove = dataBundle->otusToRemove; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); otusToRemove.insert(data[i]->otusToRemove.begin(), data[i]->otusToRemove.end()); delete data[i]; delete workerThreads[i]; } delete dataBundle; return otusToRemove; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** int initializeCounts(vector< vector< vector > >& counts, int length, int numBins, MothurOut* m){ try { counts.clear(); //vector< vector< vector > > counts - otu < spot_in_alignment < counts_for_A,T,G,C,Gap > > > for (int i = 0; i < numBins; i++) { vector temp; temp.resize(5, 0); //A,T,G,C,Gap vector< vector > temp2; for (int j = 0; j < length; j++) { temp2.push_back(temp); } counts.push_back(temp2); } return 0; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "initializeCounts"); exit(1); } } /**************************************************************************************************/ struct primerCountsData { map seq2Bin; vector< vector< vector > > counts; // - otu < spot_in_alignment < counts_for_A,T,G,C,Gap > > > MothurOut* m; long long start, end, count, total, alignedLength, numBins; string fastafile; Utils util; map nameMap; vector otuCounts; bool hasNameMap; primerCountsData(){ } primerCountsData(string ff, map nmp, long long st, long long en, map seq2B, int nb) { fastafile = ff; m = MothurOut::getInstance(); start = st; end = en; hasNameMap = false; nameMap = nmp; if (nameMap.size() != 0) { hasNameMap = true; } seq2Bin = seq2B; numBins = nb; count = 0; total = 0; } }; //********************************************************************************************************************** map PrimerDesignCommand::getSequenceBinAssignments(ListVector* list, map& nameMap){ try { map seq2Bin; bool hasNameMap = false; if (nameMap.size() != 0) { hasNameMap = true; } for (int i = 0; i < list->getNumBins(); i++) { string binNames = list->get(i); vector names; util.splitAtComma(binNames, names); //lets be smart and only map the unique names if a name or count file was given to save search time and memory if (hasNameMap) { for (int j = 0; j < 
names.size(); j++) { map::iterator itNames = nameMap.find(names[j]); if (itNames != nameMap.end()) { //add name because its a unique one seq2Bin[names[j]] = i; } } }else { for (int j = 0; j < names.size(); j++) { seq2Bin[names[j]] = i; } } //map everyone } return seq2Bin; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "getSequenceBinAssignments"); exit(1); } } /**************************************************************************************************/ void driverGetCounts(primerCountsData* params){ try { params->otuCounts.resize(params->numBins, 0); params->alignedLength = 0; ifstream in; params->util.openInputFile(params->fastafile, in); in.seekg(params->start); //adjust start if null strings if (params->start == 0) { params->util.zapGremlins(in); gobble(in); } bool done = false; while (!done) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { if (params->count == 0) { params->alignedLength = seq.getAligned().length(); initializeCounts(params->counts, params->alignedLength, params->numBins, params->m); } else if (params->alignedLength != seq.getAligned().length()) { params->m->mothurOut("[ERROR]: your sequences are not all the same length. primer.design requires sequences to be aligned.\n"); params->m->setControl_pressed(true); break; } int num = 1; if (params->hasNameMap) { map::iterator itCount = params->nameMap.find(seq.getName()); if (itCount == params->nameMap.end()) { params->m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your name or count file, aborting.\n"); params->m->setControl_pressed(true); break; } else { params->total += itCount->second; num = itCount->second; } }else { params->total++; } //increment counts map::iterator itCount = params->seq2Bin.find(seq.getName()); if (itCount == params->seq2Bin.end()) { params->m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your list file, aborting.\n"); params->m->setControl_pressed(true); break; }else { params->otuCounts[itCount->second] += num; string aligned = seq.getAligned(); for (int i = 0; i < params->alignedLength; i++) { char base = toupper(aligned[i]); if (base == 'A') { params->counts[itCount->second][i][0]+=num; } else if (base == 'T') { params->counts[itCount->second][i][1]+=num; } else if (base == 'G') { params->counts[itCount->second][i][2]+=num; } else if (base == 'C') { params->counts[itCount->second][i][3]+=num; } else { params->counts[itCount->second][i][4]+=num; } } } } params->count++; if((params->count) % 1000 == 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } #if defined NON_WINDOWS unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if ((params->count == params->end) || (in.eof())) { break; } #endif } if((params->count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } in.close(); } catch(exception& e) { params->m->errorOut(e, "PrimerDesignCommand", "driverGetCounts"); exit(1); } } /**************************************************************************************************/ vector PrimerDesignCommand::createProcessesConSeqs(map& nameMap, map& seq2Bin, vector& binLabels) { try { int numBins = binLabels.size(); vector lines; #if defined NON_WINDOWS vector positions; positions = util.divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else long long numSeqs; vector positions = 
util.setFilePosFasta(fastafile, numSeqs); if (numSeqs < processors) { processors = numSeqs; m->mothurOut("Reducing processors to " + toString(numSeqs) + ".\n"); } //figure out how many sequences you have to process int numSeqsPerProcessor = numSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { if (m->getControl_pressed()) { break; } primerCountsData* dataBundle = new primerCountsData(fastafile, nameMap, lines[i+1].start, lines[i+1].end, seq2Bin, numBins); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverGetCounts, dataBundle)); } primerCountsData* dataBundle = new primerCountsData(fastafile, nameMap, lines[0].start, lines[0].end, seq2Bin, numBins); driverGetCounts(dataBundle); vector< vector< vector > > counts = dataBundle->counts; vector otuCounts = dataBundle->otuCounts; long long total = dataBundle->total; int alignedLength = dataBundle->alignedLength; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); total += data[i]->total; if (m->getControl_pressed()) { break; } if (data[i]->alignedLength != alignedLength) { m->mothurOut("[ERROR]: your sequences are not all the same length. primer.design requires sequences to be aligned.\n"); m->setControl_pressed(true); } for (int k = 0; k < numBins; k++) { //for each bin for (int j = 0; j < data[i]->alignedLength; j++) { //for each position for (int l = 0; l < 5; l++) { counts[k][j][l] += data[i]->counts[k][j][l]; } //for each base } otuCounts[k] += data[i]->otuCounts[k]; } delete data[i]; delete workerThreads[i]; } vector conSeqs; if (m->getControl_pressed()) { return conSeqs; } //build consensus seqs for (int i = 0; i < counts.size(); i++) { if (m->getControl_pressed()) { break; } string otuLabel = binLabels[i]; string cons = ""; for (int j = 0; j < counts[i].size(); j++) { cons += getBase(counts[i][j], otuCounts[i]); } Sequence consSeq(otuLabel, cons); conSeqs.push_back(consSeq); } if (m->getControl_pressed()) { conSeqs.clear(); return conSeqs; } return conSeqs; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "createProcessesConSeqs"); exit(1); } } //*************************************************************************************************************** char PrimerDesignCommand::getBase(vector counts, int size){ //A,T,G,C,Gap try{ /* A = adenine * C = cytosine * G = guanine * T = thymine * R = G A (purine) * Y = T C (pyrimidine) * K = G T (keto) * M = A C (amino) * S = G C (strong bonds) * W = A T (weak bonds) * B = G T C (all but A) * D = G A T (all but C) * H = A C T (all but G) * V = G C A (all but T) * N = A G C T (any) */ char conBase = 'N'; //zero out counts that don't make the cutoff float percentage = (100.0 - cutoff) / 100.0; for (int i = 0; i < counts.size(); i++) { float countPercentage = counts[i] / (float) size; if (countPercentage < percentage) { counts[i] = 0; } } //any if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'n'; } //any no gap else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'N'; } //all but T else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && 
(counts[3] != 0) && (counts[4] != 0)) { conBase = 'v'; } //all but T no gap else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'V'; } //all but G else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'h'; } //all but G no gap else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'H'; } //all but C else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'd'; } //all but C no gap else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'D'; } //all but A else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'b'; } //all but A no gap else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'B'; } //W = A T (weak bonds) else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'w'; } //W = A T (weak bonds) no gap else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'W'; } //S = G C (strong bonds) else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 's'; } //S = G C (strong bonds) no gap else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'S'; } //M = A C (amino) else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'm'; } //M = A C (amino) no gap else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'M'; } //K = G T (keto) else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'k'; } //K = G T (keto) no gap else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'K'; } //Y = T C (pyrimidine) else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'y'; } //Y = T C (pyrimidine) no gap else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'Y'; } //R = G A (purine) else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'r'; } //R = G A (purine) no gap else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'R'; } //only A else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'a'; } //only A no gap else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'A'; } //only T else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 't'; } //only T no gap else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'T'; } //only G else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'g'; } //only G 
no gap else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'G'; } //only C else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'c'; } //only C no gap else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'C'; } //only gap else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = '-'; } //cutoff removed all counts else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'N'; } else{ m->mothurOut("[ERROR]: cannot find consensus base.\n"); } return conBase; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "getBase"); exit(1); } } //********************************************************************************************************************** ListVector* PrimerDesignCommand::getListVector(){ try { InputData input(listfile, "list", nullVector); ListVector* list = input.getListVector(); string lastLabel = list->getLabel(); if (label == "") { label = lastLabel; return list; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((list != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { return list; } if(labels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((util.anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); delete list; list = input.getListVector(lastLabel); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); //restore real lastlabel to save below list->setLabel(saveLabel); break; } lastLabel = list->getLabel(); //get next line to process //prevent memory leak delete list; list = input.getListVector(); } if (m->getControl_pressed()) { return list; } //output error messages about any remaining user labels bool needToRun = false; for (set::iterator it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; } else { m->mothurOut(". 
Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete list; list = input.getListVector(lastLabel); } return list; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "getListVector"); exit(1); } } //********************************************************************************************************************** int PrimerDesignCommand::findIndex(string binLabel, vector binLabels){ try { int index = -1; for (int i = 0; i < binLabels.size(); i++){ if (m->getControl_pressed()) { return index; } if (util.isLabelEquivalent(binLabel, binLabels[i])) { index = i; break; } } return index; } catch(exception& e) { m->errorOut(e, "PrimerDesignCommand", "findIndex"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/primerdesigncommand.h000077500000000000000000000037451424121717000224360ustar00rootroot00000000000000// // primerdesigncommand.h // Mothur // // Created by Sarah Westcott on 1/18/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_primerdesigncommand_h #define Mothur_primerdesigncommand_h #include "command.hpp" #include "listvector.hpp" #include "inputdata.h" #include "sequence.hpp" #include "alignment.hpp" #include "needlemanoverlap.hpp" /**************************************************************************************************/ class PrimerDesignCommand : public Command { public: PrimerDesignCommand(string); ~PrimerDesignCommand(){} vector setParameters(); string getCommandName() { return "primer.design"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "http://www.mothur.org/wiki/Primer.design"; } string getDescription() { return "identify sequence fragments that are specific to particular OTUs"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines, large; int cutoff, pdiffs, length, processors, alignedLength; string listfile, otulabel, namefile, countfile, fastafile, label; double minTM, maxTM; vector outputNames; char getBase(vector counts, int size); ListVector* getListVector(); set getPrimer(Sequence); int findMeltingPoint(string primer, double&, double&); set createProcesses(string, vector&, vector&, set&, vector&, int, vector&); map getSequenceBinAssignments(ListVector* list, map& nameMap); vector createProcessesConSeqs(map&, map&, vector&); int findIndex(string binLabel, vector binLabels); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/quitcommand.cpp000077500000000000000000000021271424121717000212540ustar00rootroot00000000000000/* * quitcommand.cpp * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "quitcommand.h" //********************************************************************************************************************** QuitCommand::QuitCommand(string option) : Command() { abort = false; calledHelp = false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } } //********************************************************************************************************************** QuitCommand::~QuitCommand(){} //********************************************************************************************************************** int QuitCommand::execute(){ if (abort) { return 0; } return 1; } //********************************************************************************************************************** mothur-1.48.0/source/commands/quitcommand.h000077500000000000000000000020541424121717000207200ustar00rootroot00000000000000#ifndef QUITCOMMAND_H #define QUITCOMMAND_H /* * quitcommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" /* The quit() command: The quit command terminates the mothur program. The quit command should be in the following format: quit (). */ class QuitCommand : public Command { public: QuitCommand(string); ~QuitCommand(); vector setParameters() { return outputNames; } //dummy, doesn't really do anything string getCommandName() { return "quit"; } string getCommandCategory() { return "Hidden"; } string getHelpString() { return "The quit command will terminate mothur and should be in the following format: quit() or quit. \n"; } string getOutputPattern(string) { return ""; } string getCitation() { return "no citation"; } string getDescription() { return "quit"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; vector outputNames; }; #endif mothur-1.48.0/source/commands/rarefactcommand.cpp000077500000000000000000001001731424121717000220610ustar00rootroot00000000000000/* * rarefactcommand.cpp * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "rarefactcommand.h" #include "ace.h" #include "sobs.h" #include "nseqs.h" #include "chao1.h" #include "bootstrap.h" #include "simpson.h" #include "simpsoneven.h" #include "heip.h" #include "smithwilson.h" #include "invsimpson.h" #include "npshannon.h" #include "shannoneven.h" #include "shannon.h" #include "jackknife.h" #include "coverage.h" #include "shannonrange.h" //********************************************************************************************************************** vector RareFactCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(plist); CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false); parameters.push_back(psabund); CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(pshared); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-shannonrange", "sobs", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha); CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["rarefaction"] = tempOutNames; outputTypes["r_chao"] = tempOutNames; outputTypes["r_ace"] = tempOutNames; outputTypes["r_jack"] = tempOutNames; outputTypes["r_shannon"] = tempOutNames; outputTypes["r_shannoneven"] = tempOutNames; outputTypes["r_shannonrange"] = tempOutNames; outputTypes["r_heip"] = tempOutNames; outputTypes["r_smithwilson"] = tempOutNames; outputTypes["r_npshannon"] = tempOutNames; outputTypes["r_simpson"] = tempOutNames; outputTypes["r_simpsoneven"] = tempOutNames; outputTypes["r_invsimpson"] = tempOutNames; outputTypes["r_bootstrap"] = tempOutNames; outputTypes["r_coverage"] = tempOutNames; outputTypes["r_nseqs"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "RareFactCommand", "setParameters"); exit(1); } } 
//********************************************************************************************************************** string RareFactCommand::getHelpString(){ try { ValidCalculators validCalculator; string helpString = ""; helpString += "The rarefaction.single command parameters are list, sabund, rabund, shared, label, iters, freq, calc, groupmode, groups, processors and abund. list, sabund, rabund or shared is required unless you have a valid current file. \n"; helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n"; helpString += "The rarefaction.single command should be in the following format: \n"; helpString += "rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n"; helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n"; helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n"; helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n"; validCalculator.printCalc("rarefaction"); helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "RareFactCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RareFactCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "rarefaction") { pattern = "[filename],rarefaction"; } else if (type == "r_chao") { pattern = "[filename],r_chao"; } else if (type == "r_ace") { pattern = "[filename],r_ace"; } else if (type == "r_jack") { pattern = "[filename],r_jack"; } else if (type == "r_shannon") { pattern = "[filename],r_shannon"; } else if (type == "r_shannoneven") { pattern = "[filename],r_shannoneven"; } else if (type == "r_smithwilson") { pattern = "[filename],r_smithwilson"; } else if (type == "r_npshannon") { pattern = "[filename],r_npshannon"; } else if (type == "r_shannonrange"){ pattern = "[filename],r_shannonrange"; } else if (type == "r_simpson") { pattern = "[filename],r_simpson"; } else if (type == "r_simpsoneven") { pattern = "[filename],r_simpsoneven"; } else if (type == "r_invsimpson") { pattern = "[filename],r_invsimpson"; } else if (type == "r_bootstrap") { pattern = "[filename],r_bootstrap"; } else if (type == "r_coverage") { pattern = "[filename],r_coverage"; } else if (type == "r_nseqs") { pattern = "[filename],r_nseqs"; } else if (type == "r_heip") { pattern = "[filename],r_heip"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "RareFactCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** RareFactCommand::RareFactCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = 
true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; inputfile = listfile; current->setListFile(listfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { sabundfile = ""; abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } else { format = "sabund"; inputfile = sabundfile; current->setSabundFile(sabundfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { rabundfile = ""; abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } else { format = "rabund"; inputfile = rabundfile; current->setRabundFile(rabundfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { format = "sharedfile"; inputfile = sharedfile; current->setSharedFile(sharedfile); } if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { //is there are current file available for any of these? //give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { rabundfile = current->getRabundFile(); if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter.\n"); } else { sabundfile = current->getSabundFile(); if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command.\n"); abort = true; } } } } } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
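// Summary of the option handling below: 'label' and 'calc' are split on dashes into the labels set and
// the Estimators vector; 'freq', 'abund', 'iters' and 'alpha' are converted from strings to numbers
// (alpha must be 0, 1 or 2 and is only used by the shannonrange calculator); 'groupmode' controls whether
// per-group results from a shared file are collated into one file, and 'groups' restricts which samples
// are analyzed.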
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "sobs"; } else { if (calc == "default") { calc = "sobs"; } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } string temp; temp = validParameter.valid(parameters, "freq"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, freq); temp = validParameter.valid(parameters, "abund"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, abund); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, nIters); temp = validParameter.valid(parameters, "alpha"); if (temp == "not found") { temp = "1"; } util.mothurConvert(temp, alpha); if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid alpha value. Valid values are 0, 1 and 2.\n"); abort=true; } temp = validParameter.valid(parameters, "groupmode"); if (temp == "not found") { temp = "T"; } groupMode = util.isTrue(temp); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } } } catch(exception& e) { m->errorOut(e, "RareFactCommand", "RareFactCommand"); exit(1); } } //********************************************************************************************************************** int RareFactCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); map > labelToEnds; if ((format != "sharedfile")) { inputFileNames.push_back(inputfile); } else { inputFileNames = parseSharedFile(sharedfile, labelToEnds); format = "rabund"; } if (m->getControl_pressed()) { return 0; } map file2Group; //index in outputNames[i] -> group for (int p = 0; p < inputFileNames.size(); p++) { string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(inputFileNames[p])); map variables; variables["[filename]"] = fileNameRoot; if (m->getControl_pressed()) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (inputFileNames.size() > 1) { m->mothurOut("\nProcessing group " + Groups[p] + "\n\n"); } fillRDisplays(variables, file2Group, p); //if the users entered no valid calculators don't execute command if (rDisplays.size() == 0) { for(int i=0;i processedLabels; set userLabels = labels; string lastLabel = ""; OrderVector* order = util.getNextOrder(input, allLines, userLabels, processedLabels, lastLabel); while (order != nullptr) { if (m->getControl_pressed()) { delete order; break; } map >::iterator itEndings = labelToEnds.find(order->getLabel()); set ends; if (itEndings != labelToEnds.end()) { ends = itEndings->second; } Rarefact* rCurve = new Rarefact(*order, rDisplays, ends, processors); rCurve->getCurve(freq, nIters); delete rCurve; delete order; order = 
util.getNextOrder(input, allLines, userLabels, processedLabels, lastLabel); } //delete displays for(int i=0;i 1) { for (int p = 0; p < inputFileNames.size(); p++) { util.mothurRemove(inputFileNames[p]); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //create summary file containing all the groups data for each label - this function just combines the info from the files already created. if ((sharedfile != "") && (groupMode)) { outputNames = createGroupFile(outputNames, file2Group); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to run rarefaction.single.\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "RareFactCommand", "execute"); exit(1); } } //********************************************************************************************************************** void RareFactCommand::fillRDisplays(map variables, map& file2Group, int thisGroup) { try { ValidCalculators validCalculator; for (int i=0; i 1) { file2Group[outputNames.size()-1] = Groups[thisGroup]; } } } }catch(exception& e) { m->errorOut(e, "RareFactCommand", "fillCDisplays"); exit(1); } } //********************************************************************************************************************** vector RareFactCommand::createGroupFile(vector& outputNames, map file2Group) { try { vector newFileNames; //find different types of files map > typesFiles; map > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci. vector groupNames; for (int i = 0; i < outputNames.size(); i++) { string extension = util.getExtension(outputNames[i]); string combineFileName = outputdir + util.getRootName(util.getSimpleName(sharedfile)) + "groups" + extension; util.mothurRemove(combineFileName); //remove old file ifstream in; util.openInputFile(outputNames[i], in); string labels = util.getline(in); gobble(in); vector theseLabels = util.splitWhiteSpace(labels); vector< vector > allLabels; vector thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping for (int j = 1; j < theseLabels.size()-1; j++) { if (theseLabels[j+1] == "lci") { thisSet.push_back(theseLabels[j]); thisSet.push_back(theseLabels[j+1]); thisSet.push_back(theseLabels[j+2]); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + util.getStringFromVector(thisSet, " ") + "\n"); } j++; j++; }else{ //no lci or hci for this calc. 
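// Single-column calculator (no lci/hci): store the label as its own one-element chunk so the per-group
// columns written later still line up with the three-element (label, lci, hci) chunks.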
thisSet.push_back(theseLabels[j]); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + util.getStringFromVector(thisSet, " ") + "\n"); } } allLabels.push_back(thisSet); thisSet.clear(); } fileLabels[combineFileName] = allLabels; map >::iterator itfind = typesFiles.find(extension); if (itfind != typesFiles.end()) { (itfind->second)[outputNames[i]] = file2Group[i]; }else { map temp; temp[outputNames[i]] = file2Group[i]; typesFiles[extension] = temp; } if (!(util.inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); } } //for each type create a combo file for (map >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) { ofstream out; string combineFileName = outputdir + util.getRootName(util.getSimpleName(sharedfile)) + "groups" + it->first; util.openOutputFileAppend(combineFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); newFileNames.push_back(combineFileName); map thisTypesFiles = it->second; //it->second maps filename to group set numSampledSet; //open each type summary file map > > > files; //maps file name to lines in file int maxLines = 0; for (map::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) { string thisfilename = itFileNameGroup->first; string group = itFileNameGroup->second; if (m->getDebug()) { m->mothurOut("[DEBUG]: " + thisfilename + "\t" + group + "\n"); } ifstream temp; util.openInputFile(thisfilename, temp); //read through first line - labels string dummy = util.getline(temp); gobble(temp); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + dummy + "\t" + toString(fileLabels[combineFileName].size()) + "\n"); } // map > > thisFilesLines; //numSampled -> while (!temp.eof()){ float numSampled = 0; string thisLineInfo = util.getline(temp); gobble(temp); vector parsedLine = util.splitWhiteSpace(thisLineInfo); util.mothurConvert(parsedLine[0], numSampled); vector< vector > theseReads; vector thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear(); int columnIndex = 1; //0 -> numSampled, 1 -> 0.03, 2 -> 0.03lci, 3 -> 0.03hci, 4 -> 0.05, 5 -> 0.05lci, 6 -> 0.05hci for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A vector reads; string next = ""; int numColumnsPerLabel = fileLabels[combineFileName][k].size(); // 0.03 lci hci ... 
0.05 lci hci -> 3 columns for (int l = 0; l < numColumnsPerLabel; l++) { reads.push_back(parsedLine[columnIndex]); columnIndex++; } theseReads.push_back(reads); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + util.getStringFromVector(reads, " ") + "\n"); } } thisFilesLines[numSampled] = theseReads; gobble(temp); numSampledSet.insert(numSampled); } files[group] = thisFilesLines; //save longest file for below if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); } temp.close(); util.mothurRemove(thisfilename); } //output new labels line out << fileLabels[combineFileName][0][0]; for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A for (int n = 0; n < groupNames.size(); n++) { // for each group for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels out << '\t' << fileLabels[combineFileName][k][l] << '-' << groupNames[n]; } } } out << endl; //for each label for (set::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) { out << (*itNumSampled); if (m->getControl_pressed()) { break; } for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk //grab data for each group for (int n = 0; n < groupNames.size(); n++) { string group = groupNames[n]; map > >::iterator itLine = files[group].find(*itNumSampled); if (itLine != files[group].end()) { for (int l = 0; l < (itLine->second)[k].size(); l++) { out << '\t' << (itLine->second)[k][l]; } }else { for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { out << "\tNA"; } } } } out << endl; } out.close(); } //return combine file name return newFileNames; } catch(exception& e) { m->errorOut(e, "RareFactCommand", "createGroupFile"); exit(1); } } //********************************************************************************************************************** vector RareFactCommand::parseSharedFile(string filename, map >& label2Ends) { try { vector filenames; map files; map::iterator it3; InputData input(filename, "sharedfile", Groups); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); Groups = lookup->getNamesGroups(); string sharedFileRoot = util.getRootName(filename); //clears file before we start to write to it below for (int i=0; i data = lookup->getSharedRAbundVectors(); for (int i = 0; i < data.size(); i++) { ofstream temp; string group = Groups[i]; util.openOutputFileAppend(files[group], temp); data[i]->getRAbundVector().print(temp); temp.close(); label2Ends[lookup->getLabel()].insert(data[i]->getNumSeqs()); } for (int i = 0; i < data.size(); i++) { delete data[i]; } delete lookup; lookup = input.getSharedRAbundVectors(); } return filenames; } catch(exception& e) { m->errorOut(e, "RareFactCommand", "parseSharedFile"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/rarefactcommand.h000077500000000000000000000031201424121717000215200ustar00rootroot00000000000000#ifndef RAREFACTCOMMAND_H #define RAREFACTCOMMAND_H /* * rarefactcommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "command.hpp" #include "ordervector.hpp" #include "inputdata.h" #include "rarefact.h" #include "display.h" #include "validcalculator.h" class RareFactCommand : public Command { public: RareFactCommand(string); ~RareFactCommand(){} vector setParameters(); string getCommandName() { return "rarefaction.single"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Magurran AE (2004). Measuring biological diversity. Blackwell Pub.: Malden, Ma. \nhttp://www.mothur.org/wiki/Rarefaction.single"; } string getDescription() { return "generate intra-sample rarefaction curves using a re-sampling without replacement approach"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector rDisplays; int nIters, abund, processors, alpha; float freq; bool abort, allLines, groupMode; set labels; //holds labels to be used string label, calc, sharedfile, listfile, rabundfile, sabundfile, format, inputfile; vector Estimators; vector inputFileNames, outputNames; vector Groups; vector parseSharedFile(string, map >&); vector createGroupFile(vector&, map); void fillRDisplays(map, map&, int); }; #endif mothur-1.48.0/source/commands/rarefactsharedcommand.cpp000077500000000000000000000767351424121717000232700ustar00rootroot00000000000000/* * rarefactsharedcommand.cpp * Dotur * * Created by Sarah Westcott on 1/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "rarefactsharedcommand.h" #include "sharedsobs.h" #include "sharednseqs.h" #include "subsample.h" //********************************************************************************************************************** vector RareFactSharedCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pshared); CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pdesign); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter psubsampleiters("subsampleiters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(psubsampleiters); CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter pwithreplacement("withreplacement", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pwithreplacement); CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pjumble); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pprocessors("processors", 
"Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["sharedrarefaction"] = tempOutNames; outputTypes["sharedr_nseqs"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "RareFactSharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string RareFactSharedCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble, groupmode, processors and calc. shared is required if there is no current sharedfile. \n"; helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n"; helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n"; helpString += "The rarefaction command should be in the following format: \n"; helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n"; helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n"; helpString += "Example rarefaction.shared(label=unique-0.01-0.03, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n"; helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n"; helpString += "The subsampleiters parameter allows you to choose the number of times you would like to run the subsample.\n"; helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n"; helpString += "The withreplacement parameter allows you to indicate you want to subsample your data allowing for the same read to be included multiple times. Default=f. \n"; helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n"; helpString += validCalculator.printCalc("sharedrarefaction"); helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. 
You must enter at least 2 valid groups.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "RareFactSharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RareFactSharedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "sharedrarefaction") { pattern = "[filename],shared.rarefaction"; } else if (type == "sharedr_nseqs") { pattern = "[filename],shared.r_nseqs"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "RareFactSharedCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** RareFactSharedCommand::RareFactSharedCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { abort = true; designfile = ""; } else if (designfile == "not found") { designfile = ""; } else { current->setDesignFile(designfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
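// Summary of the option handling below: 'label', 'calc', 'groups' and 'sets' are split on dashes;
// 'jumble' and 'groupmode' are boolean flags; 'subsample' may be an explicit per-group sample size or
// 'T' (the size is then set to the smallest group once the data are read), and 'withreplacement' selects
// subsampling with or without replacement. When subsampling is off, subsampleiters is reduced to a
// single iteration.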
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "sharedobserved"; } else { if (calc == "default") { calc = "sharedobserved"; } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string sets = validParameter.valid(parameters, "sets"); if (sets == "not found") { sets = ""; } else { util.splitAtDash(sets, Sets); if (Sets.size() != 0) { if (Sets[0] != "all") { Sets.clear(); } } } string temp; temp = validParameter.valid(parameters, "freq"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, freq); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, nIters); temp = validParameter.valid(parameters, "jumble"); if (temp == "not found") { temp = "T"; } if (util.isTrue(temp)) { jumble = true; } else { jumble = false; } temp = validParameter.valid(parameters, "groupmode"); if (temp == "not found") { temp = "T"; } groupMode = util.isTrue(temp); temp = validParameter.valid(parameters, "subsampleiters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "subsample"); if (temp == "not found") { temp = "F"; } if (util.isNumeric1(temp)) { util.mothurConvert(temp, subsampleSize); subsample = true; } else { if (util.isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later else { subsample = false; } } if (subsample == false) { iters = 1; } temp = validParameter.valid(parameters, "withreplacement"); if (temp == "not found"){ temp = "f"; } withReplacement = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand"); exit(1); } } //********************************************************************************************************************** int RareFactSharedCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } DesignMap designMap; if (designfile == "") { //fake out designMap to run with process process(designMap, ""); }else { designMap.read(designfile); if (Sets.size() == 0) { Sets = designMap.getCategory(); } for (int i = 0; i < Sets.size(); i++) { process(designMap, Sets[i]); } if (groupMode) { outputNames = createGroupFile(outputNames); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "RareFactSharedCommand", "execute"); exit(1); } } 
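//**********************************************************************************************************************
// [Editor's note -- illustrative sketch only, not part of mothur] process() below hands the actual
// rarefying of each group to SubSample::getSample / getSampleWithReplacement. The idea of drawing a
// fixed-size subsample without replacement can be sketched as a partial Fisher-Yates shuffle; every name
// here (drawWithoutReplacement, reads, rng) is hypothetical and meant only to illustrate the technique.
#include <vector>    // editor's sketch: explicit includes so the example stands alone
#include <utility>
#include <random>
static std::vector<int> drawWithoutReplacement(std::vector<int> reads, int sampleSize, std::mt19937& rng) {
    int n = (int)reads.size();
    if (sampleSize > n) { sampleSize = n; }                   // cannot draw more reads than exist
    for (int i = 0; i < sampleSize; i++) {
        std::uniform_int_distribution<int> pick(i, n - 1);    // choose uniformly among the undrawn reads
        std::swap(reads[i], reads[pick(rng)]);                // move the chosen read into the sample prefix
    }
    reads.resize(sampleSize);                                 // keep only the drawn reads
    return reads;
}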
//********************************************************************************************************************** int RareFactSharedCommand::process(DesignMap& designMap, string thisSet){ try { Rarefact* rCurve; vector rDisplays; InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel, thisSet); Groups = lookup->getNamesGroups(); if (lookup->size() < 2) { m->mothurOut("[ERROR]: I cannot run the command without at least 2 valid groups."); delete lookup; return 0; } string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(sharedfile)); vector newGroups = lookup->getNamesGroups(); if (thisSet != "") { //make groups only filled with groups from this set so that's all inputdata will read vector thisSets; thisSets.push_back(thisSet); newGroups = designMap.getNamesGroups(thisSets); fileNameRoot += thisSet + "."; } SharedRAbundVectors* subset = new SharedRAbundVectors(); subset->setLabels(lookup->getLabel()); subset->setOTUNames(lookup->getOTUNames()); vector data = lookup->getSharedRAbundVectors(); if (thisSet != "") {//remove unwanted groups for (int i = 0; i < data.size(); i++) { if (util.inUsersGroups(data[i]->getGroup(), newGroups)) { subset->push_back(data[i]); } } subset->eliminateZeroOTUS(); }else { for (int i = 0; i < data.size(); i++) { subset->push_back(data[i]); } } /******************************************************/ if (subsample) { //user has not set size, set size = smallest samples size if (subsampleSize == -1) { subsampleSize = subset->getNumSeqsSmallestGroup(); m->mothurOut("Setting subsample size to " + toString(subsampleSize) + ".\n\n"); } subset->removeGroups(subsampleSize); newGroups = subset->getNamesGroups(); if (subset->size() < 2) { m->mothurOut("You have not provided enough valid groups. 
I cannot run the command.\n\n"); m->setControl_pressed(true); return 0; } } /******************************************************/ map variables; variables["[filename]"] = fileNameRoot; ValidCalculators validCalculator; for (int i=0; igetControl_pressed()) { for(int i=0;igetControl_pressed()) { delete subset; delete lookup; break; } rCurve = new Rarefact(subset, rDisplays, jumble, processors); rCurve->getSharedCurve(freq, nIters); delete rCurve; if (subsample) { subsampleLookup(subset, fileNameRoot); } delete lookup; delete subset; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel, thisSet); if (lookup != nullptr) { subset = new SharedRAbundVectors(); data = lookup->getSharedRAbundVectors(); if (thisSet != "") {//remove unwanted groups for (int i = 0; i < data.size(); i++) { if (util.inUsersGroups(data[i]->getGroup(), newGroups)) { subset->push_back(data[i]); } } subset->eliminateZeroOTUS(); }else { for (int i = 0; i < data.size(); i++) { subset->push_back(data[i]); } } }else { subset = nullptr; } } for(int i=0;igetControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } } return 0; } catch(exception& e) { m->errorOut(e, "RareFactSharedCommand", "process"); exit(1); } } //********************************************************************************************************************** int RareFactSharedCommand::subsampleLookup(SharedRAbundVectors*& thisLookup, string fileNameRoot) { try { map > filenames; SubSample sample; for (int thisIter = 0; thisIter < iters; thisIter++) { SharedRAbundVectors* thisItersLookup = new SharedRAbundVectors(*thisLookup); if (withReplacement) { sample.getSampleWithReplacement(thisItersLookup, subsampleSize); } else { sample.getSample(thisItersLookup, subsampleSize); } string thisfileNameRoot = fileNameRoot + toString(thisIter); map variables; variables["[filename]"] = thisfileNameRoot; vector rDisplays; ValidCalculators validCalculator; for (int i=0; imothurOutJustToScreen(toString(thisIter+1)+"\n"); } } //create std and ave outputs vector< vector< vector< double > > > results; //iter -> numSampled -> data for (map >::iterator it = filenames.begin(); it != filenames.end(); it++) { vector thisTypesFiles = it->second; vector columnHeaders; for (int i = 0; i < thisTypesFiles.size(); i++) { ifstream in; util.openInputFile(thisTypesFiles[i], in); string headers = util.getline(in); gobble(in); columnHeaders = util.splitWhiteSpace(headers); int numCols = columnHeaders.size(); vector > thisFilesLines; while (!in.eof()) { if (m->getControl_pressed()) { break; } vector data; data.resize(numCols, 0); //read numSampled line for (int j = 0; j < numCols; j++) { in >> data[j]; gobble(in); } thisFilesLines.push_back(data); } in.close(); results.push_back(thisFilesLines); util.mothurRemove(thisTypesFiles[i]); } if (!m->getControl_pressed()) { //process results map variables; variables["[filename]"] = fileNameRoot + "ave-std." 
+ thisLookup->getLabel() + "."; string outputFile = getOutputFileName(it->first,variables); ofstream out; util.openOutputFile(outputFile, out); outputNames.push_back(outputFile); outputTypes[it->first].push_back(outputFile); out << columnHeaders[0] << '\t' << "method"; for (int i = 1; i < columnHeaders.size(); i++) { out << '\t' << columnHeaders[i]; } out << endl; vector< vector > aveResults; aveResults.resize(results[0].size()); for (int i = 0; i < aveResults.size(); i++) { aveResults[i].resize(results[0][i].size(), 0.0); } for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator for (int i = 0; i < aveResults.size(); i++) { //initialize sums to zero. aveResults[i][0] = results[thisIter][i][0]; for (int j = 1; j < aveResults[i].size(); j++) { aveResults[i][j] += results[thisIter][i][j]; } } } for (int i = 0; i < aveResults.size(); i++) { //finds average. for (int j = 1; j < aveResults[i].size(); j++) { aveResults[i][j] /= (float) iters; } } //standard deviation vector< vector > stdResults; stdResults.resize(results[0].size()); for (int i = 0; i < stdResults.size(); i++) { stdResults[i].resize(results[0][i].size(), 0.0); } for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each for (int i = 0; i < stdResults.size(); i++) { stdResults[i][0] = aveResults[i][0]; for (int j = 1; j < stdResults[i].size(); j++) { stdResults[i][j] += ((results[thisIter][i][j] - aveResults[i][j]) * (results[thisIter][i][j] - aveResults[i][j])); } } } for (int i = 0; i < stdResults.size(); i++) { //finds average. out << aveResults[i][0] << '\t' << "ave"; for (int j = 1; j < aveResults[i].size(); j++) { out << '\t' << aveResults[i][j]; } out << endl; out << stdResults[i][0] << '\t' << "std"; for (int j = 1; j < stdResults[i].size(); j++) { stdResults[i][j] /= (float) iters; stdResults[i][j] = sqrt(stdResults[i][j]); out << '\t' << stdResults[i][j]; } out << endl; } out.close(); } } return 0; } catch(exception& e) { m->errorOut(e, "RareFactSharedCommand", "subsample"); exit(1); } } //********************************************************************************************************************** vector RareFactSharedCommand::createGroupFile(vector& outputNames) { try { vector newFileNames; //find different types of files map > typesFiles; map > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci. 
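// The loop below re-reads each per-set output file, groups its column headers into three-element chunks
// (label, lci, hci), and records which group/set each file came from, so the combined "groups" file
// written in the second loop can interleave one block of columns per set at every sampling depth.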
vector groupNames; for (int i = 0; i < outputNames.size(); i++) { string extension = util.getExtension(outputNames[i]); string combineFileName = outputdir + util.getRootName(util.getSimpleName(sharedfile)) + "groups" + extension; util.mothurRemove(combineFileName); //remove old file ifstream in; util.openInputFile(outputNames[i], in); string labels = util.getline(in); gobble(in); vector theseLabels = util.splitWhiteSpace(labels); vector< vector > allLabels; vector thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping for (int j = 1; j < theseLabels.size()-1; j++) { thisSet.push_back(theseLabels[j]); j++; //j+1 thisSet.push_back(theseLabels[j]); j++; //j+2 thisSet.push_back(theseLabels[j]); allLabels.push_back(thisSet); thisSet.clear(); } fileLabels[combineFileName] = allLabels; map >::iterator itfind = typesFiles.find(extension); if (itfind != typesFiles.end()) { (itfind->second)[outputNames[i]] = file2Group[i]; }else { map temp; temp[outputNames[i]] = file2Group[i]; typesFiles[extension] = temp; } if (!(util.inUsersGroups(file2Group[i], groupNames))) { groupNames.push_back(file2Group[i]); } } //for each type create a combo file for (map >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) { ofstream out; string combineFileName = outputdir + util.getRootName(util.getSimpleName(sharedfile)) + "groups" + it->first; util.openOutputFileAppend(combineFileName, out); newFileNames.push_back(combineFileName); map thisTypesFiles = it->second; //it->second maps filename to group set numSampledSet; //open each type summary file map > > > files; //maps file name to lines in file int maxLines = 0; for (map::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) { string thisfilename = itFileNameGroup->first; string group = itFileNameGroup->second; if (m->getDebug()) { m->mothurOut("[DEBUG]: " + thisfilename + "\t" + group + "\n"); } ifstream temp; util.openInputFile(thisfilename, temp); //read through first line - labels string dummy = util.getline(temp); gobble(temp); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + dummy + "\t" + toString(fileLabels[combineFileName].size()) + "\n"); } map > > thisFilesLines; while (!temp.eof()){ float numSampled = 0; string thisLineInfo = util.getline(temp); gobble(temp); vector parsedLine = util.splitWhiteSpace(thisLineInfo); util.mothurConvert(parsedLine[0], numSampled); vector< vector > theseReads; vector thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear(); int columnIndex = 1; //0 -> numSampled, 1 -> 0.03, 2 -> 0.03lci, 3 -> 0.03hci, 4 -> 0.05, 5 -> 0.05lci, 6 -> 0.05hci for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A vector reads; string next = ""; int numColumnsPerLabel = fileLabels[combineFileName][k].size(); // 0.03 lci hci ... 
0.05 lci hci -> 3 columns for (int l = 0; l < numColumnsPerLabel; l++) { reads.push_back(parsedLine[columnIndex]); columnIndex++; } theseReads.push_back(reads); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + util.getStringFromVector(reads, " ") + "\n"); } } thisFilesLines[numSampled] = theseReads; gobble(temp); numSampledSet.insert(numSampled); } files[group] = thisFilesLines; //save longest file for below if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); } temp.close(); util.mothurRemove(thisfilename); } //output new labels line out << fileLabels[combineFileName][0][0]; for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A for (int n = 0; n < groupNames.size(); n++) { // for each group for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels out << '\t' << fileLabels[combineFileName][k][l]; // << '-' << groupNames[n]; } } } out << endl; //for each label for (set::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) { out << (*itNumSampled); if (m->getControl_pressed()) { break; } for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk //grab data for each group for (map > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) { string group = itFileNameGroup->first; map > >::iterator itLine = files[group].find(*itNumSampled); if (itLine != files[group].end()) { for (int l = 0; l < (itLine->second)[k].size(); l++) { out << '\t' << (itLine->second)[k][l]; } }else { for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { out << "\tNA"; } } } } out << endl; } out.close(); } //return combine file name return newFileNames; } catch(exception& e) { m->errorOut(e, "RareFactSharedCommand", "createGroupFile"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/rarefactsharedcommand.h000077500000000000000000000031351424121717000227150ustar00rootroot00000000000000#ifndef RAREFACTSHAREDCOMMAND_H #define RAREFACTSHAREDCOMMAND_H /* * rarefactsharedcommand.h * Dotur * * Created by Sarah Westcott on 1/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" #include "rarefact.h" #include "display.h" #include "validcalculator.h" #include "designmap.h" class RareFactSharedCommand : public Command { public: RareFactSharedCommand(string); ~RareFactSharedCommand() = default; vector setParameters(); string getCommandName() { return "rarefaction.shared"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Magurran AE (2004). Measuring biological diversity. Blackwell Pub.: Malden, Ma. 
\nhttp://www.mothur.org/wiki/Rarefaction.shared"; } string getDescription() { return "generate inter-sample rarefaction curves using a re-sampling without replacement approach"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: int nIters, subsampleSize, iters, processors; string format; float freq; map file2Group; //index in outputNames[i] -> group bool abort, allLines, jumble, groupMode, subsample, withReplacement; set labels; //holds labels to be used string label, calc, groups, sharedfile, designfile; vector Estimators, Groups, outputNames, Sets; int process(DesignMap&, string); vector createGroupFile(vector&); int subsampleLookup(SharedRAbundVectors*&, string); }; #endif mothur-1.48.0/source/commands/removedistscommand.cpp000077500000000000000000000355531424121717000226470ustar00rootroot00000000000000// // removedistscommand.cpp // Mothur // // Created by Sarah Westcott on 1/29/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "removedistscommand.h" //********************************************************************************************************************** vector RemoveDistsCommand::setParameters(){ try { CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip); CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["column"] = tempOutNames; outputTypes["phylip"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "RemoveDistsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string RemoveDistsCommand::getHelpString(){ try { string helpString = ""; helpString += "The remove.dists command removes distances from a phylip or column file related to groups or sequences listed in an accnos file.\n"; helpString += "The remove.dists command parameters are accnos, phylip and column.\n"; helpString += "The remove.dists command should be in the following format: remove.dists(accnos=yourAccnos, phylip=yourPhylip).\n"; helpString += "Example remove.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "RemoveDistsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RemoveDistsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "phylip") { pattern = "[filename],pick,[extension]"; } else if (type == "column") { pattern = "[filename],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } 
catch(exception& e) { m->errorOut(e, "RemoveDistsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** RemoveDistsCommand::RemoveDistsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = current->getAccnosFile(); if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter.\n"); } else { m->mothurOut("You have no valid accnos file and accnos is required.\n"); abort = true; } }else { current->setAccnosFile(accnosfile); } phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { phylipfile = ""; } else { current->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { columnfile = ""; abort = true; } else if (columnfile == "not found") { columnfile = ""; } else { current->setColumnFile(columnfile); } if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these? //give priority to column, then phylip columnfile = current->getColumnFile(); if (columnfile != "") { m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { phylipfile = current->getPhylipFile(); if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("No valid current files. 
You must provide a phylip or column file.\n"); abort = true; } } } } } catch(exception& e) { m->errorOut(e, "RemoveDistsCommand", "RemoveDistsCommand"); exit(1); } } //********************************************************************************************************************** int RemoveDistsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //get names you want to keep names = util.readAccnos(accnosfile); if (m->getControl_pressed()) { return 0; } //read through the correct file and output lines you want to keep if (phylipfile != "") { readPhylip(); } if (columnfile != "") { readColumn(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setPhylipFile(currentName); } } itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setColumnFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveDistsCommand", "execute"); exit(1); } } //********************************************************************************************************************** int RemoveDistsCommand::readPhylip(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(phylipfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(phylipfile)); variables["[extension]"] = util.getExtension(phylipfile); string outputFileName = getOutputFileName("phylip", variables); ifstream in; util.openInputFile(phylipfile, in); float distance; int square, nseqs; string name; unsigned int row; set rows; //converts names in names to a index row = 0; string numTest; in >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } //not one we want to remove if (names.count(name) == 0) { rows.insert(row); } row++; //is the matrix square? 
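//peek at what follows the first name to decide the matrix format: in a square (full) phylip matrix the first row already
//contains nseqs distances, so an alphanumeric character is hit before the newline; in a lower-triangle matrix the first
//row holds no distances, so the newline comes first.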
char d; while((d=in.get()) != EOF){ if(isalnum(d)){ square = 1; in.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = 0; break; } } //map name to row/column if(square == 0){ for(int i=1;i> name; if (names.count(name) == 0) { rows.insert(row); } row++; for(int j=0;jgetControl_pressed()) { in.close(); return 0; } in >> distance; } } } else{ for(int i=1;i> name; if (names.count(name) == 0) { rows.insert(row); } row++; for(int j=0;jgetControl_pressed()) { in.close(); return 0; } in >> distance; } } } in.close(); if (m->getControl_pressed()) { return 0; } //read through file only printing rows and columns of seqs in names ifstream inPhylip; util.openInputFile(phylipfile, inPhylip); inPhylip >> numTest; ofstream out; util.openOutputFile(outputFileName, out); outputTypes["phylip"].push_back(outputFileName); outputNames.push_back(outputFileName); out << (nseqs-names.size()) << endl; unsigned int count = 0; unsigned int keptCount = 0; if(square == 0){ for(int i=0;i> name; bool ignoreRow = false; if (names.count(name) != 0) { ignoreRow = true; count++; } else{ out << name; keptCount++; } for(int j=0;jgetControl_pressed()) { inPhylip.close(); out.close(); return 0; } inPhylip >> distance; if (!ignoreRow) { //is this a column we want if(rows.count(j) != 0) { out << '\t' << distance; } } } if (!ignoreRow) { out << endl; } } } else{ for(int i=0;i> name; bool ignoreRow = false; if (names.count(name) != 0) { ignoreRow = true; count++; } else{ out << name; keptCount++; } for(int j=0;jgetControl_pressed()) { inPhylip.close(); out.close(); return 0; } inPhylip >> distance; if (!ignoreRow) { //is this a column we want if(rows.count(j) != 0) { out << '\t' << distance; } } } if (!ignoreRow) { out << endl; } } } inPhylip.close(); out.close(); if (keptCount == 0) { m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file.\n"); } else if (count != names.size()) { m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file.\n"); //rewrite with new number util.renameFile(outputFileName, outputFileName+".temp"); ofstream out2; util.openOutputFile(outputFileName, out2); out2 << keptCount << endl; ifstream in3; util.openInputFile(outputFileName+".temp", in3); in3 >> nseqs; gobble(in3); char buffer[4096]; while (!in3.eof()) { in3.read(buffer, 4096); out2.write(buffer, in3.gcount()); } in3.close(); out2.close(); util.mothurRemove(outputFileName+".temp"); } m->mothurOut("Removed " + toString(count) + " groups or sequences from your phylip file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "RemoveDistsCommand", "readPhylip"); exit(1); } } //********************************************************************************************************************** int RemoveDistsCommand::readColumn(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(columnfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(columnfile)); variables["[extension]"] = util.getExtension(columnfile); string outputFileName = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(columnfile, in); set removeNames; string firstName, secondName; float distance; bool wrote = false; while (!in.eof()) { if 
(m->getControl_pressed()) { out.close(); in.close(); return 0; } in >> firstName >> secondName >> distance; gobble(in); //is either names in the accnos file if (names.count(firstName) != 0) { removeNames.insert(firstName); if (names.count(secondName) != 0) { removeNames.insert(secondName); } } else if (names.count(secondName) != 0) { removeNames.insert(secondName); if (names.count(firstName) != 0) { removeNames.insert(firstName); } } else { wrote = true; out << firstName << '\t' << secondName << '\t' << distance << endl; } } in.close(); out.close(); if (!wrote) { m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file.\n"); } else if (removeNames.size() != names.size()) { m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(removeNames.size()) + " of them in the column file.\n"); } m->mothurOut("Removed " + toString(removeNames.size()) + " groups or sequences from your column file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "RemoveDistsCommand", "readColumn"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/removedistscommand.h000077500000000000000000000020401424121717000222750ustar00rootroot00000000000000// // removedistscommand.h // Mothur // // Created by Sarah Westcott on 1/29/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_removedistscommand_h #define Mothur_removedistscommand_h #include "command.hpp" class RemoveDistsCommand : public Command { public: RemoveDistsCommand(string); ~RemoveDistsCommand(){} vector setParameters(); string getCommandName() { return "remove.dists"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Remove.dists"; } string getDescription() { return "removes distances from a phylip or column file related to groups or sequences listed in an accnos file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: unordered_set names; string accnosfile, phylipfile, columnfile; bool abort; vector outputNames; int readPhylip(); int readColumn(); }; #endif mothur-1.48.0/source/commands/removegroupscommand.cpp000077500000000000000000001331151424121717000230310ustar00rootroot00000000000000/* * removegroupscommand.cpp * Mothur * * Created by westcott on 11/10/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "removegroupscommand.h" #include "sequence.hpp" #include "listvector.hpp" #include "inputdata.h" #include "designmap.h" //********************************************************************************************************************** vector RemoveGroupsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none","shared",false,false,true); parameters.push_back(pshared); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip); CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT","group",false,false,true); parameters.push_back(pgroup); CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT","design",false,false); parameters.push_back(pdesign); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT","list",false,false,true); parameters.push_back(plist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT","taxonomy",false,false,true); parameters.push_back(ptaxonomy); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["design"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["phylip"] = tempOutNames; outputTypes["column"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string RemoveGroupsCommand::getHelpString(){ try { string helpString = ""; helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, count, list, taxonomy, design, phylip, column or sharedfile.\n"; helpString += "It outputs a file containing the sequences NOT in the 
those specified groups, or with a sharedfile eliminates the groups you selected.\n"; helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design, phylip, column, sets and groups. The group or count parameter is required, unless you have a current group or count file or are using a sharedfile.\n"; helpString += "You must also provide an accnos containing the list of groups to remove or set the groups or sets parameter to the groups you wish to remove.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like removed. You can separate group names with dashes.\n"; helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to remove. You can separate set names with dashes.\n"; helpString += "The remove.groups command should be in the following format: remove.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n"; helpString += "Example remove.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n"; helpString += "or remove.groups(groups=pasture, fasta=amazon.fasta, amazon.groups).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RemoveGroupsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],pick,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } else if (type == "phylip") { pattern = "[filename],pick,[extension]"; } else if (type == "column") { pattern = "[filename],pick,[extension]"; } else if (type == "list") { pattern = "[filename],[tag],pick,[extension]"; } else if (type == "shared") { pattern = "[filename],[tag],pick,[extension]"; } else if (type == "design") { pattern = "[filename],[tag],pick,[extension]-[filename],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** RemoveGroupsCommand::RemoveGroupsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { accnosfile = ""; abort = true; } else if (accnosfile == "not found") { accnosfile = ""; } else { current->setAccnosFile(accnosfile); } phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { phylipfile = ""; } else { current->setPhylipFile(phylipfile); } columnfile = 
validParameter.validFile(parameters, "column"); if (columnfile == "not open") { columnfile = ""; abort = true; } else if (columnfile == "not found") { columnfile = ""; } else { current->setColumnFile(columnfile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not open") { taxfile = ""; abort = true; } else if (taxfile == "not found") { taxfile = ""; } else { current->setTaxonomyFile(taxfile); } designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { designfile = ""; abort = true; } else if (designfile == "not found") { designfile = ""; } else { current->setDesignFile(designfile); } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); } sets = validParameter.valid(parameters, "sets"); if (sets == "not found") { sets = ""; } else { util.splitAtDash(sets, Sets); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) { //is there are current file available for any of these? 
if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) { //give priority to group, then shared groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current groupfile, countfile or sharedfile and one is required.\n"); abort = true; } } } }else { //give priority to shared, then group sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { designfile = current->getDesignFile(); if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current groupfile, designfile, countfile or sharedfile and one is required.\n"); abort = true; } } } } } } if ((accnosfile == "") && (Groups.size() == 0) && (Sets.size() == 0)) { m->mothurOut("[ERROR]: You must provide an accnos file or specify groups using the groups or sets parameters.\n"); abort = true; } if ((Groups.size() != 0) && (Sets.size() != 0)) { m->mothurOut("[ERROR]: You cannot use the groups and sets parameters at the same time, quitting.\n"); abort = true; } if ((Sets.size() != 0) && (designfile == "")) { m->mothurOut("[ERROR]: You must provide a design file when using the sets parameter.\n"); abort = true; } if ((phylipfile == "") && (columnfile == "") && (fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "") && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("[ERROR]: You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count, phylip, column or list.\n"); abort = true; } if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("[ERROR]: If using a fasta, name, taxonomy, group or list, then you must provide a group or count file.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand"); exit(1); } } //********************************************************************************************************************** int RemoveGroupsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //get groups you want to remove if (accnosfile != "") { util.readAccnos(accnosfile, Groups); } else if (Sets.size() != 0) { fillGroupsFromDesign(); } if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all vector namesGroups = groupMap->getNamesOfGroups(); vector checkedGroups; for (int i = 0; i < Groups.size(); i++) { if (util.inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); } else { 
m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); } } if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; } else { Groups = checkedGroups; } //fill names with names of sequences that are from the groups we want to remove fillNames(); delete groupMap; }else if (countfile != ""){ if ((fastafile != "") || (listfile != "") || (taxfile != "")) { //m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); } vector gNamesOfGroups; CountTable ct; if (!ct.testGroups(countfile, gNamesOfGroups)) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; } if (Groups.size() == 0) { return 0; } ct.readTable(countfile, true, false); int oldTotal = ct.getNumSeqs(); vector namesOfSeqs = ct.getNamesOfSeqs(); //all names for (int i = 0; i < Groups.size(); i++) { ct.removeGroup(Groups[i]); } vector newSeqs = ct.getNamesOfSeqs(); //names of seqs left after removing groups unordered_set goodNames = util.mothurConvert(newSeqs); for (int i = 0; i < namesOfSeqs.size(); i++) { if (goodNames.count(namesOfSeqs[i]) == 0) { //you aren't on good list names.insert(namesOfSeqs[i]); //add to remove list } } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string outputFileName = getOutputFileName("count", variables); int selectedCount = ct.getNumSeqs(); if (selectedCount == 0) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get.\n"); } else { ct.printTable(outputFileName); outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); } int removedCount = oldTotal - selectedCount; m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file.\n"); } if (m->getControl_pressed()) { return 0; } //read through the correct file and output lines you want to keep if (namefile != "") { readName(); } if (fastafile != "") { readFasta(); } if (groupfile != "") { readGroup(); } if (listfile != "") { readList(); } if (taxfile != "") { readTax(); } if (sharedfile != "") { readShared(); } if (designfile != "") { readDesign(); } if (phylipfile != "") { readPhylip(); } if (columnfile != "") { readColumn(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes 
!= outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } itTypes = outputTypes.find("design"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setDesignFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setPhylipFile(currentName); } } itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setColumnFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "execute"); exit(1); } } //********************************************************************************************************************** void RemoveGroupsCommand::readFasta(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[extension]"] = util.getExtension(fastafile); string outputFileName = getOutputFileName("fasta", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(fastafile, in); string name; bool wroteSomething = false; int removedCount = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } Sequence currSeq(in); name = currSeq.getName(); if (name != "") { //if this name is in the accnos file if (names.count(name) == 0) { wroteSomething = true; currSeq.printSequence(out); }else { //if you are not in the accnos file check if you are a name that needs to be changed map::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { wroteSomething = true; currSeq.setName(it->second); currSeq.printSequence(out); }else { removedCount++; } } } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove.\n"); } outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file.\n"); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** void RemoveGroupsCommand::readShared(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); } //get group names from sharedfile so we can set Groups to the groupNames we want to keep //that way we can take advantage of the reads in inputdata and sharedRabundVector InputData input(sharedfile, "sharedfile", nullVector); 
SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); vector allGroupsNames = lookup->getNamesGroups(); map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile)); variables["[extension]"] = util.getExtension(sharedfile); vector groupsToKeep; for (int i = 0; i < allGroupsNames.size(); i++) { if (!util.inUsersGroups(allGroupsNames[i], Groups)) { groupsToKeep.push_back(allGroupsNames[i]); } } if (allGroupsNames.size() == groupsToKeep.size()) { m->mothurOut("Your file does not contain any groups you wish to remove.\n"); delete lookup; return; } //reset read delete lookup; InputData input2(sharedfile, "sharedfile", groupsToKeep); lookup = input2.getSharedRAbundVectors(); bool wroteSomething = false; bool printHeaders = true; while(lookup != nullptr) { variables["[tag]"] = lookup->getLabel(); string outputFileName = getOutputFileName("shared", variables); ofstream out; util.openOutputFile(outputFileName, out); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); if (m->getControl_pressed()) { out.close(); util.mothurRemove(outputFileName); delete lookup; return; } if (lookup->size() != 0) { wroteSomething = true; } lookup->print(out, printHeaders); //get next line to process //prevent memory leak delete lookup; lookup = input2.getSharedRAbundVectors(); out.close(); } if (wroteSomething == false) { m->mothurOut("Your file contains only the groups you wish to remove.\n"); } string groupsString = ""; for (int i = 0; i < Groups.size()-1; i++) { groupsString += Groups[i] + ", "; } groupsString += Groups[Groups.size()-1]; m->mothurOut("Removed groups: " + groupsString + " from your shared file.\n"); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readShared"); exit(1); } } //********************************************************************************************************************** void RemoveGroupsCommand::readList(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); InputData input(listfile, "list", nullVector); ListVector* list = input.getListVector(); bool wroteSomething = false; int removedCount = 0; while(list != nullptr) { removedCount = 0; variables["[tag]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; util.openOutputFile(outputFileName, out); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); vector binLabels = list->getLabels(); vector newBinLabels; //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); //for each bin for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { out.close(); util.mothurRemove(outputFileName); return; } //parse out names that are in accnos file string binnames = list->get(i); string newNames = ""; while (binnames.find_first_of(',') != -1) { string name = binnames.substr(0,binnames.find_first_of(',')); binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length()); //if that name is in the .accnos file, add it if (names.count(name) == 0) { newNames += name + ","; } else { //if you are not in the accnos file check if you are a name that needs to be changed map::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { newNames += it->second + ","; }else { removedCount++; } 
} } //get last name if (names.count(binnames) == 0) { newNames += binnames + ","; } else { //if you are not in the accnos file check if you are a name that needs to be changed map::iterator it = uniqueToRedundant.find(binnames); if (it != uniqueToRedundant.end()) { newNames += it->second + ","; }else { removedCount++; } } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.print(out, false); } out.close(); delete list; list = input.getListVector(); } if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove.\n"); } m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file.\n"); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readList"); exit(1); } } //********************************************************************************************************************** void RemoveGroupsCommand::readName(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(namefile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[extension]"] = util.getExtension(namefile); string outputFileName = getOutputFileName("name", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(namefile, in); string name, firstCol, secondCol; bool wroteSomething = false; int removedCount = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> firstCol; gobble(in); in >> secondCol; gobble(in); vector parsedNames; util.splitAtComma(secondCol, parsedNames); vector validSecond; validSecond.clear(); for (int i = 0; i < parsedNames.size(); i++) { if (names.count(parsedNames[i]) == 0) { validSecond.push_back(parsedNames[i]); } } removedCount += parsedNames.size()-validSecond.size(); //if the name in the first column is in the set then print it and any other names in second column also in set if (names.count(firstCol) == 0) { wroteSomething = true; out << firstCol << '\t'; //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; //make first name in set you come to first column and then add the remaining names to second column }else { //you want part of this row if (validSecond.size() != 0) { wroteSomething = true; out << validSecond[0] << '\t'; //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; uniqueToRedundant[firstCol] = validSecond[0]; } } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove.\n"); } outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file.\n"); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readName"); exit(1); } } 
//********************************************************************************************************************** void RemoveGroupsCommand::readGroup(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[extension]"] = util.getExtension(groupfile); string outputFileName = getOutputFileName("group", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(groupfile, in); string name, group; bool wroteSomething = false; int removedCount = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> name; //read from first column in >> group; //read from second column //if this name is in the accnos file if (names.count(name) == 0) { wroteSomething = true; out << name << '\t' << group << endl; }else { removedCount++; } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove.\n"); } outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file.\n"); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readGroup"); exit(1); } } //********************************************************************************************************************** void RemoveGroupsCommand::readDesign(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(designfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(designfile)); variables["[extension]"] = util.getExtension(designfile); string outputFileName = getOutputFileName("design", variables); DesignMap designMap(designfile); if (m->getControl_pressed()) { return ; } vector groupsToKeep; vector allGroups = designMap.getNamesGroups(); for(int i = 0; i < allGroups.size(); i++) { if (!util.inUsersGroups(allGroups[i], Groups)) { groupsToKeep.push_back(allGroups[i]); } } bool wroteSomething = false; ofstream out; util.openOutputFile(outputFileName, out); int numGroupsFound = designMap.printGroups(out, groupsToKeep); if (numGroupsFound > 0) { wroteSomething = true; } out.close(); names.clear(); names = util.mothurConvert(Groups); int removedCount = allGroups.size() - numGroupsFound; if (wroteSomething == false) { m->mothurOut("Your file contains only groups from the groups you wish to remove.\n"); } outputTypes["design"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " groups from your design file.\n"); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readDesign"); exit(1); } } //********************************************************************************************************************** void RemoveGroupsCommand::readTax(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile)); variables["[extension]"] = util.getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(taxfile, in); string name, tax; bool wroteSomething = 
false; int removedCount = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> name; gobble(in); tax = util.getline(in); gobble(in); //if this name is in the accnos file if (names.count(name) == 0) { wroteSomething = true; out << name << '\t' << tax << endl; }else { //if you are not in the accnos file check if you are a name that needs to be changed map::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { wroteSomething = true; out << it->second << '\t' << tax << endl; }else { removedCount++; } } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove.\n"); } outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file.\n"); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readTax"); exit(1); } } //********************************************************************************************************************** void RemoveGroupsCommand::readPhylip(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(phylipfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(phylipfile)); variables["[extension]"] = util.getExtension(phylipfile); string outputFileName = getOutputFileName("phylip", variables); ifstream in; util.openInputFile(phylipfile, in); float distance; int square, nseqs; square = 0; string name; unsigned int row; set rows; //converts names in names to a index row = 0; string numTest; in >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); m->setControl_pressed(true); return; } else { convert(numTest, nseqs); } //not one we want to remove if (names.count(name) == 0) { rows.insert(row); } row++; //is the matrix square? 
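//same format check as in remove.dists: an alphanumeric character before the first newline means the first row already
//holds distances (square matrix); hitting the newline first means the file is lower-triangle.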
char d; while((d=in.get()) != EOF){ if(isalnum(d)){ square = 1; in.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = 0; break; } } //map name to row/column if(square == 0){ for(int i=1;i> name; if (names.count(name) == 0) { rows.insert(row); } row++; for(int j=0;jgetControl_pressed()) { in.close(); return; } in >> distance; } } } else{ for(int i=1;i> name; if (names.count(name) == 0) { rows.insert(row); } row++; for(int j=0;jgetControl_pressed()) { in.close(); return; } in >> distance; } } } in.close(); if (m->getControl_pressed()) { return; } //read through file only printing rows and columns of seqs in names ifstream inPhylip; util.openInputFile(phylipfile, inPhylip); inPhylip >> numTest; ofstream out; util.openOutputFile(outputFileName, out); outputTypes["phylip"].push_back(outputFileName); outputNames.push_back(outputFileName); out << (nseqs-names.size()) << endl; unsigned int count = 0; unsigned int keptCount = 0; if(square == 0){ for(int i=0;i> name; bool ignoreRow = false; if (names.count(name) != 0) { ignoreRow = true; count++; } else{ out << name; keptCount++; } for(int j=0;jgetControl_pressed()) { inPhylip.close(); out.close(); return; } inPhylip >> distance; if (!ignoreRow) { //is this a column we want if(rows.count(j) != 0) { out << '\t' << distance; } } } if (!ignoreRow) { out << endl; } } } else{ for(int i=0;i> name; bool ignoreRow = false; if (names.count(name) != 0) { ignoreRow = true; count++; } else{ out << name; keptCount++; } for(int j=0;jgetControl_pressed()) { inPhylip.close(); out.close(); return; } inPhylip >> distance; if (!ignoreRow) { //is this a column we want if(rows.count(j) != 0) { out << '\t' << distance; } } } if (!ignoreRow) { out << endl; } } } inPhylip.close(); out.close(); if (keptCount == 0) { m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file.\n"); } else if (count != names.size()) { m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file.\n"); //rewrite with new number util.renameFile(outputFileName, outputFileName+".temp"); ofstream out2; util.openOutputFile(outputFileName, out2); out2 << keptCount << endl; ifstream in3; util.openInputFile(outputFileName+".temp", in3); in3 >> nseqs; gobble(in3); char buffer[4096]; while (!in3.eof()) { in3.read(buffer, 4096); out2.write(buffer, in3.gcount()); } in3.close(); out2.close(); util.mothurRemove(outputFileName+".temp"); } m->mothurOut("Removed " + toString(count) + " groups or sequences from your phylip file.\n"); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readPhylip"); exit(1); } } //********************************************************************************************************************** void RemoveGroupsCommand::readColumn(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(columnfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(columnfile)); variables["[extension]"] = util.getExtension(columnfile); string outputFileName = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(columnfile, in); set removeNames; string firstName, secondName; float distance; bool wrote = false; while (!in.eof()) { if (m->getControl_pressed()) { 
out.close(); in.close(); return; } in >> firstName >> secondName >> distance; gobble(in); //is either names in the accnos file if (names.count(firstName) != 0) { removeNames.insert(firstName); if (names.count(secondName) != 0) { removeNames.insert(secondName); } } else if (names.count(secondName) != 0) { removeNames.insert(secondName); if (names.count(firstName) != 0) { removeNames.insert(firstName); } } else { wrote = true; out << firstName << '\t' << secondName << '\t' << distance << endl; } } in.close(); out.close(); if (!wrote) { m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file.\n"); } else if (removeNames.size() != names.size()) { m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(removeNames.size()) + " of them in the column file.\n"); } m->mothurOut("Removed " + toString(removeNames.size()) + " groups or sequences from your column file.\n"); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readColumn"); exit(1); } } //********************************************************************************************************************** //names to remove void RemoveGroupsCommand::fillNames(){ try { vector seqs = groupMap->getNamesSeqs(); for (int i = 0; i < seqs.size(); i++) { if (m->getControl_pressed()) { break; } string group = groupMap->getGroup(seqs[i]); if (util.inUsersGroups(group, Groups)) { names.insert(seqs[i]); } } } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "fillNames"); exit(1); } } //********************************************************************************************************************** void RemoveGroupsCommand::fillGroupsFromDesign(){ try { DesignMap designMap(designfile); if (m->getControl_pressed()) { return ; } Groups = designMap.getNamesGroups(Sets); } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "fillGroupsFromDesign"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/removegroupscommand.h000077500000000000000000000033441424121717000224760ustar00rootroot00000000000000#ifndef REMOVEGROUPSCOMMAND_H #define REMOVEGROUPSCOMMAND_H /* * removegroupscommand.h * Mothur * * Created by westcott on 11/10/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "groupmap.h" class RemoveGroupsCommand : public Command { #ifdef UNIT_TEST friend class TestRemoveGroupsCommand; #endif public: RemoveGroupsCommand(string); ~RemoveGroupsCommand(){} vector setParameters(); string getCommandName() { return "remove.groups"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Remove.groups"; } string getDescription() { return "removes sequences from a list, fasta, name, group, shared, design or taxonomy file from a given group or set of groups"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: unordered_set names; string accnosfile, fastafile, namefile, groupfile, countfile, designfile, listfile, taxfile, groups, sharedfile, phylipfile, columnfile, sets; bool abort; vector outputNames, Groups, Sets; GroupMap* groupMap; map uniqueToRedundant; //if a namefile is given and the first column name is not selected //then the other files need to change the unique name in their file to match. 
//only add the names that need to be changed to keep the map search quick
    void readFasta();
    void readName();
    void readGroup();
    void readList();
    void readTax();
    void fillNames();
    void readShared();
    void readDesign();
    void readPhylip();
    void readColumn();
    void fillGroupsFromDesign();
};

#endif
mothur-1.48.0/source/commands/removelineagecommand.cpp000077500000000000000000000670071424121717000231140ustar00rootroot00000000000000/*
 *  removelineagecommand.cpp
 *  Mothur
 *
 *  Created by westcott on 9/24/10.
 *  Copyright 2010 Schloss Lab. All rights reserved.
 *
 */

#include "removelineagecommand.h"
#include "sequence.hpp"
#include "listvector.hpp"
#include "counttable.h"
#include "inputdata.h"
#include "taxonomy.hpp"

//**********************************************************************************************************************
vector<CommandParameter> RemoveLineageCommand::setParameters(){
    try {
        CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
        CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
        CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
        CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
        CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","taxonomy",false,false, true); parameters.push_back(ptaxonomy);
        CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy);
        CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
        CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
        CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true,true); parameters.push_back(ptaxon);
        CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
        CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
        CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);

        abort = false; calledHelp = false;

        vector<string> tempOutNames;
        outputTypes["fasta"] = tempOutNames;
        outputTypes["taxonomy"] = tempOutNames;
        outputTypes["name"] = tempOutNames;
        outputTypes["group"] = tempOutNames;
        outputTypes["alignreport"] = tempOutNames;
        outputTypes["list"] = tempOutNames;
        outputTypes["count"] = tempOutNames;
        outputTypes["constaxonomy"] = tempOutNames;
        outputTypes["shared"] = tempOutNames;
        outputTypes["accnos"] = tempOutNames;

        vector<string> myArray;
        for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
        return myArray;
    }
    catch(exception& e) {
        m->errorOut(e, "RemoveLineageCommand", "setParameters");
        exit(1);
    }
}
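//**********************************************************************************************************************
// Editorial note (not part of mothur): the taxon filtering in this command is performed by util.getTaxons() and
// util.searchTax() in readTax()/readConsTax() below, which also handle confidence scores. As a rough, self-contained
// sketch of the core idea -- split a semicolon-delimited taxonomy into levels and ask whether the queried lineage
// appears as a contiguous run of those levels -- one could write something like the following. The helper names
// (splitTaxLevels, containsLineage) are hypothetical and exist only for illustration.
static vector<string> splitTaxLevels(const string& tax) {
    vector<string> levels; string level = "";
    for (size_t i = 0; i < tax.length(); i++) {
        if (tax[i] == ';') { if (!level.empty()) { levels.push_back(level); level = ""; } }
        else { level += tax[i]; }
    }
    if (!level.empty()) { levels.push_back(level); }
    return levels;
}

static bool containsLineage(const string& taxonomy, const string& lineage) {
    vector<string> taxLevels = splitTaxLevels(taxonomy);   // e.g. Bacteria, Firmicutes, Bacilli, ...
    vector<string> queryLevels = splitTaxLevels(lineage);  // e.g. Bacteria, Firmicutes
    if (queryLevels.empty() || (queryLevels.size() > taxLevels.size())) { return false; }
    for (size_t start = 0; start + queryLevels.size() <= taxLevels.size(); start++) {
        bool match = true;
        for (size_t i = 0; i < queryLevels.size(); i++) {
            if (taxLevels[start+i] != queryLevels[i]) { match = false; break; }
        }
        if (match) { return true; }  // the record belongs to the queried lineage and would be removed
    }
    return false;
}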
//********************************************************************************************************************** string RemoveLineageCommand::getHelpString(){ try { string helpString = ""; helpString += "The remove.lineage command reads a taxonomy or constaxonomy file and any of the following file types: fasta, name, group, count, list, shared or alignreport file. The constaxonomy can only be used with a shared or list file.\n"; helpString += "It outputs a file containing only the sequences or OTUS from the taxonomy file that are not from the taxon you requested to be removed.\n"; helpString += "The remove.lineage command parameters are taxon, fasta, name, group, count, list, shared, taxonomy, alignreport, label and dups. You must provide taxonomy or constaxonomy unless you have a valid current taxonomy file.\n"; helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n"; helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n"; helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n"; helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n"; helpString += "The label parameter is used to analyze specific labels in your input. \n"; helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n"; helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n"; helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n"; helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RemoveLineageCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],pick,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "constaxonomy") { pattern = "[filename],pick.cons.taxonomy"; } else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "alignreport") { pattern = "[filename],pick.[extension]"; } else if (type == "accnos") { pattern = "[filename],accnos"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** RemoveLineageCommand::RemoveLineageCommand(string option) : Command() { try { if(option == 
"help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } alignfile = validParameter.validFile(parameters, "alignreport"); if (alignfile == "not open") { abort = true; } else if (alignfile == "not found") { alignfile = ""; } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not open") { taxfile = ""; abort = true; } else if (taxfile == "not found") { taxfile = ""; } else { current->setTaxonomyFile(taxfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } constaxonomy = validParameter.validFile(parameters, "constaxonomy"); if (constaxonomy == "not open") { constaxonomy = ""; abort = true; } else if (constaxonomy == "not found") { constaxonomy = ""; } if ((constaxonomy == "") && (taxfile == "")) { taxfile = current->getTaxonomyFile(); if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter.\n"); } else { m->mothurOut("You have no current taxonomy file and did not provide a constaxonomy file. 
The taxonomy or constaxonomy parameter is required.\n"); abort = true; } } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } string usedDups = "true"; string temp = validParameter.valid(parameters, "dups"); if (temp == "not found") { if (namefile != "") { temp = "true"; } else { temp = "false"; usedDups = ""; } } dups = util.isTrue(temp); taxons = validParameter.valid(parameters, "taxon"); if (taxons == "not found") { taxons = ""; m->mothurOut("No taxons given, please correct.\n"); abort = true; } else { //rip off quotes if (taxons[0] == '\'') { taxons = taxons.substr(1); } if (taxons[(taxons.length()-1)] == '\'') { taxons = taxons.substr(0, (taxons.length()-1)); } } util.splitAtChar(taxons, listOfTaxons, '-'); if (m->getDebug()) { string taxonString = util.getStringFromVector(listOfTaxons, ", "); m->mothurOut("[DEBUG]: " + taxonString + "\n."); } if ((fastafile == "") && (constaxonomy == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, constaxonomy, shared or listfile.\n"); abort = true; } if ((constaxonomy != "") && ((fastafile != "") || (namefile != "") || (groupfile != "") || (alignfile != "") || (taxfile != "") || (countfile != ""))) { m->mothurOut("[ERROR]: can only use constaxonomy file with a list or shared file, aborting.\n"); abort = true; } if ((constaxonomy != "") && (taxfile != "")) { m->mothurOut("[ERROR]: Choose only one: taxonomy or constaxonomy, aborting.\n"); abort = true; } if ((sharedfile != "") && (taxfile != "")) { m->mothurOut("[ERROR]: sharedfile can only be used with constaxonomy file, aborting.\n"); abort = true; } if ((sharedfile != "") || (listfile != "")) { label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile.\n"); label=""; } } if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand"); exit(1); } } //********************************************************************************************************************** int RemoveLineageCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (m->getControl_pressed()) { return 0; } if (countfile != "") { if ((fastafile != "") || (listfile != "") || (taxfile != "")) { //m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); } } //read through the correct file and output lines you want to keep if (taxfile != "") { string accnosFileName = readTax(); //fills the set of names to get if (!util.isBlank(accnosFileName)) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); runRemoveSeqs(accnosFileName); }else{ m->mothurOut("\n*** No contaminants to 
remove ***\n"); util.mothurRemove(accnosFileName); } }else { string accnosFileName = readConsTax(); //writes accnos file with otuNames if (!util.isBlank(accnosFileName)) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); runRemoveOTUs(accnosFileName); }else{ m->mothurOut("\n*** No contaminants to remove ***\n"); util.mothurRemove(accnosFileName); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } //set constaxonomy file as new current constaxonomyfile itTypes = outputTypes.find("constaxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setConsTaxonomyFile(currentName); } } //set constaxonomy file as new current constaxonomyfile itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "execute"); exit(1); } } //********************************************************************************************************************** int RemoveLineageCommand::runRemoveSeqs(string accnosFileName){ try { //use remove.seqs to create new list and shared files if ((namefile != "") || (fastafile != "") || (countfile != "") || (groupfile != "") || (alignfile != "") || (listfile != "")) { string inputString = "accnos=" + accnosFileName; if (namefile != "") { inputString += ", name=" + namefile; } if (countfile != "") { inputString += ", count=" + countfile; } if (fastafile != "") { inputString += ", fasta=" + fastafile; } if (groupfile != "") { inputString += ", group=" + groupfile; } if (alignfile != "") { inputString += ", alignreport=" + alignfile; } if (listfile != "") { inputString += ", list=" + listfile; } 
m->mothurOut("\n/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); outputTypes.insert(filenames.begin(), filenames.end()); if (listfile != "") { vector files = filenames["list"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (namefile != "") { vector files = filenames["name"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (countfile != "") { vector files = filenames["count"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (fastafile != "") { vector files = filenames["fasta"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (groupfile != "") { vector files = filenames["group"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (alignfile != "") { vector files = filenames["alignreport"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "runRemoveSeqs"); exit(1); } } //********************************************************************************************************************** int RemoveLineageCommand::runRemoveOTUs(string accnosFileName){ try { //use remove.otus to create new list and shared files if ((listfile != "") || (sharedfile != "")) { string inputString = "accnos=" + accnosFileName; if (listfile != "") { inputString += ", list=" + listfile; } if (sharedfile != "") { inputString += ", shared=" + sharedfile; } m->mothurOut("\n/******************************************/\n"); m->mothurOut("Running command: remove.otus(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveOtusCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); outputTypes.insert(filenames.begin(), filenames.end()); if (listfile != "") { vector files = filenames["list"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } if (sharedfile != "") { vector files = filenames["shared"]; outputNames.insert(outputNames.end(), files.begin(), files.end()); } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "runRemoveOTUs"); exit(1); } } //********************************************************************************************************************** string RemoveLineageCommand::readTax(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile)); variables["[extension]"] = util.getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); string accnosFileName = getOutputFileName("accnos", variables); ofstream out, outAccnos; util.openOutputFile(outputFileName, out); util.openOutputFile(accnosFileName, outAccnos); ifstream in; util.openInputFile(taxfile, in); string name, tax; bool wroteSomething = false; vector taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false); vector< vector > searchTaxons; 
searchTaxons.resize(listOfTaxons.size()); for (int i = 0; i < listOfTaxons.size(); i++) { bool hasCon = false; searchTaxons[i] = util.getTaxons(listOfTaxons[i], hasCon); taxonsHasConfidence[i] = hasCon; } while(!in.eof()){ if (m->getControl_pressed()) { break; } in >> name; gobble(in); tax = util.getline(in); gobble(in); Taxonomy thisSeq(name, tax); vector otuTax = thisSeq.getTaxons(); util.removeQuotes(otuTax); if (!util.searchTax(otuTax, taxonsHasConfidence, searchTaxons)) { wroteSomething = true; out << name << '\t' << tax << endl; }else { outAccnos << name << endl; } } in.close(); out.close(); outAccnos.close(); if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + ".\n"); } outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName); return accnosFileName; } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "readTax"); exit(1); } } //********************************************************************************************************************** string RemoveLineageCommand::readConsTax(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(constaxonomy); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxonomy)); string accnosFileName = getOutputFileName("accnos", variables); string outputFileName = getOutputFileName("constaxonomy", variables); ofstream out, outAccnos; util.openOutputFile(outputFileName, out); util.openOutputFile(accnosFileName, outAccnos); ifstream in; util.openInputFile(constaxonomy, in); string otuLabel, tax; //read headers string headers = util.getline(in); out << headers << endl; bool wroteSomething = false; vector taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false); vector< vector > searchTaxons; searchTaxons.resize(listOfTaxons.size()); for (int i = 0; i < listOfTaxons.size(); i++) { bool hasCon = false; searchTaxons[i] = util.getTaxons(listOfTaxons[i], hasCon); taxonsHasConfidence[i] = hasCon; } int numR = 0; while(!in.eof()){ if (m->getControl_pressed()) { break; } Taxonomy thisOtu(in); vector otuTax = thisOtu.getTaxons(); util.removeQuotes(otuTax); if (!util.searchTax(otuTax, taxonsHasConfidence, searchTaxons)) { thisOtu.printConsTax(out); wroteSomething = true; }else { outAccnos << thisOtu.getName() << endl; numR++; } } in.close(); out.close(); outAccnos.close(); if (!wroteSomething) { m->mothurOut("Your constaxonomy file contains OTUs only from " + taxons + ".\n"); } else { m->mothurOut("Removed " + toString(numR) + " OTUs from your cons.taxonomy file.\n"); } outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName); return accnosFileName; } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "readConsTax"); exit(1); } } /**************************************************************************************************/ vector< map > RemoveLineageCommand::getTaxons(string tax) { try { vector< map > t; string taxon = ""; int taxLength = tax.length(); for(int i=0;i temp; temp[newtaxon] = con; t.push_back(temp); taxon = ""; } else{ taxon += tax[i]; } } return t; } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "getTaxons"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/removelineagecommand.h000077500000000000000000000025721424121717000225650ustar00rootroot00000000000000#ifndef 
REMOVELINEAGECOMMAND_H #define REMOVELINEAGECOMMAND_H /* * removelineagecommand.h * Mothur * * Created by westcott on 9/24/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "sharedrabundvectors.hpp" #include "listvector.hpp" #include "removeseqscommand.h" #include "removeotuscommand.h" class RemoveLineageCommand : public Command { public: RemoveLineageCommand(string); ~RemoveLineageCommand(){}; vector setParameters(); string getCommandName() { return "remove.lineage"; } string getCommandCategory() { return "Phylotype Analysis"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Remove.lineage"; } string getDescription() { return "removes sequences from a list, fasta, name, group, alignreport or taxonomy file from a given taxonomy or set of taxonomies"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames, listOfTaxons; string fastafile, namefile, groupfile, alignfile, listfile, countfile, taxfile, taxons, sharedfile, constaxonomy, label; bool abort, dups; string readTax(); string readConsTax(); int runRemoveOTUs(string); int runRemoveSeqs(string); vector< map > getTaxons(string); }; #endif mothur-1.48.0/source/commands/removeotuscommand.cpp000077500000000000000000000601341424121717000225040ustar00rootroot00000000000000// // removeotulabels.cpp // Mothur // // Created by Sarah Westcott on 5/21/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "removeotuscommand.h" //********************************************************************************************************************** vector RemoveOtusCommand::setParameters(){ try { CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none","constaxonomy",false,false); parameters.push_back(pconstaxonomy); CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none","otucorr",false,false); parameters.push_back(potucorr); CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none","corraxes",false,false); parameters.push_back(pcorraxes); CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist); CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["constaxonomy"] = tempOutNames; outputTypes["otucorr"] = tempOutNames; outputTypes["corraxes"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["list"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "setParameters"); exit(1); } } 
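//**********************************************************************************************************************
// Editorial note (not part of mothur): remove.otus keys everything off the accnos file -- execute() reads the OTU
// labels with util.readAccnos(), normalizes them with util.getSimpleLabel(), and each reader then drops records whose
// label is in that set. A minimal, self-contained sketch of that pattern, assuming one label per line in the accnos
// file; the helper names (readOtuLabels, keepOtu) are hypothetical and exist only for illustration.
static unordered_set<string> readOtuLabels(const string& accnosFileName) {
    unordered_set<string> labels;
    ifstream in(accnosFileName.c_str());
    string label;
    while (in >> label) { labels.insert(label); }  // one OTU label per line
    return labels;
}

static bool keepOtu(const string& otuLabel, const unordered_set<string>& labelsToRemove) {
    // keep a record only if its label was NOT requested for removal
    return (labelsToRemove.count(otuLabel) == 0);
}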
//********************************************************************************************************************** string RemoveOtusCommand::getHelpString(){ try { string helpString = ""; helpString += "The remove.otus command can be used to remove specific otus with the output from classify.otu, otu.association, or corr.axes. It can also be used to select a set of otus from a shared or list file.\n"; helpString += "The remove.otus parameters are: constaxonomy, otucorr, corraxes, shared, list, label and accnos.\n"; helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n"; helpString += "The otucorr parameter is input the results of the otu.association command.\n"; helpString += "The corraxes parameter is input the results of the corr.axes command.\n"; helpString += "The label parameter is used to analyze specific labels in your input. \n"; helpString += "The remove.otus commmand should be in the following format: \n"; helpString += "remove.otus(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RemoveOtusCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "otucorr") { pattern = "[filename],pick,[extension]"; } else if (type == "corraxes") { pattern = "[filename],pick,[extension]"; } else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** RemoveOtusCommand::RemoveOtusCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = current->getAccnosFile(); if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter.\n"); } else { m->mothurOut("You have no valid accnos file and accnos is required.\n"); abort = true; } }else { current->setAccnosFile(accnosfile); } constaxonomyfile = validParameter.validFile(parameters, "constaxonomy"); if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; } else if (constaxonomyfile == "not found") { constaxonomyfile = ""; } corraxesfile = validParameter.validFile(parameters, "corraxes"); if (corraxesfile == "not open") { corraxesfile = ""; abort = true; } else if (corraxesfile == "not found") { corraxesfile = ""; } otucorrfile = validParameter.validFile(parameters, "otucorr"); if (otucorrfile == "not open") { otucorrfile = ""; abort = true; } else if (otucorrfile == "not found") { otucorrfile = 
""; } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "") && (sharedfile == "") && (listfile == "")) { m->mothurOut("You must provide one of the following: constaxonomy, corraxes, otucorr, shared or list.\n"); abort = true; } if ((sharedfile != "") || (listfile != "")) { label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile.\n"); label=""; } } } } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand"); exit(1); } } //********************************************************************************************************************** int RemoveOtusCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //get labels you want to keep otulabels = util.readAccnos(accnosfile); //simplfy labels unordered_set newLabels; for (auto it = otulabels.begin(); it != otulabels.end(); it++) { newLabels.insert(util.getSimpleLabel(*it)); } otulabels = newLabels; if (m->getDebug()) { m->mothurOut("[DEBUG]: numlabels = " + toString(otulabels.size()) + "\n"); } if (m->getControl_pressed()) { return 0; } //read through the correct file and output lines you want to keep if (constaxonomyfile != "") { readClassifyOtu(); } if (corraxesfile != "") { readCorrAxes(); } if (otucorrfile != "") { readOtuAssociation(); } if (listfile != "") { readList(); } if (sharedfile != "") { readShared(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); string currentName = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } //set constaxonomy file as new current constaxonomyfile itTypes = outputTypes.find("constaxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setConsTaxonomyFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "execute"); exit(1); } } //********************************************************************************************************************** int RemoveOtusCommand::readClassifyOtu(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(constaxonomyfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxonomyfile)); variables["[extension]"] = util.getExtension(constaxonomyfile); string outputFileName = getOutputFileName("constaxonomy", variables); ofstream out; util.openOutputFile(outputFileName, 
out); ifstream in; util.openInputFile(constaxonomyfile, in); bool wroteSomething = false; int removedCount = 0; //read headers string headers = util.getline(in); gobble(in); out << headers << endl; while (!in.eof()) { if (m->getControl_pressed()) { break; } string otu = ""; string tax = "unknown"; int size = 0; in >> otu >> size; gobble(in); tax = util.getline(in); gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + otu + toString(size) + tax + "\n"); } if (otulabels.count(util.getSimpleLabel(otu)) == 0) { wroteSomething = true; out << otu << '\t' << size << '\t' << tax << endl; }else { removedCount++; } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " otus from your constaxonomy file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "readClassifyOtu"); exit(1); } } //********************************************************************************************************************** int RemoveOtusCommand::readOtuAssociation(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(otucorrfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(otucorrfile)); variables["[extension]"] = util.getExtension(otucorrfile); string outputFileName = getOutputFileName("otucorr", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(otucorrfile, in); bool wroteSomething = false; int removedCount = 0; //read headers string headers = util.getline(in); gobble(in); out << headers << endl; while (!in.eof()) { if (m->getControl_pressed()) { break; } string otu1 = ""; string otu2 = ""; in >> otu1 >> otu2; string line = util.getline(in); gobble(in); if ((otulabels.count(util.getSimpleLabel(otu1)) == 0) && (otulabels.count(util.getSimpleLabel(otu2)) == 0)){ wroteSomething = true; out << otu1 << '\t' << otu2 << '\t' << line << endl; }else { removedCount++; } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["otucorr"].push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " lines from your otu.corr file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "readOtuAssociation"); exit(1); } } //********************************************************************************************************************** int RemoveOtusCommand::readCorrAxes(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(corraxesfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(corraxesfile)); variables["[extension]"] = util.getExtension(corraxesfile); string outputFileName = getOutputFileName("corraxes", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(corraxesfile, in); bool wroteSomething = false; int removedCount = 0; //read headers string headers = util.getline(in); gobble(in); out << headers << endl; while (!in.eof()) { if (m->getControl_pressed()) { break; } string otu = ""; in >> otu; string line = util.getline(in); gobble(in); if (otulabels.count(util.getSimpleLabel(otu)) == 0) { wroteSomething = true; out << otu << 
'\t' << line << endl; }else { removedCount++; } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file only contains labels from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["corraxes"].push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " lines from your corr.axes file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "readCorrAxes"); exit(1); } } //********************************************************************************************************************** int RemoveOtusCommand::readShared(){ try { SharedRAbundVectors* lookup = getShared(); if (m->getControl_pressed()) { delete lookup; return 0; } vector newLabels; bool wroteSomething = false; int numRemoved = 0; vector binsToRemove; for (int i = 0; i < lookup->getNumBins(); i++) { if (m->getControl_pressed()) { delete lookup; return 0; } //is this otu on the list if (otulabels.count(util.getSimpleLabel(lookup->getOTUNames()[i])) == 0) { wroteSomething = true; }else { numRemoved++; binsToRemove.push_back(i); } } lookup->removeOTUs(binsToRemove, true); string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile)); variables["[extension]"] = util.getExtension(sharedfile); variables["[distance]"] = lookup->getLabel(); string outputFileName = getOutputFileName("shared", variables); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); bool printHeaders = true; lookup->print(out, printHeaders); out.close(); delete lookup; if (wroteSomething == false) { m->mothurOut("Your file contains only OTUs from the .accnos file.\n"); } m->mothurOut("Removed " + toString(numRemoved) + " OTUs from your shared file.\n"); return 0; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "readShared"); exit(1); } } //********************************************************************************************************************** int RemoveOtusCommand::readList(){ try { getListVector(); if (m->getControl_pressed()) { delete list; return 0;} ListVector newList; newList.setLabel(list->getLabel()); int removedCount = 0; bool wroteSomething = false; vector binLabels = list->getLabels(); vector newLabels; for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { delete list; return 0;} if (otulabels.count(util.getSimpleLabel(binLabels[i])) == 0) { newList.push_back(list->get(i)); newLabels.push_back(binLabels[i]); }else { removedCount++; } } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); variables["[distance]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; util.openOutputFile(outputFileName, out); delete list; //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newLabels); newList.print(out, false); } out.close(); if (wroteSomething == false) { m->mothurOut("Your file contains only OTUs from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " OTUs from your list file.\n"); 
return 0; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "readList"); exit(1); } } //********************************************************************************************************************** int RemoveOtusCommand::getListVector(){ try { InputData input(listfile, "list", nullVector); list = input.getListVector(); string lastLabel = list->getLabel(); if (label == "") { label = lastLabel; return 0; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((list != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { return 0; } if(labels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((util.anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); delete list; list = input.getListVector(lastLabel); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); //restore real lastlabel to save below list->setLabel(saveLabel); break; } lastLabel = list->getLabel(); //get next line to process //prevent memory leak delete list; list = input.getListVector(); } if (m->getControl_pressed()) { return 0; } //output error messages about any remaining user labels bool needToRun = false; for (set::iterator it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; } else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete list; list = input.getListVector(lastLabel); } return 0; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "getListVector"); exit(1); } } //********************************************************************************************************************** SharedRAbundVectors* RemoveOtusCommand::getShared(){ try { InputData input(sharedfile, "sharedfile",nullVector); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); string lastLabel = lookup->getLabel(); if (label == "") { label = lastLabel; return lookup; } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set labels; labels.insert(label); set processedLabels; set userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((lookup != nullptr) && (userLabels.size() != 0)) { if (m->getControl_pressed()) { delete lookup; return nullptr; } if(labels.count(lookup->getLabel()) == 1){ processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); break; } if ((util.anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup->getLabel(); delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); //restore real lastlabel to save below lookup->setLabels(saveLabel); break; } lastLabel = lookup->getLabel(); //get next line to process //prevent memory leak delete lookup; lookup = input.getSharedRAbundVectors(); } if (m->getControl_pressed()) { return 0; } //output error messages about any remaining user labels set::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); } return lookup; } catch(exception& e) { m->errorOut(e, "RemoveOtusCommand", "getShared"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/removeotuscommand.h000077500000000000000000000030521424121717000221450ustar00rootroot00000000000000#ifndef Mothur_removeotulabelscommand_h #define Mothur_removeotulabelscommand_h // // removeotuscommand.h // Mothur // // Created by Sarah Westcott on 5/21/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "command.hpp" #include "inputdata.h" #include "listvector.hpp" /**************************************************************************************************/ class RemoveOtusCommand : public Command { public: RemoveOtusCommand(string); ~RemoveOtusCommand(){} vector setParameters(); string getCommandName() { return "remove.otus"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Get.otus"; } string getDescription() { return "Can be used with output from classify.otu, otu.association, or corr.axes to remove specific otus."; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; string accnosfile, constaxonomyfile, otucorrfile, corraxesfile, listfile, sharedfile, label; vector outputNames; unordered_set otulabels; ListVector* list; int readClassifyOtu(); int readOtuAssociation(); int readCorrAxes(); int readList(); int readShared(); int getListVector(); SharedRAbundVectors* getShared(); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/removerarecommand.cpp000077500000000000000000000701641424121717000224470ustar00rootroot00000000000000/* * removerarecommand.cpp * mothur * * Created by westcott on 1/21/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "removerarecommand.h" #include "sequence.hpp" #include "groupmap.h" #include "inputdata.h" //********************************************************************************************************************** vector RemoveRareCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "none", "atleast", "none","list",false,false,true); parameters.push_back(plist); CommandParameter prabund("rabund", "InputTypes", "", "", "none", "atleast", "none","rabund",false,false,true); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "none", "atleast", "none","sabund",false,false,true); parameters.push_back(psabund); CommandParameter pshared("shared", "InputTypes", "", "", "none", "atleast", "none","shared",false,false,true); parameters.push_back(pshared); CommandParameter pcount("count", "InputTypes", "", "", "CountGroup", "none", "none","count",false,false); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false); parameters.push_back(pgroup); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pnseqs("nseqs", "Number", "", "0", "", "", "","",false,true,true); parameters.push_back(pnseqs); CommandParameter pbygroup("bygroup", "Boolean", "", "f", "", "", "","",false,false); parameters.push_back(pbygroup); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["rabund"] = tempOutNames; outputTypes["sabund"] = tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string RemoveRareCommand::getHelpString(){ try { string helpString = ""; helpString += "The remove.rare command parameters are list, rabund, sabund, shared, group, count, label, groups, bygroup and nseqs.\n"; helpString += "The remove.rare command reads one of the following file types: list, rabund, sabund or shared file. It outputs a new file after removing the rare otus.\n"; helpString += "The groups parameter allows you to specify which of the groups you would like analyzed. Default=all. You may separate group names with dashes.\n"; helpString += "The label parameter is used to analyze specific labels in your input. default=all. You may separate label names with dashes.\n"; helpString += "The bygroup parameter is only valid with the shared file. 
default=f, meaning remove any OTU that has nseqs or fewer sequences across all groups.\n"; helpString += "bygroups=T means remove any OTU that has nseqs or fewer sequences in each group (if groupA has 1 sequence and group B has 100 sequences in OTUZ and nseqs=1, then set the groupA count for OTUZ to 0 and keep groupB's count at 100.) \n"; helpString += "The nseqs parameter allows you to set the cutoff for an otu to be deemed rare. It is required.\n"; helpString += "The remove.rare command should be in the following format: remove.rare(shared=yourSharedFile, nseqs=yourRareCutoff).\n"; helpString += "Example remove.rare(shared=amazon.fn.shared, nseqs=2).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RemoveRareCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "rabund") { pattern = "[filename],pick,[extension]"; } else if (type == "sabund") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } else if (type == "list") { pattern = "[filename],[tag],pick,[extension]"; } else if (type == "shared") { pattern = "[filename],[tag],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** RemoveRareCommand::RemoveRareCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for file parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } else { current->setSabundFile(sabundfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } else { current->setRabundFile(rabundfile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { current->setSharedFile(sharedfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((groupfile != "") && (countfile != "")) { 
m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { //is there are current file available for any of these? //give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { rabundfile = current->getRabundFile(); if (rabundfile != "") { m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter.\n"); } else { sabundfile = current->getSabundFile(); if (sabundfile != "") { m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter.\n"); } else { m->mothurOut("[ERROR]: No valid current files. You must provide a list, sabund, rabund or shared file.\n"); abort = true; } } } } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = "all"; } util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } string temp = validParameter.valid(parameters, "nseqs"); if (temp == "not found") { m->mothurOut("[ERROR]: nseqs is a required parameter, quitting.\n"); abort = true; } else { util.mothurConvert(temp, nseqs); } temp = validParameter.valid(parameters, "bygroup"); if (temp == "not found") { temp = "f"; } byGroup = util.isTrue(temp); if (byGroup && (sharedfile == "")) { m->mothurOut("The byGroup parameter is only valid with a shared file.\n"); } if (((groupfile != "") || (countfile != "")) && (listfile == "")) { m->mothurOut("A group or count file is only valid with a list file.\n"); groupfile = ""; countfile = ""; } } } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "RemoveRareCommand"); exit(1); } } //********************************************************************************************************************** int RemoveRareCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (m->getControl_pressed()) { return 0; } //read through the correct file and output lines you want to keep if (sabundfile != "") { processSabund(); } if (rabundfile != "") { processRabund(); } if (listfile != "") { processList(); } if (sharedfile != "") { processShared(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); //set rabund file as new current rabundfile string currentName = ""; itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } } itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if 
((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "execute"); exit(1); } } //********************************************************************************************************************** int RemoveRareCommand::processList(){ try { //you must provide a label because the names in the listfile need to be consistent string thisLabel = ""; if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile.\n"); } else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + ".\n"); thisLabel = *labels.begin(); } else { thisLabel = *labels.begin(); } InputData input(listfile, "list", nullVector); ListVector* list = input.getListVector(); //get first one or the one we want if (thisLabel != "") { //use smart distancing set userLabels; userLabels.insert(thisLabel); set processedLabels; string lastLabel = list->getLabel(); while((list != nullptr) && (userLabels.size() != 0)) { if(userLabels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((util.anyLabelsToProcess(list->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); delete list; list = input.getListVector(lastLabel); break; } lastLabel = list->getLabel(); delete list; list = input.getListVector(); } if (userLabels.size() != 0) { m->mothurOut("Your file does not include the label " + thisLabel + ". 
I will use " + lastLabel + "."); m->mothurOutEndLine(); list = input.getListVector(lastLabel); } } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); variables["[tag]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[extension]"] = util.getExtension(groupfile); string outputGroupFileName = getOutputFileName("group", variables); variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string outputCountFileName = getOutputFileName("count", variables); ofstream out, outGroup; util.openOutputFile(outputFileName, out); bool wroteSomething = false; //if groupfile is given then use it GroupMap* groupMap; CountTable ct; CountTable newCountTable; //instead of removing rare, fill new count table with "good" seqs bool selectedGroups = true; if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); if (Groups.size() == 0) { Groups = groupMap->getNamesOfGroups(); selectedGroups = false; } util.openOutputFile(outputGroupFileName, outGroup); }else if (countfile != "") { ct.readTable(countfile, true, false); if (ct.hasGroupInfo()) { vector namesGroups = ct.getNamesOfGroups(); if (Groups.size() == 0) { Groups = ct.getNamesOfGroups(); selectedGroups = false; } for (int i = 0; i < namesGroups.size(); i++) { newCountTable.addGroup(namesGroups[i]); } } } if (list != nullptr) { vector binLabels = list->getLabels(); vector newLabels; //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); //for each bin for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { if (groupfile != "") { delete groupMap; outGroup.close(); util.mothurRemove(outputGroupFileName); } out.close(); util.mothurRemove(outputFileName); return 0; } //parse out names that are in accnos file string binnames = list->get(i); vector names; vector newNames; util.splitAtComma(binnames, names); int binsize = names.size(); vector newGroupFile; if (groupfile != "") { for(int k = 0; k < names.size(); k++) { string group = groupMap->getGroup(names[k]); if (selectedGroups) { if (util.inUsersGroups(group, Groups)) { newGroupFile.push_back(names[k] + "\t" + group); newNames.push_back(names[k]); } }else { newGroupFile.push_back(names[k] + "\t" + group); newNames.push_back(names[k]); } } names = newNames; binsize = names.size(); }else if (countfile != "") { binsize = 0; for(int k = 0; k < names.size(); k++) { if (ct.hasGroupInfo()) { if (selectedGroups) { vector thisSeqsGroups = ct.getGroups(names[k]); vector thisGroupCounts = ct.getGroupCounts(names[k]); int thisSeqsCount = 0; for (int n = 0; n < thisSeqsGroups.size(); n++) { if (util.inUsersGroups(thisSeqsGroups[n], Groups)) { thisSeqsCount += thisGroupCounts[n]; } } binsize += thisSeqsCount; //if you don't have any seqs from the groups the user wants, then remove you. 
if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); } else { newNames.push_back(names[k]); } }else { //all groups binsize += ct.getNumSeqs(names[k]); newNames.push_back(names[k]); } }else { binsize += ct.getNumSeqs(names[k]); newNames.push_back(names[k]); } } }else { newNames = names; } if (binsize > nseqs) { //keep bin string saveBinNames = util.getStringFromVector(newNames, ","); newList.push_back(saveBinNames); newLabels.push_back(binLabels[i]); if (groupfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; } } else if (countfile != "") { for(int k = 0; k < newNames.size(); k++) { if (ct.hasGroupInfo()) { vector groupCounts = ct.getGroupCounts(newNames[k]); newCountTable.push_back(newNames[k], groupCounts); }else { int reps = ct.getNumSeqs(newNames[k]); newCountTable.push_back(newNames[k], reps); } } } } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newLabels); newList.print(out, false); } } out.close(); if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); } if (countfile != "") { if (newCountTable.hasGroupInfo()) { vector allGroups = newCountTable.getNamesOfGroups(); for (int i = 0; i < allGroups.size(); i++) { if (!util.inUsersGroups(allGroups[i], Groups)) { newCountTable.removeGroup(allGroups[i]); } } } newCountTable.printTable(outputCountFileName, true); outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName); } if (wroteSomething == false) { m->mothurOut("Your file contains only rare sequences.\n"); } outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "processList"); exit(1); } } //********************************************************************************************************************** void RemoveRareCommand::processSabund(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sabundfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sabundfile)); variables["[extension]"] = util.getExtension(sabundfile); string outputFileName = getOutputFileName("sabund", variables); outputTypes["sabund"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
InputData input(sabundfile, "sabund", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; SAbundVector* sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); while (sabund != nullptr) { if (m->getControl_pressed()) { delete sabund; break; } if (sabund->getMaxRank() > nseqs) { for(int i = 1; i <=nseqs; i++) { sabund->set(i, 0); } }else { sabund->clear(); } if (sabund->getNumBins() > 0) { sabund->print(out); } delete sabund; sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "processSabund"); exit(1); } } //********************************************************************************************************************** void RemoveRareCommand::processRabund(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(rabundfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(rabundfile)); variables["[extension]"] = util.getExtension(rabundfile); string outputFileName = getOutputFileName("rabund", variables); outputTypes["rabund"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. InputData input(rabundfile, "rabund", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; RAbundVector* rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); while (rabund != nullptr) { if (m->getControl_pressed()) { delete rabund; break; } RAbundVector newRabund; newRabund.setLabel(rabund->getLabel()); for (int i = 0; i < rabund->getNumBins(); i++) { if (rabund->get(i) > nseqs) { newRabund.push_back(rabund->get(i)); } } if (newRabund.getNumBins() > 0) { newRabund.print(out); } delete rabund; rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "processRabund"); exit(1); } } //********************************************************************************************************************** void RemoveRareCommand::processShared(){ try { //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } processLookup(lookup); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "processShared"); exit(1); } } //********************************************************************************************************************** void RemoveRareCommand::processLookup(SharedRAbundVectors*& lookup){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile)); variables["[extension]"] = util.getExtension(sharedfile); variables["[tag]"] = lookup->getLabel(); string outputFileName = getOutputFileName("shared", variables); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); vector newRabunds; newRabunds.resize(lookup->size()); vector headers; vector namesOfGroups = lookup->getNamesGroups(); for (int i = 0; i < namesOfGroups.size(); i++) { newRabunds[i].setGroup(namesOfGroups[i]); newRabunds[i].setLabel(lookup->getLabel()); } vector data = lookup->getSharedRAbundVectors(); vector currentLabels = lookup->getOTUNames(); if (byGroup) { //for each otu for (int i = 0; i < lookup->getNumBins(); i++) { bool allZero = true; if (m->getControl_pressed()) { out.close(); return; } //for each group vector abunds = lookup->getOTU(i); for (int j = 0; j < abunds.size(); j++) { //are you rare? if (abunds[j] > nseqs) { allZero = false; } else { abunds[j] = 0; } } //eliminates zero otus if (allZero) { } else { for (int j = 0; j < abunds.size(); j++) { newRabunds[j].push_back(abunds[j]); } headers.push_back(currentLabels[i]); } } }else { //for each otu for (int i = 0; i < lookup->getNumBins(); i++) { if (m->getControl_pressed()) { out.close(); return; } int totalAbund = lookup->getOTUTotal(i); //eliminates otus below rare cutoff if (totalAbund <= nseqs) { } //ignore else { headers.push_back(currentLabels[i]); for (int j = 0; j < data.size(); j++) { newRabunds[j].push_back(data[j]->get(i)); } } } } //do we have any otus above the rare cutoff if (newRabunds[0].getNumBins() != 0) { out << "label\tGroup\tnumOtus"; for (int j = 0; j < headers.size(); j++) { out << '\t' << headers[j]; } out << endl; for (int j = 0; j < newRabunds.size(); j++) { newRabunds[j].print(out); } } out.close(); } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "processLookup"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/removerarecommand.h000077500000000000000000000023151424121717000221050ustar00rootroot00000000000000#ifndef REMOVERARECOMMAND_H #define REMOVERARECOMMAND_H /* * removerarecommand.h * mothur * * Created by westcott on 1/21/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "command.hpp" #include "listvector.hpp" #include "sharedrabundvectors.hpp" class RemoveRareCommand : public Command { public: RemoveRareCommand(string); ~RemoveRareCommand(){} vector setParameters(); string getCommandName() { return "remove.rare"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Remove.rare"; } string getDescription() { return "removes rare sequences from a sabund, rabund, shared or list and group file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string sabundfile, rabundfile, sharedfile, groupfile, countfile, listfile, groups, label; int nseqs, allLines; bool abort, byGroup; vector outputNames, Groups; set labels; void processSabund(); void processRabund(); int processList(); void processShared(); void processLookup(SharedRAbundVectors*&); }; #endif mothur-1.48.0/source/commands/removeseqscommand.cpp000077500000000000000000001314351424121717000224700ustar00rootroot00000000000000/* * removeseqscommand.cpp * Mothur * * Created by Sarah Westcott on 7/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "removeseqscommand.h" #include "sequence.hpp" #include "listvector.hpp" #include "counttable.h" #include "fastqread.h" #include "inputdata.h" #include "contigsreport.hpp" #include "alignreport.hpp" //********************************************************************************************************************** vector RemoveSeqsCommand::setParameters(){ try { CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup); CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy); CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport); CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "FNGLT", "FNGLT", "none","contigsreport",false,false); parameters.push_back(pcontigsreport); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none","qfile",false,false); parameters.push_back(pqfile); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos); CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "illumina1.8+", "", "", "","",false,false,true); parameters.push_back(pformat); 
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["fastq"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["alignreport"] = tempOutNames; outputTypes["contigsreport"] = tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string RemoveSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq, contigsreport or alignreport file.\n"; helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n"; helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport, contigsreport, fastq and dups. You must provide accnos and at least one of the file parameters.\n"; helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n"; helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. 
\n"; helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n"; helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RemoveSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],pick,[extension]"; } else if (type == "fastq") { pattern = "[filename],pick,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } else if (type == "count") { pattern = "[filename],pick,[extension]"; } else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } else if (type == "qfile") { pattern = "[filename],pick,[extension]"; } else if (type == "alignreport") { pattern = "[filename],pick.[extension]"; } else if (type == "contigsreport") { pattern = "[filename],pick.[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** RemoveSeqsCommand::RemoveSeqsCommand(string accnos, pair dupsFile, string dupsFileType) { try { names = util.readAccnos(accnos); dups = true; abort = false; calledHelp = false; vector tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; if (dupsFile.first != "") { if (dupsFileType == "count") { readCount(dupsFile.first, dupsFile.second); } else { readName(dupsFile.first, dupsFile.second); } } } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand - mothurRun"); exit(1); } } //********************************************************************************************************************** RemoveSeqsCommand::RemoveSeqsCommand(unordered_set n, pair dupsFile, string dupsFileType) { try { names = n; dups = true; abort = false; calledHelp = false; vector tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; if (dupsFile.first != "") { if (dupsFileType == "count") { readCount(dupsFile.first, dupsFile.second); } else { readName(dupsFile.first, dupsFile.second); } } } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand - mothurRun"); exit(1); } } //********************************************************************************************************************** RemoveSeqsCommand::RemoveSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = current->getAccnosFile(); if (accnosfile != "") { m->mothurOut("Using " + 
accnosfile + " as input file for the accnos parameter.\n"); } else { m->mothurOut("[ERROR]: You have no valid accnos file and accnos is required.\n"); abort = true; } }else { current->setAccnosFile(accnosfile); } fastafiles = validParameter.validFiles(parameters, "fasta"); if (fastafiles.size() != 0) { if (fastafiles[0] == "not open") { abort = true; } else { current->setFastaFile(fastafiles[0]); } } namefiles = validParameter.validFiles(parameters, "name"); if (namefiles.size() != 0) { if (namefiles[0] == "not open") { abort = true; } else { current->setNameFile(namefiles[0]); } } groupfiles = validParameter.validFiles(parameters, "group"); if (groupfiles.size() != 0) { if (groupfiles[0] == "not open") { abort = true; } else { current->setGroupFile(groupfiles[0]); } } alignfiles = validParameter.validFiles(parameters, "alignreport"); if (alignfiles.size() != 0) { if (alignfiles[0] == "not open") { abort = true; } } contigsreportfiles = validParameter.validFiles(parameters, "contigsreport"); if (contigsreportfiles.size() != 0) { if (contigsreportfiles[0] == "not open") { abort = true; } else { current->setContigsReportFile(contigsreportfiles[0]); } } listfiles = validParameter.validFiles(parameters, "list"); if (listfiles.size() != 0) { if (listfiles[0] == "not open") { abort = true; } else { current->setListFile(listfiles[0]); } } taxfiles = validParameter.validFiles(parameters, "taxonomy"); if (taxfiles.size() != 0) { if (taxfiles[0] == "not open") { abort = true; } else { current->setTaxonomyFile(taxfiles[0]); } } countfiles = validParameter.validFiles(parameters, "count"); if (countfiles.size() != 0) { if (countfiles[0] == "not open") { abort = true; } else { current->setCountFile(countfiles[0]); } } fastqfiles = validParameter.validFiles(parameters, "fastq"); if (fastqfiles.size() != 0) { if (fastqfiles[0] == "not open") { abort = true; } } qualityfiles = validParameter.validFiles(parameters, "qfile"); if (qualityfiles.size() != 0) { if (qualityfiles[0] == "not open") { abort = true; } else { current->setQualFile(qualityfiles[0]); } } if ((qualityfiles.size() == 0) && (fastqfiles.size() == 0) && (countfiles.size() == 0) && (fastafiles.size() == 0) && (namefiles.size() == 0) && (listfiles.size() == 0) && (groupfiles.size() == 0) && (alignfiles.size() == 0) && (taxfiles.size() == 0) && (contigsreportfiles.size() == 0)) { m->mothurOut("You must provide a file.\n"); abort = true; } string usedDups = "true"; string temp = validParameter.valid(parameters, "dups"); if (temp == "not found") { if (namefiles.size() != 0) { temp = "true"; } else { temp = "false"; usedDups = ""; } } dups = util.isTrue(temp); format = validParameter.valid(parameters, "format"); if (format == "not found"){ format = "illumina1.8+"; } if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) { m->mothurOut(format + " is not a valid format. 
Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting.\n" ); abort=true; } } if (!abort) { names = util.readAccnos(accnosfile); } } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand"); exit(1); } } //********************************************************************************************************************** int RemoveSeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (m->getControl_pressed()) { return 0; } //read through the correct file and output lines you want to keep if (namefiles.size() != 0) { for (int i = 0; i < namefiles.size(); i++) { readName(namefiles[i]); } } if (fastafiles.size() != 0) { for (int i = 0; i < fastafiles.size(); i++) { readFasta(fastafiles[i]); } } if (qualityfiles.size() != 0) { for (int i = 0; i < qualityfiles.size(); i++) { readQual(qualityfiles[i]); } } if (groupfiles.size() != 0) { for (int i = 0; i < groupfiles.size(); i++) { readGroup(groupfiles[i]); } } if (taxfiles.size() != 0) { for (int i = 0; i < taxfiles.size(); i++) { readTax(taxfiles[i]); } } if (listfiles.size() != 0) { for (int i = 0; i < listfiles.size(); i++) { readList(listfiles[i]); } } if (alignfiles.size() != 0) { for (int i = 0; i < alignfiles.size(); i++) { readAlign(alignfiles[i]); } } if (countfiles.size() != 0) { for (int i = 0; i < countfiles.size(); i++) { readCount(countfiles[i]); } } if (fastqfiles.size() != 0) { for (int i = 0; i < fastqfiles.size(); i++) { readFastq(fastqfiles[i]); } } if (contigsreportfiles.size() != 0) { for (int i = 0; i < contigsreportfiles.size(); i++) { readContigs(contigsreportfiles[i]); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "execute"); exit(1); } } 
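//**********************************************************************************************************************
// [Editor's illustration] The read* helpers below (readFasta, readFastq, readQual, readGroup, readTax, ...) all share
// one pattern: stream the input file record by record, write a record to the "pick" output only if its name is NOT in
// the accnos set, warn about duplicate names, and report how many records were removed. The standalone sketch below
// shows that pattern for a simple two-line-per-record fasta file using only the standard library. It is not part of
// mothur and is never called; the function and variable names are hypothetical, and real fasta parsing (wrapped
// sequence lines, descriptions after the name, etc.) is handled by mothur's Sequence class instead.
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_set>

inline void exampleFilterFastaByAccnos(const std::string& accnosFile, const std::string& fastaFile, const std::string& pickFile) {
    std::unordered_set<std::string> namesToRemove;                    // names listed in the accnos file
    std::ifstream accnos(accnosFile);
    for (std::string name; accnos >> name; ) { namesToRemove.insert(name); }

    std::ifstream in(fastaFile);
    std::ofstream out(pickFile);
    std::unordered_set<std::string> seen;                             // to warn about duplicate names, like readFasta does
    int removedCount = 0;
    std::string header, sequence;
    while (std::getline(in, header) && std::getline(in, sequence)) {  // assumes ">name" followed by one sequence line
        std::string name = header.substr(1);                          // drop the leading '>'
        if (namesToRemove.count(name) != 0)  { removedCount++; }      // listed in accnos, so leave it out of the pick file
        else if (seen.count(name) != 0)      { std::cerr << "[WARNING]: " << name << " appears more than once.\n"; }
        else { seen.insert(name); out << header << '\n' << sequence << '\n'; }
    }
    std::cout << "Removed " << removedCount << " sequences from " << fastaFile << ".\n";
}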
//********************************************************************************************************************** void RemoveSeqsCommand::readFasta(string fastafile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[extension]"] = util.getExtension(fastafile); string outputFileName = getOutputFileName("fasta", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(fastafile, in); string name; bool wroteSomething = false; int removedCount = 0; set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } Sequence currSeq(in); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(currSeq.getName()); if (it != uniqueMap.end()) { currSeq.setName(it->second); } } name = currSeq.getName(); if (name != "") { //if this name is in the accnos file if (names.count(name) == 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; currSeq.printSequence(out); }else { m->mothurOut("[WARNING]: " + name + " is in your fasta file more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); } }else { removedCount++; } } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + fastafile + " contains only sequences from the .accnos file.\n"); } outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + fastafile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readGZFastq(string fastqfile){ try { #ifdef USE_BOOST string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastqfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastqfile)); variables["[extension]"] = ".fastq" + util.getExtension(fastqfile); string outputFileName = getOutputFileName("fastq", variables); ifstream in; boost::iostreams::filtering_istream inBoost; util.openInputFileBinary(fastqfile, in, inBoost); ofstream file; ostream* out; boost::iostreams::filtering_streambuf outBoost; util.openOutputFileBinary(outputFileName, file, out, outBoost); bool wroteSomething = false; int removedCount = 0; set uniqueNames; while(!inBoost.eof()){ if (m->getControl_pressed()) { break; } //read sequence name bool ignore; FastqRead fread(inBoost, ignore, format); gobble(inBoost); if (!ignore) { string name = fread.getName(); if (names.count(name) == 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet wroteSomething = true; fread.printFastq(*out); uniqueNames.insert(name); }else { m->mothurOut("[WARNING]: " + name + " is in your fastq file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); } }else { removedCount++; } } gobble(inBoost); } in.close(); inBoost.pop(); boost::iostreams::close(outBoost); file.close(); delete out; if (m->getControl_pressed()) { util.mothurRemove(outputFileName); return; } if (wroteSomething == false) { m->mothurOut("[WARNING]: " + fastqfile + " contains only sequences from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["fastq"].push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + fastqfile + ".\n"); #else m->mothurOut("[ERROR]: mothur requires the boost libraries to read and write compressed files. Please decompress your files and rerun.\n"); #endif return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readFastq"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readFastq(string fastqfile){ try { bool gz = util.isGZ(fastqfile)[1]; if (gz) { readGZFastq(fastqfile); return; } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastqfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastqfile)); variables["[extension]"] = util.getExtension(fastqfile); string outputFileName = getOutputFileName("fastq", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(fastqfile, in); bool wroteSomething = false; int removedCount = 0; set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } //read sequence name bool ignore; FastqRead fread(in, ignore, format); gobble(in); if (!ignore) { string name = fread.getName(); if (names.count(name) == 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet wroteSomething = true; fread.printFastq(out); uniqueNames.insert(name); }else { m->mothurOut("[WARNING]: " + name + " is in your fastq file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); } }else { removedCount++; } } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + fastqfile + " contains only sequences from the .accnos file.\n"); } outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + fastqfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readFastq"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readQual(string qualfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(qualfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(qualfile)); variables["[extension]"] = util.getExtension(qualfile); string outputFileName = getOutputFileName("qfile", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(qualfile, in); string name; bool wroteSomething = false; int removedCount = 0; set uniqueNames; while(!in.eof()){ QualityScores qual(in); gobble(in); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(qual.getName()); if (it != uniqueMap.end()) { qual.setName(it->second); } } string name = qual.getName(); if (names.count(name) == 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; qual.printQScores(out); }else { m->mothurOut("[WARNING]: " + name + " is in your qfile more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); } }else { removedCount++; } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + qualfile + " contains only sequences from the .accnos file.\n"); } outputNames.push_back(outputFileName); outputTypes["qfile"].push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + qualfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readQual"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readCount(string countfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string outputFileName = getOutputFileName("count", variables); readCount(countfile, outputFileName); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readCount"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readCount(string countfile, string outputFileName){ try { CountTable ct; ct.readTable(countfile, true, false); int originalCount = ct.getNumSeqs(); for (auto it = names.begin(); it != names.end(); it++) { ct.zeroOutSeq(*it); if (m->getControl_pressed()) { return; } } ct.printTable(outputFileName); int removedCount = originalCount - ct.getNumSeqs(); outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + countfile + ".\n"); if (ct.getNumSeqs() == 0) { 
m->mothurOut("[WARNING]: " + countfile + " contains only sequences from the .accnos file.\n"); } return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readCount"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readList(string listfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); InputData input(listfile, "list", nullVector); ListVector* list = input.getListVector(); bool wroteSomething = false; int removedCount = 0; while(list != nullptr) { removedCount = 0; set uniqueNames; //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); variables["[distance]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; util.openOutputFile(outputFileName, out); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); vector binLabels = list->getLabels(); vector newBinLabels; if (m->getControl_pressed()) { out.close(); return; } //for each bin for (int i = 0; i < list->getNumBins(); i++) { if (m->getControl_pressed()) { out.close(); util.mothurRemove(outputFileName); return; } //parse out names that are in accnos file string bin = list->get(i); vector bnames; util.splitAtComma(bin, bnames); string newNames = ""; for (int j = 0; j < bnames.size(); j++) { string name = bnames[j]; //if that name is in the .accnos file, add it if (names.count(name) == 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); newNames += name + ","; }else { m->mothurOut("[WARNING]: " + name + " is in your list file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); } } else { removedCount++; } } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.print(out, false); } out.close(); delete list; list = input.getListVector(); } if (wroteSomething == false) { m->mothurOut("[WARNING]: " + listfile + " contains only sequences from the .accnos file.\n"); } m->mothurOut("Removed " + toString(removedCount) + " sequences from " + listfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readList"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readName(string namefile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(namefile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[extension]"] = util.getExtension(namefile); string outputFileName = getOutputFileName("name", variables); readName(namefile, outputFileName); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readList"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readName(string namefile, string outputFileName){ try { ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(namefile, in); string name, firstCol, secondCol; bool wroteSomething = false; int removedCount = 0; set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> firstCol; gobble(in); in >> secondCol; gobble(in); vector parsedNames; util.splitAtComma(secondCol, parsedNames); vector validSecond; validSecond.clear(); vector parsedNames2; bool parsedError = false; for (int i = 0; i < parsedNames.size(); i++) { if (names.count(parsedNames[i]) == 0) { if (uniqueNames.count(parsedNames[i]) == 0) { //this name hasn't been seen yet uniqueNames.insert(parsedNames[i]); validSecond.push_back(parsedNames[i]); parsedNames2.push_back(parsedNames[i]); }else { m->mothurOut("[WARNING]: " + parsedNames[i] + " is in your name file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); parsedError = true; } } } if (parsedError) { parsedNames = parsedNames2; } if ((dups) && (validSecond.size() != parsedNames.size())) { //if dups is true and we want to get rid of anyone, get rid of everyone for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); } removedCount += parsedNames.size(); }else { if (validSecond.size() != 0) { removedCount += parsedNames.size()-validSecond.size(); //if the name in the first column is in the set then print it and any other names in second column also in set if (names.count(firstCol) == 0) { wroteSomething = true; out << firstCol << '\t'; //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; //make first name in set you come to first column and then add the remaining names to second column }else { //you want part of this row if (validSecond.size() != 0) { wroteSomething = true; out << validSecond[0] << '\t'; //we are changing the unique name in the fasta file uniqueMap[firstCol] = validSecond[0]; //you know you have at least one valid second since first column is valid for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; } out << validSecond[validSecond.size()-1] << endl; } } } } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + namefile + " contains only sequences from the .accnos file.\n"); } outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + namefile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readName"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readGroup(string groupfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[extension]"] = util.getExtension(groupfile); string outputFileName = getOutputFileName("group", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(groupfile, in); string name, group; bool wroteSomething = false; int removedCount = 0; set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> name; gobble(in); //read from first column in >> group; //read from second column //if this name is in the accnos file if (names.count(name) == 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; out << name << '\t' << group << endl; }else { m->mothurOut("[WARNING]: " + name + " is in your group file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); } }else { removedCount++; } gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + groupfile + " contains only sequences from the .accnos file.\n"); } outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + groupfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readGroup"); exit(1); } } //********************************************************************************************************************** void RemoveSeqsCommand::readTax(string taxfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile)); variables["[extension]"] = util.getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(taxfile, in); string name, tax; bool wroteSomething = false; int removedCount = 0; set uniqueNames; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } in >> name; gobble(in); tax = util.getline(in); gobble(in); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(name); if (it != uniqueMap.end()) { name = it->second; } } //if this name is in the accnos file if (names.count(name) == 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; out << name << '\t' << tax << endl; }else { m->mothurOut("[WARNING]: " + name + " is in your taxonomy file more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); } }else { removedCount++; } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + taxfile + " contains only sequences from the .accnos file.\n"); } outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + taxfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readTax"); exit(1); } } //********************************************************************************************************************** //alignreport file has a column header line then all other lines contain 16 columns. 
we just want the first column since that contains the name void RemoveSeqsCommand::readAlign(string alignfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(alignfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(alignfile)); variables["[extension]"] = util.getExtension(alignfile); string outputFileName = getOutputFileName("alignreport", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(alignfile, in); bool wroteSomething = false; int removedCount = 0; set uniqueNames; AlignReport report; report.readHeaders(in); gobble(in); report.printHeaders(out); while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return; } report.read(in); gobble(in); string name = report.getQueryName(); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(name); if (it != uniqueMap.end()) { name = it->second; } } //if this name is in the accnos file if (names.count(name) == 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; report.print(out); }else { m->mothurOut("[WARNING]: " + name + " is in your alignreport file more than once. Mothur requires sequence names to be unique. I will only add it once.\n"); } }else { removedCount++; } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + alignfile + " contains only sequences from the .accnos file.\n"); ofstream out1; util.openOutputFile(outputFileName, out1); out1.close(); } outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + alignfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readAlign"); exit(1); } } //********************************************************************************************************************** //contigsreport file has a column header line then all other lines contain 8 columns. we just want the first column since that contains the name void RemoveSeqsCommand::readContigs(string contigsreportfile){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(contigsreportfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(contigsreportfile)); variables["[extension]"] = util.getExtension(contigsreportfile); string outputFileName = getOutputFileName("contigsreport", variables); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(contigsreportfile, in); bool wroteSomething = false; int removedCount = 0; set uniqueNames; ContigsReport report; report.readHeaders(in); gobble(in); report.printHeaders(out); while(!in.eof()){ if (m->getControl_pressed()) { break; } report.read(in); gobble(in); string name = report.getName(); if (!dups) {//adjust name if needed map::iterator it = uniqueMap.find(name); if (it != uniqueMap.end()) { name = it->second; } } if (names.count(name) == 0) { if (uniqueNames.count(name) == 0) { //this name hasn't been seen yet uniqueNames.insert(name); wroteSomething = true; report.print(out); }else { m->mothurOut("[WARNING]: " + name + " is in your contigsreport file more than once. Mothur requires sequence names to be unique. 
I will only add it once.\n"); } }else { removedCount++; } } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("[WARNING]: " + contigsreportfile + " only contains sequences from the .accnos file, everything removed.\n"); ofstream out1; util.openOutputFile(outputFileName, out1); out1.close(); } //reopening file clears header line outputNames.push_back(outputFileName); outputTypes["contigsreport"].push_back(outputFileName); m->mothurOut("Removed " + toString(removedCount) + " sequences from " + contigsreportfile + ".\n"); return; } catch(exception& e) { m->errorOut(e, "RemoveSeqsCommand", "readContigs"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/removeseqscommand.h000077500000000000000000000036311424121717000221310ustar00rootroot00000000000000#ifndef REMOVESEQSCOMMAND_H #define REMOVESEQSCOMMAND_H /* * removeseqscommand.h * Mothur * * Created by Sarah Westcott on 7/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" class RemoveSeqsCommand : public Command { public: RemoveSeqsCommand(string); RemoveSeqsCommand(string, pair dupsFile, string dupsFileType); RemoveSeqsCommand(unordered_set, pair dupsFile, string dupsFileType); ~RemoveSeqsCommand(){} vector setParameters(); string getCommandName() { return "remove.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Remove.seqs"; } string getDescription() { return "removes sequences from a list, fasta, name, group, alignreport, contigsreport, quality or taxonomy file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: unordered_set names; vector fastafiles, namefiles, groupfiles, countfiles, alignfiles, listfiles, taxfiles, fastqfiles, contigsreportfiles, qualityfiles, outputNames; string accnosfile, format; bool abort, dups; map uniqueMap; void readFasta(string); void readFastq(string); void readGZFastq(string); void readName(string); //inputNameFile, mothur generates output name void readName(string, string); //inputNameFile, outputName (internal use) void readGroup(string); void readCount(string); //inputCountFile, mothur generates output name void readCount(string, string); //inputCountFile, outputName (internal use) void readAlign(string); void readContigs(string); void readList(string); void readTax(string); void readQual(string); }; #endif mothur-1.48.0/source/commands/renamefilecommand.cpp000077500000000000000000000627011424121717000224050ustar00rootroot00000000000000// // renamefilecommand.cpp // Mothur // // Created by Sarah Westcott on 4/18/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. 
// #include "renamefilecommand.h" #include "systemcommand.h" //********************************************************************************************************************** vector RenameFileCommand::setParameters(){ try { CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pflow); CommandParameter pfile("file", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pfile); CommandParameter pbiom("biom", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pbiom); CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pphylip); CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcolumn); CommandParameter psummary("summary", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(psummary); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pname); CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pgroup); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(plist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(ptaxonomy); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pqfile); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); CommandParameter prabund("rabund", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(psabund); CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pdesign); CommandParameter porder("order", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(porder); CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(ptree); CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pshared); CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcount); CommandParameter poutputname("new", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poutputname); CommandParameter pinputname("input", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pinputname); CommandParameter prelabund("relabund", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(prelabund); CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(psff); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pconstaxonomy); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos); 
CommandParameter pmothurgenerated("shorten", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmothurgenerated); CommandParameter pdeleteold("deleteold", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdeleteold); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter pprefix("prefix", "String", "", "", "", "", "","",false,false); parameters.push_back(pprefix); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "RenameFileCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string RenameFileCommand::getHelpString(){ try { string helpString = ""; helpString += "The rename.file command allows you to rename files and updates the current files saved by mothur.\n"; helpString += "The rename.file command parameters are: phylip, column, list, rabund, sabund, name, group, design, tree, shared, relabund, fasta, qfile, sff, oligos, accnos, biom, count, summary, file, taxonomy, constaxonomy, input, new, prefix, deletedold and shorten.\n"; helpString += "The new parameter allows you to provide an output file name for the input file you provide.\n"; helpString += "The shorten parameter is used to inicate you want mothur to generate output file names for you. For example: stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.an.shared would become stability.an.shared. Default=true."; helpString += "The prefix parameter allows you to enter your own prefix for shortened names."; helpString += "The deleteold parameter indicates whether you want to delete the old file. 
Default=true."; helpString += "The rename.file command should be in the following format: \n"; helpString += "rename.file(fasta=current, name=current, group=current, taxonomy=current, shorten=t)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "RenameFileCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RenameFileCommand::getOutputPattern(string type) { try { string pattern = ""; m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); return pattern; } catch(exception& e) { m->errorOut(e, "RenameFileCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** RenameFileCommand::RenameFileCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; int numFiles = 0; //check for parameters phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { m->mothurOut("Ignoring: " + parameters["phylip"]); m->mothurOutEndLine(); phylipfile = ""; } else if (phylipfile == "not found") { phylipfile = ""; } if (phylipfile != "") { numFiles++; } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { m->mothurOut("Ignoring: " + parameters["column"]); m->mothurOutEndLine(); columnfile = ""; } else if (columnfile == "not found") { columnfile = ""; } if (columnfile != "") { numFiles++; } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { m->mothurOut("Ignoring: " + parameters["list"]); m->mothurOutEndLine(); listfile = ""; } else if (listfile == "not found") { listfile = ""; } if (listfile != "") { numFiles++; } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { m->mothurOut("Ignoring: " + parameters["rabund"]); m->mothurOutEndLine(); rabundfile = ""; } else if (rabundfile == "not found") { rabundfile = ""; } if (rabundfile != "") { numFiles++; } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { m->mothurOut("Ignoring: " + parameters["sabund"]); m->mothurOutEndLine(); sabundfile = ""; } else if (sabundfile == "not found") { sabundfile = ""; } if (sabundfile != "") { numFiles++; } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { m->mothurOut("Ignoring: " + parameters["name"]); m->mothurOutEndLine(); namefile = ""; } else if (namefile == "not found") { namefile = ""; } if (namefile != "") { numFiles++; } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { m->mothurOut("Ignoring: " + parameters["group"]); m->mothurOutEndLine(); groupfile = ""; } else if (groupfile == "not found") { groupfile = ""; } if (groupfile != "") { numFiles++; } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { m->mothurOut("Ignoring: " + parameters["count"]); m->mothurOutEndLine(); countfile = ""; } else if (countfile == "not found") { countfile = ""; } if (countfile != "") { numFiles++; } designfile = validParameter.validFile(parameters, 
"design"); if (designfile == "not open") { m->mothurOut("Ignoring: " + parameters["design"]); m->mothurOutEndLine(); designfile = ""; } else if (designfile == "not found") { designfile = ""; } if (designfile != "") { numFiles++; } inputfile = validParameter.validFile(parameters, "input"); if (inputfile == "not open") { m->mothurOut("Ignoring: " + parameters["input"]); m->mothurOutEndLine(); inputfile = ""; } else if (inputfile == "not found") { inputfile = ""; } if (inputfile != "") { numFiles++; } treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { m->mothurOut("Ignoring: " + parameters["tree"]); m->mothurOutEndLine(); treefile = ""; } else if (treefile == "not found") { treefile = ""; } if (treefile != "") { numFiles++; } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { m->mothurOut("Ignoring: " + parameters["shared"]); m->mothurOutEndLine(); sharedfile = ""; } else if (sharedfile == "not found") { sharedfile = ""; } if (sharedfile != "") { numFiles++; } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { m->mothurOut("Ignoring: " + parameters["relabund"]); m->mothurOutEndLine(); relabundfile = ""; } else if (relabundfile == "not found") { relabundfile = ""; } if (relabundfile != "") { numFiles++; } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { m->mothurOut("Ignoring: " + parameters["fasta"]); m->mothurOutEndLine(); fastafile = ""; } else if (fastafile == "not found") { fastafile = ""; } if (fastafile != "") { numFiles++; } qualfile = validParameter.validFile(parameters, "qfile"); if (qualfile == "not open") { m->mothurOut("Ignoring: " + parameters["qfile"]); m->mothurOutEndLine(); qualfile = ""; } else if (qualfile == "not found") { qualfile = ""; } if (qualfile != "") { numFiles++; } sfffile = validParameter.validFile(parameters, "sff"); if (sfffile == "not open") { m->mothurOut("Ignoring: " + parameters["sff"]); m->mothurOutEndLine(); sfffile = ""; } else if (sfffile == "not found") { sfffile = ""; } if (sfffile != "") { numFiles++; } oligosfile = validParameter.validFile(parameters, "oligos"); if (oligosfile == "not open") { m->mothurOut("Ignoring: " + parameters["oligos"]); m->mothurOutEndLine(); oligosfile = ""; } else if (oligosfile == "not found") { oligosfile = ""; } if (oligosfile != "") { numFiles++; } accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { m->mothurOut("Ignoring: " + parameters["accnos"]); m->mothurOutEndLine(); accnosfile = ""; } else if (accnosfile == "not found") { accnosfile = ""; } if (accnosfile != "") { numFiles++; } taxonomyfile = validParameter.validFile(parameters, "taxonomy"); if (taxonomyfile == "not open") { m->mothurOut("Ignoring: " + parameters["taxonomy"]); m->mothurOutEndLine(); taxonomyfile = ""; } else if (taxonomyfile == "not found") { taxonomyfile = ""; } if (taxonomyfile != "") { numFiles++; } constaxonomyfile = validParameter.validFile(parameters, "constaxonomy"); if (constaxonomyfile == "not open") { m->mothurOut("Ignoring: " + parameters["constaxonomy"]); m->mothurOutEndLine(); constaxonomyfile = ""; } else if (constaxonomyfile == "not found") { constaxonomyfile = ""; } if (constaxonomyfile != "") { numFiles++; } flowfile = validParameter.validFile(parameters, "flow"); if (flowfile == "not open") { m->mothurOut("Ignoring: " + parameters["flow"]); m->mothurOutEndLine(); flowfile = ""; } else if (flowfile == "not found") { flowfile = 
""; } if (flowfile != "") { numFiles++; } biomfile = validParameter.validFile(parameters, "biom"); if (biomfile == "not open") { m->mothurOut("Ignoring: " + parameters["biom"]); m->mothurOutEndLine(); biomfile = ""; } else if (biomfile == "not found") { biomfile = ""; } if (biomfile != "") { numFiles++; } summaryfile = validParameter.validFile(parameters, "summary"); if (summaryfile == "not open") { m->mothurOut("Ignoring: " + parameters["summary"]); m->mothurOutEndLine(); summaryfile = ""; } else if (summaryfile == "not found") { summaryfile = ""; } if (summaryfile != "") { numFiles++; } filefile = validParameter.validFile(parameters, "file"); if (filefile == "not open") { m->mothurOut("Ignoring: " + parameters["file"]); m->mothurOutEndLine(); filefile = ""; } else if (filefile == "not found") { filefile = ""; } if (filefile != "") { numFiles++; } string temp = validParameter.valid(parameters, "shorten"); if (temp == "not found") { temp = "T"; } mothurGenerated = util.isTrue(temp); temp = validParameter.valid(parameters, "deleteold"); if (temp == "not found") { temp = "T"; } deleteOld = util.isTrue(temp); prefix = validParameter.valid(parameters, "prefix"); if (prefix == "not found") { prefix = ""; } outputfile = validParameter.validPath(parameters, "new"); if (outputfile == "not found") { if (!mothurGenerated) { m->mothurOut("[ERROR]: you must enter an output file name\n"); abort=true; } outputfile = ""; }else { mothurGenerated=false; if (outputdir != "") { outputfile = outputdir + util.getSimpleName(outputfile); } } if ((!mothurGenerated) && (numFiles > 1)) { m->mothurOut("[ERROR]: You cannot use more than one file parameter unless mothur is generating the output filenames for you.\n"); abort= true; } if ((mothurGenerated) && (outputfile != "") && (numFiles != 1)) { m->mothurOut("[ERROR]: You must allow mothur to generate the filenames or input one file at a time with a new name, not both.\n"); abort= true; } if (outputdir != "") { outputfile = outputdir + util.getSimpleName(outputfile); } } } catch(exception& e) { m->errorOut(e, "RenameFileCommand", "RenameFileCommand"); exit(1); } } //********************************************************************************************************************** int RenameFileCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } string newName = outputfile; //look for file types if (fastafile != "") { newName = getNewName(fastafile, "fasta"); renameOrCopy(fastafile, newName); current->setFastaFile(newName); } if (qualfile != "") { newName = getNewName(qualfile, "qfile"); renameOrCopy(qualfile, newName); current->setQualFile(newName); } if (phylipfile != "") { newName = getNewName(phylipfile, "phylip"); renameOrCopy(phylipfile, newName); current->setPhylipFile(newName); } if (columnfile != "") { newName = getNewName(columnfile, "column"); renameOrCopy(columnfile, newName); current->setColumnFile(newName); } if (listfile != "") { newName = getNewName(listfile, "list"); renameOrCopy(listfile, newName); current->setListFile(newName); } if (rabundfile != "") { newName = getNewName(rabundfile, "rabund"); renameOrCopy(rabundfile, newName); current->setRabundFile(newName); } if (sabundfile != "") { newName = getNewName(sabundfile, "sabund"); renameOrCopy(sabundfile, newName); current->setSabundFile(newName); } if (namefile != "") { newName = getNewName(namefile, "name"); renameOrCopy(namefile, newName); current->setNameFile(newName); } if (groupfile != "") { newName = getNewName(groupfile, "group"); renameOrCopy(groupfile, 
newName); current->setGroupFile(newName); } if (treefile != "") { newName = getNewName(treefile, "tree"); renameOrCopy(treefile, newName); current->setTreeFile(newName); } if (sharedfile != "") { newName = getNewName(sharedfile, "shared"); renameOrCopy(sharedfile, newName); current->setSharedFile(newName); } if (relabundfile != "") { newName = getNewName(relabundfile, "relabund"); renameOrCopy(relabundfile, newName); current->setRelAbundFile(newName); } if (designfile != "") { newName = getNewName(designfile, "design"); renameOrCopy(designfile, newName); current->setDesignFile(newName); } if (sfffile != "") { newName = getNewName(sfffile, "sff"); renameOrCopy(sfffile, newName); current->setSFFFile(newName); } if (oligosfile != "") { newName = getNewName(oligosfile, "oligos"); renameOrCopy(oligosfile, newName); current->setOligosFile(newName); } if (accnosfile != "") { newName = getNewName(accnosfile, "accnos"); renameOrCopy(accnosfile, newName); current->setAccnosFile(newName); } if (taxonomyfile != "") { newName = getNewName(taxonomyfile, "taxonomy"); renameOrCopy(taxonomyfile, newName); current->setTaxonomyFile(newName); } if (constaxonomyfile != "") { newName = getNewName(constaxonomyfile, "constaxonomy"); renameOrCopy(constaxonomyfile, newName); current->setConsTaxonomyFile(newName); } if (flowfile != "") { newName = getNewName(flowfile, "flow"); renameOrCopy(flowfile, newName); current->setFlowFile(newName); } if (biomfile != "") { newName = getNewName(biomfile, "biom"); renameOrCopy(biomfile, newName); current->setBiomFile(newName); } if (countfile != "") { newName = getNewName(countfile, "count"); renameOrCopy(countfile, newName); current->setCountFile(newName); } if (summaryfile != "") { newName = getNewName(summaryfile, "summary"); renameOrCopy(summaryfile, newName); current->setSummaryFile(newName); } if (filefile != "") { newName = getNewName(filefile, "file"); renameOrCopy(filefile, newName); current->setFileFile(newName); } if (inputfile != "") { newName = getNewName(inputfile, "input"); renameOrCopy(inputfile, newName); } m->mothurOutEndLine(); m->mothurOut("Current files saved by mothur:\n"); if (current->hasCurrentFiles()) { current->printCurrentFiles(""); } return 0; } catch(exception& e) { m->errorOut(e, "RenameFileCommand", "execute"); exit(1); } } //********************************************************************************************************************** string RenameFileCommand::getNewName(string inputFileName, string type){ try { string newName = outputfile; inputFileName = util.getFullPathName(inputFileName); if (mothurGenerated) { string extension = util.getExtension(inputFileName); string basicName = "final"; string tag = ""; if (prefix == "") { int pos = inputFileName.find_first_of("."); if (pos != string::npos) { basicName = util.getSimpleName(inputFileName.substr(0, pos)); } }else { basicName = prefix; } if ((type == "shared") || (type == "list") || (type == "relabund") || (type == "rabund") || (type == "sabund")) { vector tags; tags.push_back(".an."); tags.push_back(".tx."); tags.push_back(".agc."); tags.push_back(".dgc."); tags.push_back(".nn."); tags.push_back(".fn."); tags.push_back(".wn."); tags.push_back(".opti_"); for (int i = 0; i < tags.size(); i++) { int pos2 = inputFileName.find(tags[i]); if (pos2 != string::npos) { int pos3 = inputFileName.substr(pos2+1).find_first_of('.'); tag = inputFileName.substr(pos2+1, pos3); break; } } }else if (type == "constaxonomy") { extension = ".cons.taxonomy"; } string thisOutputDir = outputdir; if 
(outputdir == "") { thisOutputDir += util.hasPath(inputFileName); } newName = thisOutputDir + basicName; if (tag != "") { newName += "." + tag; } newName += extension; } return newName; } catch(exception& e) { m->errorOut(e, "RenameFileCommand", "getNewFileName"); exit(1); } } //********************************************************************************************************************** string RenameFileCommand::renameOrCopy(string oldName, string newName){ try { if (deleteOld) { util.renameFile(oldName, newName); } else { string command = "copy "; #if defined NON_WINDOWS command = "cp "; #endif string inputString = command + oldName + " " + newName; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: system(" + inputString + ")\n"); current->setMothurCalling(true); Command* systemCommand = new SystemCommand(inputString); systemCommand->execute(); delete systemCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); } return newName; } catch(exception& e) { m->errorOut(e, "RenameFileCommand", "renameOrCopy"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/renamefilecommand.h000077500000000000000000000027231424121717000220500ustar00rootroot00000000000000// // renamefilecommand.h // Mothur // // Created by Sarah Westcott on 4/18/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #ifndef __Mothur__renamefilecommand__ #define __Mothur__renamefilecommand__ #include "command.hpp" class RenameFileCommand : public Command { #ifdef UNIT_TEST friend class TestRenameFileCommand; #endif public: RenameFileCommand(string); ~RenameFileCommand(){} vector setParameters(); string getCommandName() { return "rename.file"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/rename.file"; } string getDescription() { return "renames file and updates current"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile, countfile, summaryfile, inputfile; string treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, flowfile, filefile, outputfile, constaxonomyfile, prefix; bool mothurGenerated, abort, deleteOld; vector outputNames; string getNewName(string name, string type); string renameOrCopy(string oldName, string newName); }; #endif /* defined(__Mothur__renamefilecommand__) */ mothur-1.48.0/source/commands/renameseqscommand.cpp000077500000000000000000001310501424121717000224330ustar00rootroot00000000000000// // renameseqscommand.cpp // Mothur // // Created by SarahsWork on 5/28/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
// #include "renameseqscommand.h" #include "sequence.hpp" #include "groupmap.h" #include "counttable.h" #include "qualityscores.h" #include "contigsreport.hpp" #include "inputdata.h" #include "fastqread.h" //********************************************************************************************************************** vector RenameSeqsCommand::setParameters(){ try { CommandParameter pfile("file", "InputTypes", "", "", "fileFasta-file", "fileFasta", "none","fasta",false,false,true); parameters.push_back(pfile); CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none","fasta",false,false,true); parameters.push_back(pmap); CommandParameter pfasta("fasta", "InputTypes", "", "", "fileFasta-file", "fileFasta", "none","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pfastq("fastq", "InputTypes", "", "", "fileFasta-file", "fileFasta", "none","fasta",false,false,true); parameters.push_back(pfastq); CommandParameter plist("list", "InputTypes", "", "", "fileFasta-file", "fileFasta", "none","fasta",false,false,true); parameters.push_back(plist); CommandParameter pqfile("qfile", "InputTypes", "", "", "file", "none", "none","qfile",false,false,true); parameters.push_back(pqfile); CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "file", "none", "none","contigsreport",false,false,true); parameters.push_back(pcontigsreport); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "file", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy); CommandParameter pname("name", "InputTypes", "", "", "NameCount-file", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup-file", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup-file", "none", "none","group",false,false,true); parameters.push_back(pgroup); CommandParameter pdelim("delim", "String", "", "_", "", "", "","",false,false); parameters.push_back(pdelim); CommandParameter pplacement("placement", "Multiple", "front-back", "back", "", "", "","",false,false); parameters.push_back(pplacement); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["map"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["file"] = tempOutNames; outputTypes["fastq"] = tempOutNames; outputTypes["contigsreport"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string RenameSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The rename.seqs command renames sequences in the input files. 
By default, mothur will generate new names based on your inputs. Alternatively, you can provide a map file.\n"; helpString += "The rename.seqs command parameters are " + getCommandParameters() + ".\n"; helpString += "The list parameter allows you to provide an associated list file.\n"; helpString += "The fasta parameter allows you to provide an associated fasta file.\n"; helpString += "The qfile parameter allows you to provide an associated quality file.\n"; helpString += "The taxonomy parameter allows you to provide an associated taxonomy file.\n"; helpString += "The contigsreport allows you to provide an associated contigsreport file.\n"; helpString += "The file parameter is 2, 3 or 4 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file. If you only have one index file add 'none' for the other one. Mothur will process each pair and create a renamed fastq and file file.\n"; helpString += "The placement parameter allows you to indicate whether you would like the group name appended to the front or back of the sequence number. Options are front or back. Default=back.\n"; helpString += "The delim parameter allow you to enter the character or characters you would like to separate the sequence number from the group name. Default='_'.\n"; helpString += "The rename.seqs command should be in the following format: \n"; helpString += "The rename.seqs command should be in the following format: \n"; helpString += "rename.seqs(fasta=yourFastaFile, group=yourGroupFile) \n"; helpString += "Example rename.seqs(fasta=abrecovery.unique.fasta, group=abrecovery.group).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string RenameSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],renamed,[extension]"; } else if (type == "name") { pattern = "[filename],renamed,[extension]"; } else if (type == "group") { pattern = "[filename],renamed,[extension]"; } else if (type == "count") { pattern = "[filename],renamed,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],renamed,[extension]"; } else if (type == "qfile") { pattern = "[filename],renamed,[extension]"; } else if (type == "fastq") { pattern = "[filename],renamed,[extension]"; } else if (type == "file") { pattern = "[filename],renamed,[extension]"; } else if (type == "contigsreport") { pattern = "[filename],renamed,[extension]"; } else if (type == "list") { pattern = "[filename],renamed,[extension]"; } else if (type == "map") { pattern = "[filename],renamed_map"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "getOutputPattern"); exit(1); } } /**************************************************************************************/ RenameSeqsCommand::RenameSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser 
parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters fastaFile = validParameter.validFile(parameters, "fasta"); if (fastaFile == "not open") { abort = true; } else if (fastaFile == "not found") { fastaFile = ""; } else { current->setFastaFile(fastaFile); } fastqfile = validParameter.validFile(parameters, "fastq"); if (fastqfile == "not open") { abort = true; } else if (fastqfile == "not found") { fastqfile = ""; } fileFile = validParameter.validFile(parameters, "file"); if (fileFile == "not open") { abort = true; } else if (fileFile == "not found") { fileFile = ""; } else { current->setFileFile(fileFile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } nameFile = validParameter.validFile(parameters, "name"); if (nameFile == "not open") { abort = true; } else if (nameFile == "not found"){ nameFile =""; } else { current->setNameFile(nameFile); } qualfile = validParameter.validFile(parameters, "qfile"); if (qualfile == "not open") { abort = true; } else if (qualfile == "not found"){ qualfile =""; } else { current->setQualFile(qualfile); } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found"){ listfile =""; } else { current->setListFile(listfile); } mapFile = validParameter.validFile(parameters, "map"); if (mapFile == "not open") { abort = true; } else if (mapFile == "not found"){ mapFile = ""; } contigsfile = validParameter.validFile(parameters, "contigsreport"); if (contigsfile == "not open") { abort = true; } else if (contigsfile == "not found"){ contigsfile = ""; } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not open") { taxfile = ""; abort = true; } else if (taxfile == "not found") { taxfile = ""; } else { current->setTaxonomyFile(taxfile); } if ((countfile != "") && (nameFile != "")) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name.\n"); abort = true; } if ((fileFile != "") && (fastaFile != "")) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: file or fasta.\n"); abort = true; } if ((countfile != "") && (groupfile != "")) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group.\n"); abort = true; } if ((fileFile != "") && ((listfile != "") || (nameFile != "") || (fastqfile != "") || (groupfile != "") || (qualfile != "") || (contigsfile != "") || (countfile != "") || (fastaFile != "")) ) { m->mothurOut("[ERROR]: The file option cannot be used with any other files.\n"); abort = true; }else if ((fileFile == "") && (listfile == "") && (nameFile == "") && (fastqfile == "") && (groupfile == "") && (qualfile == "") && (contigsfile == "") && (countfile == "") && (fastaFile == "")) { m->mothurOut("[ERROR]: No input files provided, please correct.\n"); abort = true; } placement = validParameter.valid(parameters, "placement"); if (placement == "not found") { placement = "back"; } if ((placement == "front") || (placement == "back")) { } else { m->mothurOut("[ERROR]: " + placement + " is not a valid placement option. 
Valid placement options are front or back.\n"); abort = true; } delim = validParameter.valid(parameters, "delim"); if (delim == "not found") { delim = "_"; } } } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "RenameSeqsCommand"); exit(1); } } /**************************************************************************************/ int RenameSeqsCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } ignoreNew = false; map renameMap; bool printMap = false; if (mapFile != "") { readMapFile(renameMap); ignoreNew = true; } else { printMap = true; } if (fileFile != "") { processFile(); } else { map old2NewNameMap; if ((nameFile != "") || (countfile != "") || (groupfile != "")) { processNameGroupCountFiles(renameMap, old2NewNameMap); }else if (mapFile != "") { old2NewNameMap = renameMap; } renameMap.clear(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (listfile != "") { readList(old2NewNameMap); } if (fastaFile != "") { readFasta(old2NewNameMap); } if (fastqfile != "") { readFastq(old2NewNameMap); } if (qualfile != "") { readQual(old2NewNameMap); } if (contigsfile != "") { readContigs(old2NewNameMap); } if (taxfile != "") { readTax(old2NewNameMap); } if (printMap) { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastaFile); } map variables; string outMapFile = thisOutputDir + util.getRootName(util.getSimpleName(fastaFile)); variables["[filename]"] = outMapFile; outMapFile = getOutputFileName("map", variables); outputNames.push_back(outMapFile); outputTypes["map"].push_back(outMapFile); ofstream outMap; util.openOutputFile(outMapFile, outMap); //print map for(map::iterator it = old2NewNameMap.begin(); it != old2NewNameMap.end(); it++) { outMap << it->second << '\t' << it->first << endl; } outMap.close(); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); } } itTypes = outputTypes.find("file"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFileFile(currentName); } } return 
0; } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "execute"); exit(1); } } //********************************************************************************************************************** void RenameSeqsCommand::processNameGroupCountFiles(map& oldMap, map& old2NewNameMap){ try { bool oldMapEmpty = true; if (oldMap.size() != 0) { oldMapEmpty = false; } GroupMap* groupMap = nullptr; CountTable* countTable = nullptr; bool hasGroups = false; vector Groups; if (groupfile != "") { groupMap = new GroupMap(groupfile); int groupError = groupMap->readMap(); if (groupError == 1) { delete groupMap; return; } Groups = groupMap->getNamesOfGroups(); hasGroups = true; }else if (countfile != "") { countTable = new CountTable(); countTable->readTable(countfile, true, false); hasGroups = countTable->hasGroupInfo(); if (hasGroups) { Groups = countTable->getNamesOfGroups(); Groups.push_back("Multi"); } } //set up for reads map counts; for (int i = 0; i < Groups.size(); i++) { counts[Groups[i]] = 1; } string thisOutputDir = outputdir; if (nameFile != "") { map > nameMap; thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(nameFile); } string outNameFile = thisOutputDir + util.getRootName(util.getSimpleName(nameFile)); map variables; variables["[filename]"] = outNameFile; variables["[extension]"] = util.getExtension(nameFile); outNameFile = getOutputFileName("name", variables); outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile); ofstream outName; util.openOutputFile(outNameFile, outName); util.readNames(nameFile, nameMap); for (map >::iterator itNames = nameMap.begin(); itNames != nameMap.end(); itNames++) { vector dups = itNames->second; if (m->getControl_pressed()) { break; } if (oldMapEmpty) { for (int i = 0; i < dups.size(); i++) { string group = ""; if (groupfile != "") { group = groupMap->getGroup(dups[i]); }else if (countfile != "") { if (hasGroups) { vector thisReadsGroups = countTable->getGroups(dups[i]); if (thisReadsGroups.size() == 0) { group = "not found"; } else if (thisReadsGroups.size() == 1) { group = thisReadsGroups[0]; } else { group = "Multi"; } } } if (group == "not found") { m->mothurOut("[ERROR]: " + dups[i] + " is not in your file, please correct.\n"); m->setControl_pressed(true); } else { string newName = toString(counts[group]); counts[group]++; if ((placement == "back") && (group != "")) { newName += delim + group; } else if (group != "") { newName = group + delim + newName; } if (i == 0) { outName << newName << '\t' << newName; } else { outName << "," << newName; } oldMap[newName] = dups[i]; old2NewNameMap[dups[i]] = newName; } } outName << endl; }else { for (int i = 0; i < dups.size(); i++) { //get new name string newName = ""; map::iterator itMap = oldMap.find(dups[i]); if (itMap == oldMap.end()) { m->mothurOut("[ERROR]: " + dups[i] + " is not in your map file, please correct.\n"); m->setControl_pressed(true);} else { newName = itMap->second; } if (i == 0) { outName << newName << '\t' << newName; } else { outName << "," << newName; } old2NewNameMap[dups[i]] = newName; } outName << endl; } } outName.close(); } if (groupfile != "") { thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } string outGroupFile = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); map variables; variables["[filename]"] = outGroupFile; variables["[extension]"] = util.getExtension(groupfile); outGroupFile = getOutputFileName("group", variables); 
outputNames.push_back(outGroupFile); outputTypes["group"].push_back(outGroupFile); ofstream outGroup; util.openOutputFile(outGroupFile, outGroup); vector namesOfSeqs = groupMap->getNamesSeqs(); for (int i = 0; i < namesOfSeqs.size(); i++) { string group = groupMap->getGroup(namesOfSeqs[i]); string newName = ""; if (group == "not found") { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is not in your file, please correct.\n"); m->setControl_pressed(true); } if (m->getControl_pressed()) { break; } if (oldMapEmpty) { map::iterator itMap = old2NewNameMap.find(namesOfSeqs[i]); if (itMap == old2NewNameMap.end()) { newName = toString(counts[group]); counts[group]++; if ((placement == "back") && (group != "")) { newName += delim + group; } else if (group != "") { newName = group + delim + newName; } }else { newName = itMap->second; //newName = name from namefile } oldMap[newName] = namesOfSeqs[i]; }else { map::iterator itMap = oldMap.find(namesOfSeqs[i]); if (itMap == oldMap.end()) { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is not in your map file, please correct.\n"); m->setControl_pressed(true);} else { newName = itMap->second; } } outGroup << newName << '\t' << group << endl; old2NewNameMap[namesOfSeqs[i]] = newName; } outGroup.close(); } if (countfile != "") { thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } string outCountFile = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); map variables; variables["[filename]"] = outCountFile; variables["[extension]"] = util.getExtension(countfile); outCountFile = getOutputFileName("count", variables); outputNames.push_back(outCountFile); outputTypes["count"].push_back(outCountFile); vector namesOfSeqs = countTable->getNamesOfSeqs(); for (int i = 0; i < namesOfSeqs.size(); i++) { if (m->getControl_pressed()) { break; } string newName = ""; if (oldMapEmpty) { map::iterator itMap = old2NewNameMap.find(namesOfSeqs[i]); if (itMap == old2NewNameMap.end()) { string group = ""; if (hasGroups) { vector thisReadsGroups = countTable->getGroups(namesOfSeqs[i]); if (thisReadsGroups.size() == 0) { group = "not found"; } else if (thisReadsGroups.size() == 1) { group = thisReadsGroups[0]; } else { group = "Multi"; } } if (group == "not found") { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is not in your file, please correct.\n"); m->setControl_pressed(true); } newName = toString(counts[group]); counts[group]++; if ((placement == "back") && (group != "")) { newName += delim + group; } else if (group != "") { newName = group + delim + newName; } }else { newName = itMap->second; } oldMap[newName] = namesOfSeqs[i]; }else { map::iterator itMap = oldMap.find(namesOfSeqs[i]); if (itMap == oldMap.end()) { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is not in your map file, please correct.\n"); m->setControl_pressed(true);} else { newName = itMap->second; } } countTable->renameSeq(namesOfSeqs[i], newName); old2NewNameMap[namesOfSeqs[i]] = newName; } countTable->printTable(outCountFile); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } } if (groupMap != nullptr) { delete groupMap; } if (countTable != nullptr) { delete countTable; } return; } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "execute"); exit(1); } } //********************************************************************************************************************** void RenameSeqsCommand::readFasta(map& oldMap){ try { //prepare filenames and open files string 
thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastaFile); } string outFastaFile = thisOutputDir + util.getRootName(util.getSimpleName(fastaFile)); map variables; variables["[filename]"] = outFastaFile; variables["[extension]"] = util.getExtension(fastaFile); outFastaFile = getOutputFileName("fasta", variables); outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); ifstream in; util.openInputFile(fastaFile, in); ofstream out; util.openOutputFile(outFastaFile, out); map::iterator it; int count = 0; while(!in.eof()){ if (m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); it = oldMap.find(seq.getName()); if (it == oldMap.end()) { //not in other files, create name if (!ignoreNew) { oldMap[seq.getName()] = toString(count); seq.setName(toString(count)); count++; } }else { seq.setName(it->second); } seq.printSequence(out); } in.close(); out.close(); } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** string RenameSeqsCommand::readFastq(map& oldMap){ try { bool oldMapEmpty = true; if (oldMap.size() != 0) { oldMapEmpty = false; } //prepare filenames and open files string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastqfile); } string outFastqFile = thisOutputDir + util.getRootName(util.getSimpleName(fastqfile)); map variables; variables["[filename]"] = outFastqFile; variables["[extension]"] = ".fastq"; outFastqFile = getOutputFileName("fastq", variables); outputNames.push_back(outFastqFile); outputTypes["fastq"].push_back(outFastqFile); //open input file ifstream inFastq; #ifdef USE_BOOST boost::iostreams::filtering_istream inFastqBoost; #endif if (!gz) { util.openInputFile(fastqfile, inFastq); } else { //compressed files #ifdef USE_BOOST util.openInputFileBinary(fastqfile, inFastq, inFastqBoost); #endif } #ifdef USE_BOOST #else if (gz) { m->mothurOut("[ERROR]: Your files are in compressed .gz form and you do not have the boost library install, quitting.\n"); m->setControl_pressed(true); return 0; } #endif string format = "illumina1.8+"; ofstream out; util.openOutputFile(outFastqFile, out); map::iterator it; int count = 0; bool good = true; while (good) { if (m->getControl_pressed()) { break; } bool tignore = false; FastqRead* fread; if (gz) { #ifdef USE_BOOST fread = new FastqRead(inFastqBoost, tignore, format); gobble(inFastqBoost); #endif }else { fread = new FastqRead(inFastq, tignore, format); gobble(inFastq); } string newName = toString(count); if (oldMapEmpty) { if ((placement == "back") && (groupName != "")) { newName += delim + groupName; } else if (groupName != "") { newName = groupName + delim + newName; } oldMap[fread->getName()] = newName; fread->setName(newName); count++; }else { it = oldMap.find(fread->getName()); if (it == oldMap.end()) { //not in other files, create name if (!ignoreNew) { oldMap[fread->getName()] = newName; count++; fread->setName(newName); } }else { fread->setName(it->second); } } fread->printFastq(out); delete fread; if (gz) { #ifdef USE_BOOST if (inFastqBoost.eof()) { good = false; break; } #endif }else { if (inFastq.eof()) { good = false; break; } } } if (gz) { #ifdef USE_BOOST inFastqBoost.pop(); #endif }else { inFastq.close(); } out.close(); return outFastqFile; } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "readFastq"); exit(1); } } 
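// Illustrative sketch only, not part of mothur and not called anywhere: how the renaming above composes
// a new read name from the running counter, the group name, the delim string and the placement option.
// composeName is a hypothetical helper name used just for this sketch, and the group "F3D0" is a made-up
// example value.
//
//     string composeName(int count, const string& group, const string& delim, const string& placement) {
//         string newName = toString(count);
//         if (group == "")         { return newName;                 }  // no group information available
//         if (placement == "back") { return newName + delim + group; }  // e.g. "1_F3D0" with the default delim "_"
//         return group + delim + newName;                               // placement == "front", e.g. "F3D0_1"
//     }
//
// The map file written alongside the renamed outputs holds one "newName<TAB>oldName" pair per read, so
// every renamed sequence can be traced back to its original name.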
//********************************************************************************************************************** void RenameSeqsCommand::readQual(map& oldMap){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(qualfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(qualfile)); variables["[extension]"] = util.getExtension(qualfile); string outputFileName = getOutputFileName("qfile", variables); outputNames.push_back(outputFileName); outputTypes["qfile"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(qualfile, in); map::iterator it; int count = 0; while(!in.eof()){ if (m->getControl_pressed()) { break; } QualityScores qual(in); gobble(in); it = oldMap.find(qual.getName()); if (it == oldMap.end()) { if (!ignoreNew) { oldMap[qual.getName()] = toString(count); qual.setName(toString(count)); count++; } }else { qual.setName(it->second); } qual.printQScores(out); } in.close(); out.close(); } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "readQual"); exit(1); } } //********************************************************************************************************************** void RenameSeqsCommand::readTax(map& oldMap){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile)); variables["[extension]"] = util.getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(taxfile, in); string name, tax; int count = 0; map::iterator it; while(!in.eof()){ if (m->getControl_pressed()) { break; } in >> name; gobble(in); tax = util.getline(in); gobble(in); it = oldMap.find(name); if (it == oldMap.end()) { if (!ignoreNew) { oldMap[name] = toString(count); name = toString(count); count++; } }else { name = it->second; } out << name << '\t' << tax << endl; } in.close(); out.close(); } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "readTax"); exit(1); } } //********************************************************************************************************************** void RenameSeqsCommand::readContigs(map& oldMap){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(contigsfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(contigsfile)); variables["[extension]"] = util.getExtension(contigsfile); string outputFileName = getOutputFileName("contigsreport", variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["contigsreport"].push_back(outputFileName); ifstream in; util.openInputFile(contigsfile, in); ContigsReport report; report.readHeaders(in); gobble(in); report.printHeaders(out); map::iterator it; int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } report.read(in); gobble(in); it = oldMap.find(report.getName()); if (it != oldMap.end()) { report.setName(it->second); } else { if (!ignoreNew) { oldMap[report.getName()] = toString(count); report.setName(toString(count)); count++; } } report.print(out); } in.close(); out.close(); } catch(exception& e) { m->errorOut(e, 
"RenameSeqsCommand", "readContigs"); exit(1); } } //********************************************************************************************************************** void RenameSeqsCommand::readList(map& oldMap){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); string outputFileName = getOutputFileName("list", variables); ofstream out; util.openOutputFile(outputFileName, out); outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); InputData input(listfile, "list", nullVector); set processedLabels; set userLabels; string lastLabel = ""; bool printHeaders = true; ListVector* list = util.getNextList(input, true, userLabels, processedLabels, lastLabel); while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } list->setPrintedLabels(printHeaders); //process list int count = 0; for (int i = 0; i < list->getNumBins(); i++) { string bin = list->get(i); vector names; util.splitAtComma(bin, names); for (int j = 0; j < names.size(); j++) { map::iterator it = oldMap.find(names[j]); if (it == oldMap.end()) { if (!ignoreNew) { string newName = toString(count); count++; oldMap[names[j]] = newName; names[j] = newName; } }else { names[j] = it->second; } } bin = util.getStringFromVector(names, ","); list->set(i, bin); } //print list list->print(out); printHeaders = false; delete list; list = util.getNextList(input, true, userLabels, processedLabels, lastLabel); } out.close(); } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "readList"); exit(1); } } //********************************************************************************************************************** int RenameSeqsCommand::readMapFile(map& readMap){ try { ifstream in; util.openInputFile(mapFile, in); map::iterator it; string oldname, newname; while (!in.eof()) { if (m->getControl_pressed()) { break; } in >> oldname; gobble(in); in >> newname; gobble(in); it = readMap.find(oldname); if (it != readMap.end()) { m->mothurOut("[ERROR]: " + oldname + " is already in your map file. 
Sequence names must be unique, quitting.\n"); m->setControl_pressed(true); }else { readMap[oldname] = newname; } } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "readMapFile"); exit(1); } } //********************************************************************************************************************** int RenameSeqsCommand::processFile(){ try { ignoreNew = true; //if there are sequences only present in some files, ignore them map file2Group; gz = false; vector< vector > files = readFiles(file2Group, gz); string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fileFile); } string outFileFile = thisOutputDir + util.getRootName(util.getSimpleName(fileFile)); map variables; variables["[filename]"] = outFileFile; variables["[extension]"] = util.getExtension(fileFile); outFileFile = getOutputFileName("file", variables); outputNames.push_back(outFileFile); outputTypes["file"].push_back(outFileFile); ofstream outFile; util.openOutputFile(outFileFile, outFile); for (int i = 0; i < files.size(); i++) { if (m->getControl_pressed()) { break; } m->mothurOut("\n>>>>>\tRenaming file pair " + files[i][0] + " - " + files[i][1] + " (files " + toString(i+1) + " of " + toString(files.size()) + ")\t<<<<<\n"); map old2NewNameMap; //each file pair gets a map file string fileOutput = ""; groupName = file2Group[i]; //blank if no group is in file if (groupName != "") { fileOutput += groupName + '\t'; } fastqfile = files[i][0]; //forwardFastq string renamedFile = readFastq(old2NewNameMap); fileOutput += util.getSimpleName(renamedFile) + '\t'; fastqfile = files[i][1]; //reverseFastq renamedFile = readFastq(old2NewNameMap); fileOutput += util.getSimpleName(renamedFile) + '\t'; if (files[i][2] != "") { //blank if no forward index is in file fastqfile = files[i][2]; //forwardIndex renamedFile = readFastq(old2NewNameMap); fileOutput += util.getSimpleName(renamedFile) + '\t'; } if (files[i][3] != "") { //blank if no reverse index is in file fastqfile = files[i][3]; //reverseIndex renamedFile = readFastq(old2NewNameMap); fileOutput += util.getSimpleName(renamedFile) + '\t'; } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(files[i][0]); } string outMapFile = thisOutputDir + util.getRootName(util.getSimpleName(files[i][0])); map variables; variables["[filename]"] = outMapFile; outMapFile = getOutputFileName("map", variables); outputNames.push_back(outMapFile); outputTypes["map"].push_back(outMapFile); ofstream outMap; util.openOutputFile(outMapFile, outMap); //print map for(map::iterator it = old2NewNameMap.begin(); it != old2NewNameMap.end(); it++) { outMap << it->second << '\t' << it->first << endl; } outMap.close(); //print renamed filenames to new file file outFile << fileOutput << endl; } outFile.close(); return 0; } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "processFile"); exit(1); } } //********************************************************************************************************************** vector< vector > RenameSeqsCommand::readFiles(map& file2Group, bool& isGZ){ try { FileFile dataFile(fileFile, "contigs"); vector< vector > dataFiles = dataFile.getFiles(); int dataFileFormat = dataFile.getFileFormat(); file2Group = dataFile.getFile2Group(); isGZ = dataFile.isGZ(); if (file2Group.size() == 0) { m->setControl_pressed(true); } return dataFiles; } catch(exception& e) { m->errorOut(e, "RenameSeqsCommand", "readFiles"); exit(1); } } 
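// For reference, the column layouts the file parameter accepts here (restating getHelpString above);
// the file names and the group name "groupA" below are made-up examples:
//
//     forward.fastq   reverse.fastq
//     groupA          forward.fastq   reverse.fastq
//     forward.fastq   reverse.fastq   forwardIndex.fastq   reverseIndex.fastq   (use 'none' for a missing index)
//
// processFile() renames the reads in each pair, writes a per-pair renamed_map file of newName/oldName
// pairs, and emits a new file file that points at the renamed fastqs.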
/**************************************************************************************/ mothur-1.48.0/source/commands/renameseqscommand.h000077500000000000000000000031111424121717000220740ustar00rootroot00000000000000// // renameseqscommand.h // Mothur // // Created by SarahsWork on 5/28/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_renameseqscommand_h #define Mothur_renameseqscommand_h #include "command.hpp" #include "filefile.hpp" class RenameSeqsCommand : public Command { #ifdef UNIT_TEST friend class TestRenameSeqsCommand; #endif public: RenameSeqsCommand(string); ~RenameSeqsCommand() = default; vector setParameters(); string getCommandName() { return "rename.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Rename.seqs"; } string getDescription() { return "rename sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string fastaFile, fastqfile, listfile, nameFile, groupfile, placement, delim, countfile, qualfile, contigsfile, fileFile, mapFile, taxfile, groupName; vector outputNames; bool abort, ignoreNew, gz; map nameMap; void readQual(map&); void readTax(map&); void readContigs(map&); void readList(map&); void readFasta(map&); string readFastq(map&); int processFile(); int readMapFile(map&); vector< vector > readFiles(map&, bool&); void processNameGroupCountFiles(map&, map&); }; #endif mothur-1.48.0/source/commands/reversecommand.cpp000077500000000000000000000176521424121717000217560ustar00rootroot00000000000000/* * reversecommand.cpp * Mothur * * Created by Pat Schloss on 6/6/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "reversecommand.h" #include "sequence.hpp" #include "qualityscores.h" //********************************************************************************************************************** vector ReverseSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "fastaQual", "none","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "fastaQual", "none","qfile",false,false,true); parameters.push_back(pqfile); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["qfile"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ReverseSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ReverseSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The reverse.seqs command reads a fastafile and outputs a fasta file containing the reverse compliment.\n"; helpString += "The reverse.seqs command parameters fasta or qfile are required.\n"; helpString += "The reverse.seqs command should be in the following format: \n"; helpString += "reverse.seqs(fasta=yourFastaFile) \n"; return helpString; } 
catch(exception& e) { m->errorOut(e, "ReverseSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ReverseSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],rc,[extension]"; } else if (type == "qfile") { pattern = "[filename],rc,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ReverseSeqsCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ReverseSeqsCommand::ReverseSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastaFileName = validParameter.validFile(parameters, "fasta"); if (fastaFileName == "not open") { abort = true; } else if (fastaFileName == "not found") { fastaFileName = "";}// m->mothurOut("fasta is a required parameter for the reverse.seqs command.\n"); abort = true; } else { current->setFastaFile(fastaFileName); } qualFileName = validParameter.validFile(parameters, "qfile"); if (qualFileName == "not open") { abort = true; } else if (qualFileName == "not found") { qualFileName = ""; }//m->mothurOut("fasta is a required parameter for the reverse.seqs command.\n"); abort = true; } else { current->setQualFile(qualFileName); } if((fastaFileName == "") && (qualFileName == "")){ fastaFileName = current->getFastaFile(); if (fastaFileName != "") { m->mothurOut("Using " + fastaFileName + " as input file for the fasta parameter.\n"); } else { qualFileName = current->getQualFile(); if (qualFileName != "") { m->mothurOut("Using " + qualFileName + " as input file for the qfile parameter.\n"); } else { m->mothurOut("You have no current files for fasta or qfile, and fasta or qfile is a required parameter for the reverse.seqs command.\n"); abort = true; } } } } } catch(exception& e) { m->errorOut(e, "ReverseSeqsCommand", "ReverseSeqsCommand"); exit(1); } } //*************************************************************************************************************** int ReverseSeqsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } string fastaReverseFileName; if(fastaFileName != ""){ ifstream inFASTA; util.openInputFile(fastaFileName, inFASTA); ofstream outFASTA; string tempOutputDir = outputdir; if (outputdir == "") { tempOutputDir += util.hasPath(fastaFileName); } map variables; variables["[filename]"] = tempOutputDir + util.getRootName(util.getSimpleName(fastaFileName)); variables["[extension]"] = util.getExtension(fastaFileName); fastaReverseFileName = getOutputFileName("fasta", variables); util.openOutputFile(fastaReverseFileName, outFASTA); while(!inFASTA.eof()){ if (m->getControl_pressed()) { inFASTA.close(); outFASTA.close(); util.mothurRemove(fastaReverseFileName); return 0; } Sequence currSeq(inFASTA); gobble(inFASTA); if (currSeq.getName() != "") { currSeq.reverseComplement(); currSeq.printSequence(outFASTA); } } inFASTA.close(); outFASTA.close(); outputNames.push_back(fastaReverseFileName); 
outputTypes["fasta"].push_back(fastaReverseFileName); } string qualReverseFileName; if(qualFileName != ""){ QualityScores currQual; ifstream inQual; util.openInputFile(qualFileName, inQual); ofstream outQual; string tempOutputDir = outputdir; if (outputdir == "") { tempOutputDir += util.hasPath(qualFileName); } map variables; variables["[filename]"] = tempOutputDir + util.getRootName(util.getSimpleName(qualFileName)); variables["[extension]"] = util.getExtension(qualFileName); string qualReverseFileName = getOutputFileName("qfile", variables); util.openOutputFile(qualReverseFileName, outQual); while(!inQual.eof()){ if (m->getControl_pressed()) { inQual.close(); outQual.close(); util.mothurRemove(qualReverseFileName); return 0; } currQual = QualityScores(inQual); gobble(inQual); currQual.flipQScores(); currQual.printQScores(outQual); } inQual.close(); outQual.close(); outputNames.push_back(qualReverseFileName); outputTypes["qfile"].push_back(qualReverseFileName); } if (m->getControl_pressed()) { util.mothurRemove(qualReverseFileName); util.mothurRemove(fastaReverseFileName); return 0; } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for(int i=0;imothurOut(outputNames[i]); m->mothurOutEndLine(); } return 0; } catch(exception& e) { m->errorOut(e, "ReverseSeqsCommand", "execute"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/reversecommand.h000077500000000000000000000016051424121717000214120ustar00rootroot00000000000000#ifndef REVERSECOMMAND_H #define REVERSECOMMAND_H /* * reversecommand.h * Mothur * * Created by Pat Schloss on 6/6/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "command.hpp" class ReverseSeqsCommand : public Command { public: ReverseSeqsCommand(string); ~ReverseSeqsCommand() = default; vector setParameters(); string getCommandName() { return "reverse.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Reverse.seqs"; } string getDescription() { return "outputs a fasta file containing the reverse-complements"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; string fastaFileName, qualFileName; vector outputNames; }; #endif mothur-1.48.0/source/commands/screenseqscommand.cpp000066400000000000000000001631461424121717000224530ustar00rootroot00000000000000/* * screenseqscommand.cpp * Mothur * * Created by Pat Schloss on 6/3/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. 
* */ #include "screenseqscommand.h" #include "counttable.h" #include "summary.hpp" #include "removeseqscommand.h" #include "alignreport.hpp" #include "contigsreport.hpp" //********************************************************************************************************************** vector ScreenSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "report", "none", "none","contigsreport",false,false,true); parameters.push_back(pcontigsreport); CommandParameter palignreport("alignreport", "InputTypes", "", "", "report", "none", "none","alignreport",false,false); parameters.push_back(palignreport); CommandParameter psummary("summary", "InputTypes", "", "", "report", "none", "none","summary",false,false); parameters.push_back(psummary); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false); parameters.push_back(pqfile); CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false); parameters.push_back(ptax); CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pstart); CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pend); CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig); CommandParameter pmaxhomop("maxhomop", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxhomop); CommandParameter pminlength("minlength", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminlength); CommandParameter pmaxlength("maxlength", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxlength); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pcriteria("criteria", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pcriteria); CommandParameter poptimize("optimize", "Multiple", "none-start-end-maxambig-maxhomop-minlength-maxlength", "none", "", "", "","",true,false); parameters.push_back(poptimize); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); //report parameters CommandParameter pminoverlap("minoverlap", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pminoverlap); CommandParameter postart("ostart", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(postart); CommandParameter poend("oend", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(poend); CommandParameter pmismatches("mismatches", "Number", "", "-1", "", "", 
"","",false,false); parameters.push_back(pmismatches); CommandParameter pmaxn("maxn", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxn); CommandParameter pminscore("minscore", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pminscore); CommandParameter pmaxinsert("maxinsert", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxinsert); CommandParameter pminsim("minsim", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pminsim); vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["alignreport"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["contigsreport"] = tempOutNames; outputTypes["summary"] = tempOutNames; abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ScreenSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The screen.seqs command reads a fasta file and screens sequences.\n"; helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, count, qfile, alignreport, contigsreport, summary, taxonomy, optimize, criteria and processors. The fasta parameter is required.\n"; helpString += "The contigsreport parameter allows you to use the contigsreport file to determine if a sequence is good. Screening parameters include: minoverlap, ostart, oend and mismatches. \n"; helpString += "The alignreport parameter allows you to use the alignreport file to determine if a sequence is good. Screening parameters include: minsim, minscore and maxinsert. \n"; helpString += "The summary parameter allows you to use the summary file from summary.seqs to save time processing.\n"; helpString += "The taxonomy parameter allows you to remove bad seqs from taxonomy files.\n"; helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1, meaning ignore.\n"; helpString += "The end parameter is used to set a position the \"good\" sequences must end after. The default is -1, meaning ignore.\n"; helpString += "The maxambig parameter allows you to set the maximum number of ambiguous bases allowed. The default is -1, meaning ignore.\n"; helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n"; helpString += "The minlength parameter allows you to set and minimum sequence length. Default=10.\n"; helpString += "The maxn parameter allows you to set and maximum number of N's allowed in a sequence. \n"; helpString += "The minoverlap parameter allows you to set and minimum overlap. The default is -1, meaning ignore. \n"; helpString += "The ostart parameter is used to set an overlap position the \"good\" sequences must start by. The default is -1, meaning ignore. \n"; helpString += "The oend parameter is used to set an overlap position the \"good\" sequences must end after. The default is -1, meaning ignore.\n"; helpString += "The mismatches parameter allows you to set and maximum mismatches in the contigs.report. 
\n"; helpString += "The minsim parameter allows you to set the minimum similarity to template sequences during alignment. Found in column \'SimBtwnQuery&Template\' in align.report file.\n"; helpString += "The minscore parameter allows you to set the minimum search score during alignment. Found in column \'SearchScore\' in align.report file.\n"; helpString += "The maxinsert parameter allows you to set the maximum number of insertions during alignment. Found in column \'LongestInsert\' in align.report file.\n"; helpString += "The processors parameter allows you to specify the number of processors to use while running the command. The default is all available.\n"; helpString += "The optimize and criteria parameters allow you set the start, end, maxabig, maxhomop, minlength and maxlength parameters relative to your set of sequences.\n"; helpString += "For example optimize=start-end, criteria=90, would set the start and end values to the position 90% of your sequences started and ended.\n"; helpString += "The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile.\n"; helpString += "The screen.seqs command should be in the following format: \n"; helpString += "screen.seqs(fasta=yourFastaFile, count=yourCountFile, start=yourStart, end=yourEnd)\n"; helpString += "Example screen.seqs(fasta=stability.trim.contigs.good.unique.align, count=stability.trim.contigs.good.count_table, start=1968, end=11550, maxhomop=8)\n"; getCommonQuestions(); return helpString; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ScreenSeqsCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string issue = "Screen.seqs is removing most / all of my sequences. What do I do?"; issues.push_back(issue); string ianswer = "\tThe most common cause of mothur removing all of your reads is a misunderstanding of the start and end parameter options. The start parameter is used to set a position the \"good\" sequences must START BY. Any read with a start position before the value given to start will be removed. The end parameter is used to set a position the \"good\" sequences must END AFTER. Any read that ends before the value given to end will be removed. Alternatively, reads may be removed if there is poor or no overlap in your dataset.\n"; ianswers.push_back(ianswer); string howto = "How do I set the start and end parameters?"; howtos.push_back(howto); string hanswer = "\tRun summary.seqs(fasta=yourFastaFile) to summarize your datasets start and end positions. \n\n\tStart\tEnd\tNBases\tAmbigs\tPolymer\tNumSeqs\nMinimum:\t1250\t10693\t250\t0\t3\t1\n2.5%-tile:\t1968\t11550\t252\t0\t3\t3222\n25%-tile:\t1968\t11550\t252\t0\t4\t32219\nMedian:\t\t1968\t11550\t252\t0\t4\t64437\n75%-tile:\t1968\t11550\t253\t0\t5\t96655\n97.5%-tile:\t1968\t11550\t253\t0\t6\t125651\nMaximum:\t1982\t13400\t270\t0\t12\t128872\nMean:\t\t1967.99\t11550\t252.462\t0\t4.36693\n# of unique seqs:\t16426\ntotal # of seqs:\t128872\n\nThe start parameter is used to set a position the \"good\" sequences must start by. In general, you want to select the start value found at 97.5%. Meaning 97.5% of your reads start by this position. In the example above we want to set start=1968. The end parameter is used to set a position the \"good\" sequences must end after. 
In general you set this to the end position at 2.5%. Meaning 97.5% of your reads end after this position. In the example above we want to set end=11550.\n"; hanswers.push_back(hanswer); string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string ScreenSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],good,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],good,[extension]"; } else if (type == "name") { pattern = "[filename],good,[extension]"; } else if (type == "group") { pattern = "[filename],good,[extension]"; } else if (type == "count") { pattern = "[filename],good,[extension]"; } else if (type == "accnos") { pattern = "[filename],bad.accnos"; } else if (type == "qfile") { pattern = "[filename],good,[extension]"; } else if (type == "alignreport") { pattern = "[filename],good.[extension]"; } else if (type == "contigsreport") { pattern = "[filename],good.[extension]"; } else if (type == "summary") { pattern = "[filename],good.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** ScreenSeqsCommand::ScreenSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fileType = "name file"; //check for required parameters fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } qualfile = validParameter.validFile(parameters, "qfile"); if (qualfile == "not open") { abort = true; } else if (qualfile == "not found") { qualfile = ""; } else { current->setQualFile(qualfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); fileType = "count file"; } contigsreport = validParameter.validFile(parameters, "contigsreport"); if (contigsreport == "not open") { 
contigsreport = ""; abort = true; } else if (contigsreport == "not found") { contigsreport = ""; } summaryfile = validParameter.validFile(parameters, "summary"); if (summaryfile == "not open") { summaryfile = ""; abort = true; } else if (summaryfile == "not found") { summaryfile = ""; } else { current->setSummaryFile(summaryfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } alignreport = validParameter.validFile(parameters, "alignreport"); if (alignreport == "not open") { abort = true; } else if (alignreport == "not found") { alignreport = ""; } taxonomy = validParameter.validFile(parameters, "taxonomy"); if (taxonomy == "not open") { abort = true; } else if (taxonomy == "not found") { taxonomy = ""; } if (outputdir == ""){ outputdir += util.hasPath(fastafile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... string temp; temp = validParameter.valid(parameters, "start"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, startPos); temp = validParameter.valid(parameters, "end"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, endPos); temp = validParameter.valid(parameters, "maxambig"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxAmbig); temp = validParameter.valid(parameters, "maxhomop"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxHomoP); temp = validParameter.valid(parameters, "minlength"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, minLength); temp = validParameter.valid(parameters, "maxlength"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxLength); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "minoverlap"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, minOverlap); temp = validParameter.valid(parameters, "ostart"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, oStart); temp = validParameter.valid(parameters, "oend"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, oEnd); temp = validParameter.valid(parameters, "mismatches"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, mismatches); temp = validParameter.valid(parameters, "maxn"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxN); temp = validParameter.valid(parameters, "minscore"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, minScore); temp = validParameter.valid(parameters, "maxinsert"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxInsert); temp = validParameter.valid(parameters, "minsim"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, minSim); temp = validParameter.valid(parameters, "optimize"); //optimizing trumps the optimized values original value if (temp == "not found"){ temp = "none"; } util.splitAtDash(temp, optimize); if ((contigsreport != "") && ((summaryfile != "") || ( alignreport != ""))) { m->mothurOut("[ERROR]: You may only provide one of the following: contigsreport, alignreport or summary, aborting.\n"); abort=true; } if ((alignreport != "") && 
((summaryfile != "") || ( contigsreport != ""))) { m->mothurOut("[ERROR]: You may only provide one of the following: contigsreport, alignreport or summary, aborting.\n"); abort=true; } if ((summaryfile != "") && ((alignreport != "") || ( contigsreport != ""))) { m->mothurOut("[ERROR]: You may only provide one of the following: contigsreport, alignreport or summary, aborting.\n"); abort=true; } //check to make sure you have the files you need for certain screening if ((contigsreport == "") && ((minOverlap != -1) || (oStart != -1) || (oEnd != -1) || (mismatches != -1))) { m->mothurOut("[ERROR]: minoverlap, ostart, oend and mismatches can only be used with a contigs.report file, aborting.\n"); abort=true; } if ((alignreport == "") && (!util.isEqual(minScore, -1) || (maxInsert != -1) || !util.isEqual(minSim, -1))) { m->mothurOut("[ERROR]: minscore, maxinsert and minsim can only be used with a align.report file, aborting.\n"); abort=true; } //check for invalid optimize options set validOptimizers; validOptimizers.insert("none"); validOptimizers.insert("start"); validOptimizers.insert("end"); validOptimizers.insert("maxambig"); validOptimizers.insert("maxhomop"); validOptimizers.insert("minlength"); validOptimizers.insert("maxlength"); validOptimizers.insert("maxn"); if (contigsreport != "") { validOptimizers.insert("minoverlap"); validOptimizers.insert("ostart"); validOptimizers.insert("oend"); validOptimizers.insert("mismatches"); } if (alignreport != "") { validOptimizers.insert("minscore"); validOptimizers.insert("maxinsert"); validOptimizers.insert("minsim"); } for (int i = 0; i < optimize.size(); i++) { if (validOptimizers.count(optimize[i]) == 0) { m->mothurOut(optimize[i] + " is not a valid optimizer with your input files. Valid options are "); string valid = ""; for (set::iterator it = validOptimizers.begin(); it != validOptimizers.end(); it++) { valid += (*it) + ", "; } if (valid.length() != 0) { valid = valid.substr(0, valid.length()-2); } m->mothurOut(valid + ".\n"); optimize.erase(optimize.begin()+i); i--; } } if (optimize.size() == 1) { if (optimize[0] == "none") { optimize.clear(); } } temp = validParameter.valid(parameters, "criteria"); if (temp == "not found"){ temp = "90"; } util.mothurConvert(temp, criteria); } } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand"); exit(1); } } //*************************************************************************************************************** int ScreenSeqsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } map badSeqNames; long start = time(nullptr); long long numFastaSeqs = 0; //use the namefile to optimize correctly if (namefile != "") { nameMap = util.readNames(namefile); } else if (countfile != "") { CountTable ct; ct.readTable(countfile, true, false); nameMap = ct.getNameMap(); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); badAccnosFile = getOutputFileName("accnos",variables); if ((contigsreport == "") && (summaryfile == "") && (alignreport == "")) { numFastaSeqs = screenFasta(badSeqNames); } else { numFastaSeqs = screenReports(badSeqNames); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //don't write or keep if blank bool wroteAccnos = false; if (util.isBlank(badAccnosFile)) { m->mothurOut("[NOTE]: no sequences were bad, removing " + badAccnosFile + "\n\n"); util.mothurRemove(badAccnosFile); } else { 
outputNames.push_back(badAccnosFile); outputTypes["accnos"].push_back(badAccnosFile); wroteAccnos = true; } if (wroteAccnos) { //use remove.seqs to create new name, group and count file if ((countfile != "") || (namefile != "") || (groupfile != "") || (qualfile != "") || (taxonomy != "")) { string strippedAccnos = printAccnos(badSeqNames); string inputString = "accnos=" + strippedAccnos; if (countfile != "") { inputString += ", count=" + countfile; } else{ if (namefile != "") { inputString += ", name=" + namefile; } if (groupfile != "") { inputString += ", group=" + groupfile; } } if(qualfile != "") { inputString += ", qfile=" + qualfile; } if(taxonomy != "") { inputString += ", taxonomy=" + taxonomy; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: remove.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map > filenames = removeCommand->getOutputFiles(); delete removeCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); util.mothurRemove(strippedAccnos); if (groupfile != "") { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(groupfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[extension]"] = util.getExtension(groupfile); string outGroup = getOutputFileName("group", variables); util.renameFile(filenames["group"][0], outGroup); outputNames.push_back(outGroup); outputTypes["group"].push_back(outGroup); } if (namefile != "") { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(namefile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[extension]"] = util.getExtension(namefile); string outName = getOutputFileName("name", variables); util.renameFile(filenames["name"][0], outName); outputNames.push_back(outName); outputTypes["name"].push_back(outName); } if (countfile != "") { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(countfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string outCount = getOutputFileName("count", variables); util.renameFile(filenames["count"][0], outCount); outputNames.push_back(outCount); outputTypes["count"].push_back(outCount); } if (qualfile != "") { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(qualfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(qualfile)); variables["[extension]"] = util.getExtension(qualfile); string outQual = getOutputFileName("qfile", variables); util.renameFile(filenames["qfile"][0], outQual); outputNames.push_back(outQual); outputTypes["qfile"].push_back(outQual); } if (taxonomy != "") { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxonomy); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxonomy)); variables["[extension]"] = util.getExtension(taxonomy); string outTax = getOutputFileName("taxonomy", variables); util.renameFile(filenames["taxonomy"][0], outTax); outputNames.push_back(outTax); outputTypes["taxonomy"].push_back(outTax); } } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } 
return 0; } m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOut("\n\n"); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.\n"); return 0; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "execute"); exit(1); } } //***************************************************************************************************************/ string ScreenSeqsCommand::printAccnos(map& badSeqNames){ try{ string filename = badAccnosFile + ".temp"; ofstream out; util.openOutputFile(filename, out); for (map::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) { out << it->first << endl; } out.close(); return filename; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "printAccnos"); exit(1); } } //***************************************************************************************************************/ int ScreenSeqsCommand::runFastaScreening(map& badSeqNames){ try{ map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); variables["[extension]"] = util.getExtension(fastafile); string goodSeqFile = getOutputFileName("fasta", variables); outputNames.push_back(goodSeqFile); outputTypes["fasta"].push_back(goodSeqFile); int numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); if (m->getControl_pressed()) { util.mothurRemove(goodSeqFile); return numFastaSeqs; } return numFastaSeqs; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "runFastaScreening"); exit(1); } } //***************************************************************************************************************/ int ScreenSeqsCommand::screenReports(map& badSeqNames){ try{ int numFastaSeqs = 0; //did not provide a summary file, but set a parameter that requires summarizing the fasta file //or did provide a summary file, but set maxn parameter so we must summarize the fasta file if (((summaryfile == "") && ((util.inUsersGroups("maxambig", optimize)) ||(util.inUsersGroups("maxhomop", optimize)) ||(util.inUsersGroups("maxlength", 
optimize)) || (util.inUsersGroups("minlength", optimize)) || (util.inUsersGroups("start", optimize)) || (util.inUsersGroups("end", optimize)))) || ((summaryfile != "") && util.inUsersGroups("maxn", optimize))) { getSummary(); } if ((summaryfile != "") && ((util.inUsersGroups("maxambig", optimize)) ||(util.inUsersGroups("maxhomop", optimize)) ||(util.inUsersGroups("maxlength", optimize)) || (util.inUsersGroups("minlength", optimize)) || (util.inUsersGroups("start", optimize)) || (util.inUsersGroups("end", optimize)))) { //summarize based on summaryfile getSummaryReport(); }else if ((contigsreport != "") && ((util.inUsersGroups("minoverlap", optimize)) || (util.inUsersGroups("ostart", optimize)) || (util.inUsersGroups("oend", optimize)) || (util.inUsersGroups("mismatches", optimize)))) { //optimize settings based on contigs file optimizeContigs(); }else if ((alignreport != "") && ((util.inUsersGroups("minsim", optimize)) || (util.inUsersGroups("minscore", optimize)) || (util.inUsersGroups("maxinsert", optimize)))) { //optimize settings based on contigs file optimizeAlign(); } //provided summary file, and did not set maxn so no need to summarize fasta if (summaryfile != "") { numFastaSeqs = screenSummary(badSeqNames); } //add in any seqs that fail due to contigs report results else if (contigsreport != "") { numFastaSeqs = screenContigs(badSeqNames); } //add in any seqs that fail due to align report else if (alignreport != "") { numFastaSeqs = screenAlignReport(badSeqNames); } return numFastaSeqs; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "screenReports"); exit(1); } } //*************************************************************************************************************** int ScreenSeqsCommand::screenAlignReport(map& badSeqNames){ try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(alignreport)); variables["[extension]"] = util.getExtension(alignreport); string outSummary = getOutputFileName("alignreport",variables); outputNames.push_back(outSummary); outputTypes["alignreport"].push_back(outSummary); ofstream out; util.openOutputFile(outSummary, out); ifstream in; util.openInputFile(alignreport, in); AlignReport report; report.readHeaders(in); gobble(in); report.printHeaders(out); int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); out.close(); return 0; } report.read(in); gobble(in); string trashCode = ""; bool goodSeq = true; // innocent until proven guilty //check longest insert int LongestInsert = report.getLongestInsert(); if(maxInsert != -1 && maxInsert < LongestInsert) { goodSeq = false; trashCode += "insert|"; } //check searchscore float SearchScore = report.getSearchScore(); if(!util.isEqual(minScore, -1) && minScore > SearchScore) { goodSeq = false; trashCode += "score|"; } //check similarity to template float SimBtwnQueryTemplate = report.getSimBtwnQueryAndTemplate(); if(!util.isEqual(minSim, -1) && minSim > SimBtwnQueryTemplate) { goodSeq = false; trashCode += "sim|"; } //print or assign to badSeqs if(goodSeq){ report.print(out); } else{ badSeqNames[report.getQueryName()] = trashCode; } count++; } in.close(); out.close(); int oldBadSeqsCount = badSeqNames.size(); int numFastaSeqs = runFastaScreening(badSeqNames); if (oldBadSeqsCount != badSeqNames.size()) { //more seqs were removed by maxns util.renameFile(outSummary, outSummary+".temp"); ofstream out2; util.openOutputFile(outSummary, out2); ifstream in2; util.openInputFile(outSummary+".temp", in2); report.readHeaders(in2); gobble(in2); 
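// Second pass over the align report: runFastaScreening() above may have flagged additional
// sequences, so the report written on the first pass was renamed to .temp and is re-read here,
// keeping only reads that are still absent from badSeqNames.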
report.printHeaders(out2); while (!in2.eof()) { if (m->getControl_pressed()) { in2.close(); out2.close(); return 0; } report.read(in2); gobble(in2); //are you good? if (badSeqNames.count(report.getQueryName()) == 0) { report.print(out2); } } in2.close(); out2.close(); util.mothurRemove(outSummary+".temp"); } if (numFastaSeqs != count) { m->mothurOut("[ERROR]: found " + toString(numFastaSeqs) + " sequences in your fasta file, and " + toString(count) + " sequences in your align report file, quitting.\n"); m->setControl_pressed(true); } return count; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "screenAlignReport"); exit(1); } } //***************************************************************************************************************/ int ScreenSeqsCommand::screenContigs(map& badSeqNames){ try{ map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(contigsreport)); variables["[extension]"] = util.getExtension(contigsreport); string outSummary = getOutputFileName("contigsreport",variables); outputNames.push_back(outSummary); outputTypes["contigsreport"].push_back(outSummary); ofstream out; util.openOutputFile(outSummary, out); ifstream in;util.openInputFile(contigsreport, in); ContigsReport report; report.readHeaders(in); gobble(in); report.printHeaders(out); int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); out.close(); return 0; } report.read(in); gobble(in); bool goodSeq = true; // innocent until proven guilty string trashCode = ""; if(oStart != -1 && oStart < report.getOverlapStart()) { goodSeq = false; trashCode += "ostart|"; } if(oEnd != -1 && oEnd > report.getOverlapEnd()) { goodSeq = false; trashCode += "oend|"; } if(maxN != -1 && maxN < report.getNumNs()) { goodSeq = false; trashCode += "n|"; } if(minOverlap != -1 && minOverlap > report.getOverlapLength()) { goodSeq = false; trashCode += "olength|"; } if(mismatches != -1 && mismatches < report.getMisMatches()) { goodSeq = false; trashCode += "mismatches|"; } if(goodSeq) { report.print(out); } else { badSeqNames[report.getName()] = trashCode; } count++; } in.close(); out.close(); int oldBadSeqsCount = badSeqNames.size(); int numFastaSeqs = runFastaScreening(badSeqNames); if (oldBadSeqsCount != badSeqNames.size()) { //more seqs were removed by maxns util.renameFile(outSummary, outSummary+".temp"); ofstream out2; util.openOutputFile(outSummary, out2); ifstream in2; util.openInputFile(outSummary+".temp", in2); report.readHeaders(in2); gobble(in2); report.printHeaders(out2); while (!in2.eof()) { if (m->getControl_pressed()) { in2.close(); out2.close(); return 0; } report.read(in2); gobble(in2); if (badSeqNames.count(report.getName()) == 0) { report.print(out2); } } in2.close(); out2.close(); util.mothurRemove(outSummary+".temp"); } if (numFastaSeqs != count) { m->mothurOut("[ERROR]: found " + toString(numFastaSeqs) + " sequences in your fasta file, and " + toString(count) + " sequences in your contigs report file, quitting.\n"); m->setControl_pressed(true); } return count; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "screenContigs"); exit(1); } } //***************************************************************************************************************/ int ScreenSeqsCommand::screenSummary(map& badSeqNames){ try{ map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(summaryfile)); string outSummary = getOutputFileName("summary",variables); outputNames.push_back(outSummary); 
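// screenSummary() applies the start/end/maxambig/maxhomop/minlength/maxlength filters using the
// pre-computed summary.seqs table (columns: seqname, start, end, nbases, ambigs, polymer, numSeqs)
// instead of re-parsing every sequence in the fasta file.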
outputTypes["summary"].push_back(outSummary); string name; int start, end, length, ambigs, polymer, numReps; ofstream out; util.openOutputFile(outSummary, out); ifstream in; util.openInputFile(summaryfile, in); out << (util.getline(in)) << endl; //skip headers int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); out.close(); return 0; } //seqname start end nbases ambigs polymer numSeqs in >> name >> start >> end >> length >> ambigs >> polymer >> numReps; gobble(in); bool goodSeq = true; // innocent until proven guilty string trashCode = ""; if(startPos != -1 && startPos < start) { goodSeq = false; trashCode += "start|"; } if(endPos != -1 && endPos > end) { goodSeq = false; trashCode += "end|"; } if(maxAmbig != -1 && maxAmbig < ambigs) { goodSeq = false; trashCode += "ambig|"; } if(maxHomoP != -1 && maxHomoP < polymer) { goodSeq = false; trashCode += "homop|"; } if(minLength > length) { goodSeq = false; trashCode += "getControl_pressed()) { in2.close(); out2.close(); return 0; } //seqname start end nbases ambigs polymer numSeqs in2 >> name >> start >> end >> length >> ambigs >> polymer >> numReps; gobble(in2); if (badSeqNames.count(name) == 0) { //are you good? out2 << name << '\t' << start << '\t' << end << '\t' << length << '\t' << ambigs << '\t' << polymer << '\t' << numReps << endl; } } in2.close(); out2.close(); util.mothurRemove(outSummary+".temp"); } if (numFastaSeqs != count) { m->mothurOut("[ERROR]: found " + toString(numFastaSeqs) + " sequences in your fasta file, and " + toString(count) + " sequences in your summary file, quitting.\n"); m->setControl_pressed(true); } return count; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "screenSummary"); exit(1); } } //***************************************************************************************************************/ int ScreenSeqsCommand::screenFasta(map& badSeqNames){ try{ if (optimize.size() != 0) { getSummary(); } if (m->getControl_pressed()) { return 0; } int numFastaSeqs = runFastaScreening(badSeqNames); return numFastaSeqs; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "screenFasta"); exit(1); } } //*************************************************************************************************************** int ScreenSeqsCommand::getSummaryReport(){ try { Summary sum(processors); sum.summarizeFastaSummary(summaryfile); double criteriaPercentile = criteria; double mincriteriaPercentile = (100 - criteria); for (int i = 0; i < optimize.size(); i++) { if (optimize[i] == "start") { startPos = sum.getStart(criteriaPercentile); m->mothurOut("Optimizing start to " + toString(startPos) + ".\n"); } else if (optimize[i] == "end") { endPos = sum.getEnd(mincriteriaPercentile); m->mothurOut("Optimizing end to " + toString(endPos) + ".\n"); } else if (optimize[i] == "maxambig") { maxAmbig = sum.getAmbig(criteriaPercentile); m->mothurOut("Optimizing maxambig to " + toString(maxAmbig) + ".\n"); } else if (optimize[i] == "maxhomop") { maxHomoP = sum.getAmbig(criteriaPercentile); m->mothurOut("Optimizing maxhomop to " + toString(maxHomoP) + ".\n"); } else if (optimize[i] == "minlength") { minLength = sum.getLength(mincriteriaPercentile); m->mothurOut("Optimizing minlength to " + toString(minLength) + ".\n"); if (minLength < 0) { m->setControl_pressed(true); } } else if (optimize[i] == "maxlength") { maxLength = sum.getLength(criteriaPercentile); m->mothurOut("Optimizing maxlength to " + toString(maxLength) + ".\n"); } } return 0; } catch(exception& e) { m->errorOut(e, 
"ScreenSeqsCommand", "getSummaryReport"); exit(1); } } //*************************************************************************************************************** int ScreenSeqsCommand::optimizeContigs(){ try{ Summary sum(processors); sum.summarizeContigsSummary(contigsreport); double criteriaPercentile = criteria; double mincriteriaPercentile = (100 - criteria); for (int i = 0; i < optimize.size(); i++) { if (optimize[i] == "ostart") { oStart = sum.getOStart(criteriaPercentile); m->mothurOut("Optimizing ostart to " + toString(oStart) + ".\n"); } else if (optimize[i] == "oend") { oEnd = sum.getOEnd(mincriteriaPercentile); m->mothurOut("Optimizing oend to " + toString(oEnd) + ".\n"); } else if (optimize[i] == "mismatches") { mismatches = sum.getMisMatches(criteriaPercentile); m->mothurOut("Optimizing mismatches to " + toString(mismatches) + ".\n"); } else if (optimize[i] == "maxn") { maxN = sum.getNumNs(criteriaPercentile); m->mothurOut("Optimizing maxn to " + toString(maxN) + ".\n"); } else if (optimize[i] == "minoverlap") { minOverlap = sum.getOLength(mincriteriaPercentile); m->mothurOut("Optimizing minoverlap to " + toString(minOverlap) + ".\n"); } } return 0; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "optimizeContigs"); exit(1); } } //*************************************************************************************************************** int ScreenSeqsCommand::optimizeAlign(){ try { Summary sum(processors); sum.summarizeAlignSummary(alignreport); double mincriteriaPercentile = (100 - criteria); for (int i = 0; i < optimize.size(); i++) { if (optimize[i] == "minsim") { minSim = sum.getSims(mincriteriaPercentile); m->mothurOut("Optimizing minsim to " + toString(minSim) + ".\n"); } else if (optimize[i] == "minscore") { minScore = sum.getScores(mincriteriaPercentile); m->mothurOut("Optimizing minscore to " + toString(minScore) + ".\n"); } else if (optimize[i] == "maxinsert") { maxInsert = sum.getNumInserts(mincriteriaPercentile); m->mothurOut("Optimizing maxinsert to " + toString(maxInsert) + ".\n"); } } return 0; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "optimizeAlign"); exit(1); } } //*************************************************************************************************************** int ScreenSeqsCommand::getSummary(){ try { Summary sum(processors); sum.summarizeFasta(fastafile, ""); //numSeqs is the number of unique seqs, startPosition.size() is the total number of seqs, we want to optimize using all seqs double criteriaPercentile = criteria; double mincriteriaPercentile = (100 - criteria); for (int i = 0; i < optimize.size(); i++) { if (optimize[i] == "start") { startPos = sum.getStart(criteriaPercentile); m->mothurOut("Optimizing start to " + toString(startPos) + ".\n"); } else if (optimize[i] == "end") { endPos = sum.getEnd(mincriteriaPercentile); m->mothurOut("Optimizing end to " + toString(endPos) + ".\n"); } else if (optimize[i] == "maxambig") { maxAmbig = sum.getAmbig(criteriaPercentile); m->mothurOut("Optimizing maxambig to " + toString(maxAmbig) + ".\n"); } else if (optimize[i] == "maxhomop") { maxHomoP = sum.getAmbig(criteriaPercentile); m->mothurOut("Optimizing maxhomop to " + toString(maxHomoP) + ".\n"); } else if (optimize[i] == "minlength") { minLength = sum.getLength(mincriteriaPercentile); m->mothurOut("Optimizing minlength to " + toString(minLength) + ".\n"); if (minLength < 0) { m->setControl_pressed(true); } } else if (optimize[i] == "maxlength") { maxLength = sum.getLength(criteriaPercentile); 
m->mothurOut("Optimizing maxlength to " + toString(maxLength) + ".\n"); } else if (optimize[i] == "maxn") { maxN = sum.getNumNs(criteriaPercentile); m->mothurOut("Optimizing maxn to " + toString(maxN) + ".\n"); } } return 0; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "getSummary"); exit(1); } } //********************************************************************************************************************** void driverScreen(sumScreenData* params){ try { ifstream inFASTA; params->util.openInputFile(params->filename, inFASTA); inFASTA.seekg(params->start); //print header if you are process 0 if (params->start == 0) { params->util.zapGremlins(inFASTA); gobble(inFASTA); } bool done = false; params->count = 0; while (!done) { if (params->m->getControl_pressed()) { break; } Sequence currSeq(inFASTA); gobble(inFASTA); if (currSeq.getName() != "") { bool goodSeq = true; // innocent until proven guilty string trashCode = ""; //have the report files found you bad map::iterator it = params->badSeqNames.find(currSeq.getName()); if (it != params->badSeqNames.end()) { goodSeq = 0; trashCode = it->second; } if (params->summaryfile == "") { //summaryfile includes these so no need to check again if(params->startPos != -1 && params->startPos < currSeq.getStartPos()) { goodSeq = false; trashCode += "start|"; } if(params->endPos != -1 && params->endPos > currSeq.getEndPos()) { goodSeq = false; trashCode += "end|"; } if(params->maxAmbig != -1 && params->maxAmbig < currSeq.getAmbigBases()) { goodSeq = false; trashCode += "ambig|"; } if(params->maxHomoP != -1 && params->maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = false; trashCode += "homop|"; } if(params->minLength > currSeq.getNumBases()) { goodSeq = false; trashCode += "maxLength != -1 && params->maxLength < currSeq.getNumBases()) { goodSeq = false; trashCode += ">length|";} if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + currSeq.getName() + "\t" + toString(currSeq.getStartPos()) + "\t" + toString(currSeq.getEndPos()) + "\t" + toString(currSeq.getNumBases()) + "\n"); } } if (params->contigsreport == "") { //contigs report includes this so no need to check again if(params->maxN != -1 && params->maxN < currSeq.getNumNs()) { goodSeq = false; trashCode += "n|"; } } if(goodSeq){ currSeq.printSequence(params->outputWriter); }else{ string badAccnos = currSeq.getName() + '\t' + trashCode.substr(0, trashCode.length()-1) + '\n'; params->accnosWriter->write(badAccnos); params->badSeqNames[currSeq.getName()] = trashCode; } params->count++; } #if defined NON_WINDOWS unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->end == params->count) { break; } #endif //report progress if((params->count) % 1000 == 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } } //report progress if((params->count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } inFASTA.close(); } catch(exception& e) { params->m->errorOut(e, "ScreenSeqsCommand", "driverScreen"); exit(1); } } /**************************************************************************************************/ int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, string filename, map& badSeqNames) { try { vector lines; vector positions; #if defined NON_WINDOWS positions = util.divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else long long numFastaSeqs = 0; 
positions = util.setFilePosFasta(fastafile, numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector workerThreads; vector data; long long num = 0; time_t start, end; time(&start); auto synchronizedOutputFile = std::make_shared(goodFileName); auto synchronizedAccnosFile = std::make_shared(badAccnos); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* outputThreadWriter = new OutputWriter(synchronizedOutputFile); OutputWriter* accnosThreadWriter = new OutputWriter(synchronizedAccnosFile); sumScreenData* dataBundle = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, maxN, badSeqNames, filename, summaryfile, contigsreport, lines[i+1].start, lines[i+1].end,outputThreadWriter, accnosThreadWriter); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverScreen, dataBundle)); } OutputWriter* outputThreadWriter = new OutputWriter(synchronizedOutputFile); OutputWriter* accnosThreadWriter = new OutputWriter(synchronizedAccnosFile); sumScreenData* dataBundle = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, maxN, badSeqNames, filename, summaryfile, contigsreport, lines[0].start, lines[0].end,outputThreadWriter, accnosThreadWriter); driverScreen(dataBundle); num = dataBundle->count; for (map::iterator it = dataBundle->badSeqNames.begin(); it != dataBundle->badSeqNames.end(); it++) { badSeqNames[it->first] = it->second; } for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; for (map::iterator it = data[i]->badSeqNames.begin(); it != data[i]->badSeqNames.end(); it++) { badSeqNames[it->first] = it->second; } delete data[i]->outputWriter; delete data[i]->accnosWriter; delete data[i]; delete workerThreads[i]; } long long numRemoved = badSeqNames.size(); time(&end); m->mothurOut("\nIt took " + toString(difftime(end, start)) + " secs to screen " + toString(num) + " sequences, removed " + toString(numRemoved) + ".\n\n"); delete outputThreadWriter; delete accnosThreadWriter; delete dataBundle; return num; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "createProcesses"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/screenseqscommand.h000077500000000000000000000062301424121717000221110ustar00rootroot00000000000000#ifndef SCREENSEQSCOMMAND_H #define SCREENSEQSCOMMAND_H /* * screenseqscommand.h * Mothur * * Created by Pat Schloss on 6/3/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. 
* */ #include "mothur.h" #include "command.hpp" #include "sequence.hpp" #include "writer.h" class ScreenSeqsCommand : public Command { public: ScreenSeqsCommand(string); ~ScreenSeqsCommand() = default; vector setParameters(); string getCommandName() { return "screen.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getCommonQuestions(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Screen.seqs"; } string getDescription() { return "enables you to keep sequences that fulfill certain user defined criteria"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: int optimizeContigs(); int optimizeAlign(); int createProcesses(string, string, string, map&); int screenSummary(map&); int screenContigs(map&); int screenAlignReport(map&); int runFastaScreening(map&); int screenFasta(map&); int screenReports(map&); string printAccnos(map& badSeqNames); int getSummary(); int getSummaryReport(); bool abort; string fastafile, namefile, groupfile, alignreport, qualfile, taxonomy, countfile, contigsreport, summaryfile, fileType, badAccnosFile; int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, minOverlap, oStart, oEnd, mismatches, maxN, maxInsert; float minSim, minScore, criteria; vector outputNames; vector optimize; map nameMap; }; /**************************************************************************************************/ //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct sumScreenData { int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, maxN; unsigned long long start; unsigned long long end; int count; MothurOut* m; string goodFName, badAccnosFName, filename; map badSeqNames; string summaryfile, contigsreport; Utils util; OutputWriter* outputWriter; OutputWriter* accnosWriter; sumScreenData(){} sumScreenData(int s, int e, int a, int h, int minl, int maxl, int mn, map bs, string f, string sum, string cont, unsigned long long st, unsigned long long en, OutputWriter* oWriter, OutputWriter* aWriter) { startPos = s; endPos = e; minLength = minl; maxLength = maxl; maxAmbig = a; maxHomoP = h; maxN = mn; filename = f; outputWriter = oWriter; accnosWriter = aWriter; m = MothurOut::getInstance(); start = st; end = en; summaryfile = sum; contigsreport = cont; badSeqNames = bs; count = 0; } }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/sensspeccommand.cpp000077500000000000000000000360451424121717000221230ustar00rootroot00000000000000/* * sensspeccommand.cpp * Mothur * * Created by Pat Schloss on 7/6/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "sensspeccommand.h" #include "calculator.h" #include "mcc.hpp" #include "sensitivity.hpp" #include "specificity.hpp" #include "fdr.hpp" #include "npv.hpp" #include "ppv.hpp" #include "f1score.hpp" #include "tp.hpp" #include "fp.hpp" #include "fpfn.hpp" #include "tptn.hpp" #include "tn.hpp" #include "fn.hpp" #include "accuracy.hpp" //********************************************************************************************************************** vector SensSpecCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","sensspec",false,true,true); parameters.push_back(plist); CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pphylip); CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pcolumn); CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pcount); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["sensspec"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SensSpecCommand::getHelpString(){ try { string helpString = ""; helpString += "The sens.spec command determines the quality of the clusters.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SensSpecCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "sensspec") { pattern = "[filename],sensspec"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** SensSpecCommand::SensSpecCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map 
parameters = parser.getParameters(); ValidParameters validParameter; listFile = validParameter.validFile(parameters, "list"); if (listFile == "not found") { listFile = current->getListFile(); if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter.\n"); } else { m->mothurOut("You have no current list file and the list parameter is required.\n"); abort = true; } } else if (listFile == "not open") { abort = true; } else { current->setListFile(listFile); } phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not found") { phylipfile = ""; } else if (phylipfile == "not open") { abort = true; } else { distFile = phylipfile; format = "phylip"; current->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not found") { columnfile = ""; } else if (columnfile == "not open") { abort = true; } else { distFile = columnfile; format = "column"; current->setColumnFile(columnfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not found") { namefile = ""; } else if (namefile == "not open") { namefile = ""; abort = true; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not found") { countfile = ""; } else if (countfile == "not open") { countfile = ""; abort = true; } else { current->setCountFile(countfile); } if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these? //give priority to column, then phylip columnfile = current->getColumnFile(); if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { phylipfile = current->getPhylipFile(); if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a phylip or column file.\n"); abort = true; } } }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column.\n"); abort = true; } if (columnfile != "") { if ((namefile == "") && (countfile == "")){ namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format.\n"); abort = true; } } } } if ((namefile == "") && (phylipfile != "")) { m->mothurOut("[WARNING]: there is no reason to include a name file with a phylip file. 
Ignoring.\n"); abort = false; } if (outputdir == ""){ outputdir += util.hasPath(listFile); } string temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "-1.00"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "precision"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, precision); string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(listFile)); sensSpecFileName = getOutputFileName("sensspec",variables); } m->mothurOut("\nNOTE: sens.spec assumes that only unique sequences were used to generate the distance matrix.\n\n"); } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "SensSpecCommand"); exit(1); } } //*************************************************************************************************************** int SensSpecCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } int startTime = time(nullptr); processListFile(); if (m->getControl_pressed()) { util.mothurRemove(sensSpecFileName); return 0; } m->mothurOut("It took " + toString(time(nullptr) - startTime) + " to run sens.spec.\n"); m->mothurOut("\nOutput File Names: \n"); m->mothurOut(sensSpecFileName+"\n\n"); return 0; } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "execute"); exit(1); } } //*************************************************************************************************************** int SensSpecCommand::process(ListVector*& list, bool& getCutoff, string& origCutoff){ try { string label = list->getLabel(); if(getCutoff){ if(label != "unique"){ origCutoff = label; convert(label, cutoff); cutoff = util.ceilDist(cutoff, precision); origCutoff = toString(util.ceilDist(cutoff, precision)); }else{ origCutoff = "unique"; cutoff = 0.0000; } } //must read each time because cutoff changes string nameOrCount = ""; string thisNamefile = ""; if (countfile != "") { nameOrCount = "count"; thisNamefile = countfile; CountTable ct; ct.readTable(countfile, false, false); } else if (namefile != "") { nameOrCount = "name"; thisNamefile = namefile; //remove redundant names from list map thisNames = util.readNames(namefile); for (int i = 0; i < list->getNumBins(); i++) { string bin = list->get(i); vector binNames; util.splitAtChar(bin, binNames, ','); string newBin = ""; for (int j = 0; j < binNames.size(); j++) { map::iterator it = thisNames.find(binNames[j]); if (it != thisNames.end()) { newBin += "," + binNames[j]; } } newBin = newBin.substr(1); list->set(i, newBin); } } string distfile = columnfile; if (format == "phylip") { distfile = phylipfile; } OptiMatrix matrix(distfile, thisNamefile, nameOrCount, format, cutoff, false); SensSpecCalc senscalc(matrix, list); senscalc.getResults(matrix, truePositives, trueNegatives, falsePositives, falseNegatives); outputStatistics(label, origCutoff, list->getNumBins()); return 0; } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "process"); exit(1); } } //*************************************************************************************************************** void SensSpecCommand::processListFile(){ try{ setUpOutput(); bool getCutoff = 0; string origCutoff = ""; if(util.isEqual(cutoff, -1)) { getCutoff = 1; } else { origCutoff = toString(util.ceilDist(cutoff, precision)); } InputData input(listFile, "list", 
nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } process(list, getCutoff, origCutoff); delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "processListFile"); exit(1); } } //*************************************************************************************************************** void SensSpecCommand::setUpOutput(){ try{ ofstream sensSpecFile; util.openOutputFile(sensSpecFileName, sensSpecFile); outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName); sensSpecFile << "label\tcutoff\tnumotus\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; m->mothurOut("label\tcutoff\tnumotus\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); sensSpecFile.close(); } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "setUpOutput"); exit(1); } } //*************************************************************************************************************** void SensSpecCommand::outputStatistics(string label, string cutoff, int numBins){ try{ long long tp = truePositives; long long fp = falsePositives; long long tn = trueNegatives; long long fn = falseNegatives; Sensitivity sens; double sensitivity = sens.getValue(tp, tn, fp, fn); Specificity spec; double specificity = spec.getValue(tp, tn, fp, fn); PPV ppv; double positivePredictiveValue = ppv.getValue(tp, tn, fp, fn); NPV npv; double negativePredictiveValue = npv.getValue(tp, tn, fp, fn); FDR fdr; double falseDiscoveryRate = fdr.getValue(tp, tn, fp, fn); Accuracy acc; double accuracy = acc.getValue(tp, tn, fp, fn); MCC mcc; double matthewsCorrCoef = mcc.getValue(tp, tn, fp, fn); F1Score f1; double f1Score = f1.getValue(tp, tn, fp, fn); ofstream sensSpecFile; util.openOutputFileAppend(sensSpecFileName, sensSpecFile); sensSpecFile << label << '\t' << cutoff << '\t' << numBins << '\t'; sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t'; sensSpecFile << setprecision(4); sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t'; sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl; m->mothurOut(label + "\t" + toString(cutoff) + "\t" + toString(numBins) + "\t"+ toString(truePositives) + "\t" + toString(trueNegatives) + "\t" + toString(falsePositives) + "\t" + toString(falseNegatives) + "\t"); m->mothurOut(toString(sensitivity) + "\t" + toString(specificity) + "\t" + toString(positivePredictiveValue) + "\t" + toString(negativePredictiveValue) + "\t"); m->mothurOut(toString(falseDiscoveryRate) + "\t" + toString(accuracy) + "\t" + toString(matthewsCorrCoef) + "\t" + toString(f1Score) + "\n\n"); sensSpecFile.close(); } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "outputStatistics"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/sensspeccommand.h000077500000000000000000000030211424121717000215540ustar00rootroot00000000000000#ifndef SENSSPECCOMMAND_H #define SENSSPECCOMMAND_H /* * sensspeccommand.h * Mothur * * Created by Pat Schloss on 
7/6/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "command.hpp" #include "listvector.hpp" #include "inputdata.h" #include "optimatrix.h" #include "sensspeccalc.hpp" class SensSpecCommand : public Command { public: SensSpecCommand(string); ~SensSpecCommand(){} vector setParameters(); string getCommandName() { return "sens.spec"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol 77:3219.\nhttp://www.mothur.org/wiki/Sens.spec"; } string getDescription() { return "sens.spec"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector< vector< int> > preProcessList(OptiMatrix& matrix, ListVector*); void processListFile(); void setUpOutput(); void outputStatistics(string, string, int); string listFile, distFile, sensSpecFileName, phylipfile, columnfile, namefile, countfile; string format; vector outputNames; set labels; //holds labels to be used double truePositives, falsePositives, trueNegatives, falseNegatives; bool abort, allLines, square; double cutoff; int precision; int process(ListVector*&, bool&, string&); }; #endif mothur-1.48.0/source/commands/seqerrorcommand.cpp000077500000000000000000001205641424121717000221420ustar00rootroot00000000000000/* * seqerrorcommand.cpp * Mothur * * Created by Pat Schloss on 7/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "seqerrorcommand.h" #include "alignreport.hpp" #include "qualityscores.h" #include "refchimeratest.h" //********************************************************************************************************************** vector SeqErrorCommand::setParameters(){ try { CommandParameter pquery("fasta", "InputTypes", "", "", "none", "none", "none","errorType",false,true,true); parameters.push_back(pquery); CommandParameter preference("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(preference); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "QualReport","",false,false); parameters.push_back(pqfile); CommandParameter preport("report", "InputTypes", "", "", "none", "none", "QualReport","",false,false); parameters.push_back(preport); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pignorechimeras("ignorechimeras", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pignorechimeras); CommandParameter pthreshold("threshold", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pthreshold); CommandParameter paligned("aligned", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(paligned); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["errorsummary"] = tempOutNames; outputTypes["errorseq"] = 
tempOutNames; outputTypes["errorquality"] = tempOutNames; outputTypes["errorqualforward"] = tempOutNames; outputTypes["errorqualreverse"] = tempOutNames; outputTypes["errorforward"] = tempOutNames; outputTypes["errorreverse"] = tempOutNames; outputTypes["errorcount"] = tempOutNames; outputTypes["errormatrix"] = tempOutNames; outputTypes["errorchimera"] = tempOutNames; outputTypes["errorref-query"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SeqErrorCommand::getHelpString(){ try { string helpString = ""; helpString += "The seq.error command reads a query alignment file and a reference alignment file and creates .....\n"; helpString += "The fasta parameter allows you to provide your dataset's fasta file.\n"; helpString += "The reference parameter contains the Mock sequences.\n"; helpString += "The qfile parameter ...\n"; helpString += "The report parameter...\n"; helpString += "The name parameter allows you to provide a name file associated with the fasta file.\n"; helpString += "The count parameter allows you to provide a count file associated with the fasta file.\n"; helpString += "The ignorechimeras parameter...\n"; helpString += "The threshold parameter...\n"; helpString += "Example seq.error(...).\n"; ; helpString += "For more details please check out the wiki http://www.mothur.org/wiki/seq.error .\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SeqErrorCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "errorsummary") { pattern = "[filename],error.summary"; } else if (type == "errorseq") { pattern = "[filename],error.seq"; } else if (type == "errorquality") { pattern = "[filename],error.quality"; } else if (type == "errorqualforward") { pattern = "[filename],error.qual.forward"; } else if (type == "errorqualreverse") { pattern = "[filename],error.qual.reverse"; } else if (type == "errorforward") { pattern = "[filename],error.seq.forward"; } else if (type == "errorreverse") { pattern = "[filename],error.seq.reverse"; } else if (type == "errorcount") { pattern = "[filename],error.count"; } else if (type == "errormatrix") { pattern = "[filename],error.matrix"; } else if (type == "errorchimera") { pattern = "[filename],error.chimera"; } else if (type == "errorref-query") { pattern = "[filename],error.ref-query"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** SeqErrorCommand::SeqErrorCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; queryFileName = 
validParameter.validFile(parameters, "fasta"); if (queryFileName == "not found") { queryFileName = current->getFastaFile(); if (queryFileName != "") { m->mothurOut("Using " + queryFileName + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fasta file and the fasta parameter is required.\n"); abort = true; } } else if (queryFileName == "not open") { queryFileName = ""; abort = true; } else { current->setFastaFile(queryFileName); } referenceFileName = validParameter.validFile(parameters, "reference"); if (referenceFileName == "not found") { m->mothurOut("reference is a required parameter for the seq.error command.\n"); abort = true; } else if (referenceFileName == "not open") { abort = true; } //check for optional parameters namesFileName = validParameter.validFile(parameters, "name"); if(namesFileName == "not found"){ namesFileName = ""; } else if (namesFileName == "not open") { namesFileName = ""; abort = true; } else { current->setNameFile(namesFileName); } //check for optional parameters countfile = validParameter.validFile(parameters, "count"); if(countfile == "not found"){ countfile = ""; } else if (countfile == "not open") { countfile = ""; abort = true; } else { current->setCountFile(countfile); } qualFileName = validParameter.validFile(parameters, "qfile"); if(qualFileName == "not found"){ qualFileName = ""; } else if (qualFileName == "not open") { qualFileName = ""; abort = true; } else { current->setQualFile(qualFileName); } reportFileName = validParameter.validFile(parameters, "report"); if(reportFileName == "not found"){ reportFileName = ""; } else if (reportFileName == "not open") { reportFileName = ""; abort = true; } if (outputdir == ""){ outputdir = util.hasPath(queryFileName); } if ((countfile != "") && (namesFileName != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name.\n"); abort = true; } //check for optional parameter and set defaults // ...at some point should added some additional type checking... string temp = validParameter.valid(parameters, "threshold"); if (temp == "not found") { temp = "1.00"; } util.mothurConvert(temp, threshold); temp = validParameter.valid(parameters, "ignorechimeras"); if (temp == "not found") { temp = "T"; } ignoreChimeras = util.isTrue(temp); temp = validParameter.valid(parameters, "aligned"); if (temp == "not found"){ temp = "t"; } aligned = util.isTrue(temp); if(aligned ){ if((reportFileName != "" && qualFileName == "") || (reportFileName == "" && qualFileName != "")){ m->mothurOut("if you use either a qual file or a report file, you have to have both."); m->mothurOutEndLine(); abort = true; } } else{ if(reportFileName != ""){ m->mothurOut("we are ignoring the report file if your sequences are not aligned. 
we will check that the sequences in your fasta and and qual file are the same length."); m->mothurOutEndLine(); } } } } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "SeqErrorCommand"); exit(1); } } //*************************************************************************************************************** int SeqErrorCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); maxLength = 5000; totalBases = 0; totalMatches = 0; substitutionMatrix.resize(6); for(int i=0;i<6;i++){ substitutionMatrix[i].resize(6,0); } string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(queryFileName)); map variables; variables["[filename]"] = fileNameRoot; string errorSummaryFileName = getOutputFileName("errorsummary",variables); outputNames.push_back(errorSummaryFileName); outputTypes["errorsummary"].push_back(errorSummaryFileName); string errorSeqFileName = getOutputFileName("errorseq",variables); outputNames.push_back(errorSeqFileName); outputTypes["errorseq"].push_back(errorSeqFileName); string errorChimeraFileName = getOutputFileName("errorchimera",variables); outputNames.push_back(errorChimeraFileName); outputTypes["errorchimera"].push_back(errorChimeraFileName); vector referenceSeqs = getReferences(referenceFileName); //read in reference sequences - make sure there's no ambiguous bases if (m->getControl_pressed()) { return 0; } long long numSeqs = process(queryFileName, qualFileName, reportFileName, errorSummaryFileName, errorSeqFileName, errorChimeraFileName, referenceSeqs); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if(qualFileName != ""){ printErrorQuality(qScoreErrorMap); printQualityFR(qualForwardMap, qualReverseMap); } printErrorFRFile(errorForward, errorReverse); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } string errorCountFileName = getOutputFileName("errorcount",variables); ofstream errorCountFile; util.openOutputFile(errorCountFileName, errorCountFile); outputNames.push_back(errorCountFileName); outputTypes["errorcount"].push_back(errorCountFileName); m->mothurOut("\nMultiply error rate by 100 to obtain the percent sequencing errors.\n"); m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n"); m->mothurOut("Errors\tSequences\n"); errorCountFile << "Errors\tSequences\n"; for(int i=0;imothurOut(toString(i) + '\t' + toString(misMatchCounts[i]) + '\n'); errorCountFile << i << '\t' << misMatchCounts[i] << endl; } errorCountFile.close(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } printSubMatrix(substitutionMatrix); string megAlignmentFileName = getOutputFileName("errorref-query",variables); ofstream megAlignmentFile; util.openOutputFile(megAlignmentFileName, megAlignmentFile); outputNames.push_back(megAlignmentFileName); outputTypes["errorref-query"].push_back(megAlignmentFileName); for(int i=0;imothurOut("It took " + toString(time(nullptr) - start) + " secs to check " + toString(numSeqs) + " sequences.\n"); bool extraOutput = false; if (extraOutput) { //read error.chimera ifstream in; util.openInputFile(errorChimeraFileName, in); int chimeraCount = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } string line = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpace(line); if (pieces.size() 
!= 0) { if (pieces[pieces.size()-1] == "2") { chimeraCount++; } } } in.close(); m->mothurOut("\nTrue chimeras found: " + toString(chimeraCount) + "\n"); m->mothurOut("\nReference names for chimeras:\n\n"); //read error.summary to extract names of parents ifstream in2; util.openInputFile(errorSummaryFileName, in2); while (!in2.eof()) { if (m->getControl_pressed()) { break; } string line = util.getline(in2); gobble(in2); vector pieces = util.splitWhiteSpace(line); if (pieces.size() != 0) { if (pieces[pieces.size()-1] == "2") { cout << pieces[1] << endl; } } } in2.close(); } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "execute"); exit(1); } } //*************************************************************************************************************** Compare getErrors(Sequence query, Sequence reference, MothurOut* m){ try { Compare errors; if(query.getAlignLength() != reference.getAlignLength()){ m->mothurOut("[WARNING]: " + toString(query.getName()) + " and " + toString(reference.getName()) + " are different lengths\n"); } int alignLength = query.getAlignLength(); string q = query.getAligned(); string r = reference.getAligned(); int started = 0; errors.sequence = ""; for(int i=0;ierrorOut(e, "SeqErrorCommand", "getErrors"); exit(1); } } //*************************************************************************************************************** void printErrorData(Compare error, int numParentSeqs, ofstream& summaryFile, ofstream& errorFile, vector >& substitutionMatrix, bool ignoreChimeras){ string summaryOutput = error.queryName + '\t' + error.refName + '\t' + toString(error.weight) + '\t'; summaryOutput += toString(error.AA) + '\t' + toString(error.AT) + '\t' + toString(error.AG) + '\t' + toString(error.AC) + '\t'; summaryOutput += toString(error.TA) + '\t' + toString(error.TT) + '\t' + toString(error.TG) + '\t' + toString(error.TC) + '\t'; summaryOutput += toString(error.GA) + '\t' + toString(error.GT) + '\t' + toString(error.GG) + '\t' + toString(error.GC) + '\t'; summaryOutput += toString(error.CA) + '\t' + toString(error.CT) + '\t' + toString(error.CG) + '\t' + toString(error.CC) + '\t'; summaryOutput += toString(error.NA) + '\t' + toString(error.NT) + '\t' + toString(error.NG) + '\t' + toString(error.NC) + '\t'; summaryOutput += toString(error.Ai) + '\t' + toString(error.Ti) + '\t' + toString(error.Gi) + '\t' + toString(error.Ci) + '\t' + toString(error.Ni) + '\t'; summaryOutput += toString(error.dA) + '\t' + toString(error.dT) + '\t' + toString(error.dG) + '\t' + toString(error.dC) + '\t'; summaryOutput += toString(error.Ai + error.Ti + error.Gi + error.Ci) + '\t'; //insertions summaryOutput += toString(error.dA + error.dT + error.dG + error.dC) + '\t'; //deletions summaryOutput += toString(error.mismatches - (error.Ai + error.Ti + error.Gi + error.Ci) - (error.dA + error.dT + error.dG + error.dC) - (error.NA + error.NT + error.NG + error.NC + error.Ni)) + '\t'; //substitutions summaryOutput += toString(error.NA + error.NT + error.NG + error.NC + error.Ni) + '\t'; //ambiguities summaryOutput += toString(error.matches) + '\t' + toString(error.mismatches) + '\t' + toString(error.total) + '\t' + toString(error.errorRate) + '\t' + toString(numParentSeqs) + "\n"; summaryFile << (summaryOutput); summaryOutput = '>' + error.queryName + "\tref:" + error.refName + '\n' + error.sequence + '\n'; errorFile << 
(summaryOutput); int a=0; int t=1; int g=2; int c=3; int gap=4; int n=5; if(numParentSeqs == 1 || !ignoreChimeras){ substitutionMatrix[a][a] += error.weight * error.AA; substitutionMatrix[a][t] += error.weight * error.TA; substitutionMatrix[a][g] += error.weight * error.GA; substitutionMatrix[a][c] += error.weight * error.CA; substitutionMatrix[a][gap] += error.weight * error.dA; substitutionMatrix[a][n] += error.weight * error.NA; substitutionMatrix[t][a] += error.weight * error.AT; substitutionMatrix[t][t] += error.weight * error.TT; substitutionMatrix[t][g] += error.weight * error.GT; substitutionMatrix[t][c] += error.weight * error.CT; substitutionMatrix[t][gap] += error.weight * error.dT; substitutionMatrix[t][n] += error.weight * error.NT; substitutionMatrix[g][a] += error.weight * error.AG; substitutionMatrix[g][t] += error.weight * error.TG; substitutionMatrix[g][g] += error.weight * error.GG; substitutionMatrix[g][c] += error.weight * error.CG; substitutionMatrix[g][gap] += error.weight * error.dG; substitutionMatrix[g][n] += error.weight * error.NG; substitutionMatrix[c][a] += error.weight * error.AC; substitutionMatrix[c][t] += error.weight * error.TC; substitutionMatrix[c][g] += error.weight * error.GC; substitutionMatrix[c][c] += error.weight * error.CC; substitutionMatrix[c][gap] += error.weight * error.dC; substitutionMatrix[c][n] += error.weight * error.NC; substitutionMatrix[gap][a] += error.weight * error.Ai; substitutionMatrix[gap][t] += error.weight * error.Ti; substitutionMatrix[gap][g] += error.weight * error.Gi; substitutionMatrix[gap][c] += error.weight * error.Ci; substitutionMatrix[gap][n] += error.weight * error.Ni; } } //********************************************************************************************************************** long long SeqErrorCommand::process(string filename, string qFileName, string rFileName, string summaryFileName, string errorOutputFileName, string chimeraOutputFileName, vector& referenceSeqs) { try { map weights; if(namesFileName != "") { weights = util.readNames(namesFileName); } else if (countfile != "") { CountTable ct; ct.readTable(countfile, false, false); weights = ct.getNameMap(); } bool hasNameMap = false; if (weights.size() > 0) { hasNameMap = true; } int maxMismatch = 0; AlignReport report; QualityScores quality; misMatchCounts.resize(11, 0); map::iterator it; qScoreErrorMap['m'].assign(101, 0); qScoreErrorMap['s'].assign(101, 0); qScoreErrorMap['i'].assign(101, 0); qScoreErrorMap['a'].assign(101, 0); errorForward['m'].assign(maxLength,0); errorForward['s'].assign(maxLength,0); errorForward['i'].assign(maxLength,0); errorForward['d'].assign(maxLength,0); errorForward['a'].assign(maxLength,0); errorReverse['m'].assign(maxLength,0); errorReverse['s'].assign(maxLength,0); errorReverse['i'].assign(maxLength,0); errorReverse['d'].assign(maxLength,0); errorReverse['a'].assign(maxLength,0); //open inputfiles and go to beginning place for this processor ifstream queryFile; util.openInputFile(filename, queryFile); ifstream reportFile; ifstream qualFile; if((qFileName != "" && rFileName != "" && aligned)){ util.openInputFile(qFileName, qualFile); //gobble headers util.openInputFile(reportFileName, reportFile); report.readHeaders(reportFile); qualForwardMap.resize(maxLength); qualReverseMap.resize(maxLength); for(int i=0;i 1 && ignoreChimeras == 1) { ignoreSeq = 1; } else { ignoreSeq = 0; } Compare minCompare = getErrors(query, reference, m); if(hasNameMap){ it = weights.find(query.getName()); minCompare.weight = it->second; } 
else{ minCompare.weight = 1; } printErrorData(minCompare, numParentSeqs, out, outError, substitutionMatrix, ignoreChimeras); if(!ignoreSeq){ int numRs = 0; for(int i=0;imothurOut("[WARNING]: negative value for errorReverse " + query.getName() + "\n"); } errorReverse[letter][minCompare.total-(i-numRs)-1] += minCompare.weight; }else { numRs++; } } } if(aligned && qFileName != "" && rFileName != ""){ report.read(reportFile); int startBase = report.getQueryStart(); int endBase = report.getQueryEnd(); quality.read(qualFile); if(!ignoreSeq){ quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight); quality.updateForwardMap(qualForwardMap, startBase, endBase, minCompare.weight); quality.updateReverseMap(qualReverseMap, startBase, endBase, minCompare.weight); } } else if(!aligned && qFileName != ""){ quality.read(qualFile); int qualityLength = quality.getLength(); if(qualityLength != query.getNumBases()){ m->mothurOut("[WARNING]: - quality and fasta sequence files do not match at " + query.getName() + '\t' + toString(qualityLength) + '\t' + toString(query.getNumBases()) +'\n'); } int startBase = 1; int endBase = qualityLength; if(!ignoreSeq){ quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight); quality.updateForwardMap(qualForwardMap, startBase, endBase, minCompare.weight); quality.updateReverseMap(qualReverseMap, startBase, endBase, minCompare.weight); } } if(minCompare.errorRate <= threshold && !ignoreSeq){ totalBases += (minCompare.total * minCompare.weight); totalMatches += minCompare.matches * minCompare.weight; if(minCompare.mismatches > maxMismatch){ maxMismatch = minCompare.mismatches; misMatchCounts.resize(maxMismatch + 1, 0); } misMatchCounts[minCompare.mismatches] += minCompare.weight; megaAlignVector[closestRefIndex] += query.getInlineSeq() + '\n'; } index++; if (queryFile.eof()) { break; } if(index % 100 == 0){ m->mothurOutJustToScreen(toString(index)+"\n"); } } queryFile.close(); if(qFileName != "" && rFileName != "") { reportFile.close(); qualFile.close(); } else if(qFileName != "" && aligned == false){ qualFile.close(); } //report progress m->mothurOutJustToScreen(toString(index)+"\n"); return index; } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "process"); exit(1); } } //*************************************************************************************************************** vector SeqErrorCommand::getReferences(string refFileName){ try { int numAmbigSeqs = 0; int maxStartPos = 0; int minEndPos = 100000; ifstream referenceFile; util.openInputFile(refFileName, referenceFile); vector referenceSeqs; while(!referenceFile.eof()){ if (m->getControl_pressed()) { break; } Sequence currentSeq(referenceFile); gobble(referenceFile); int numAmbigs = currentSeq.getAmbigBases(); if(numAmbigs > 0){ numAmbigSeqs++; } int startPos = currentSeq.getStartPos(); if(startPos > maxStartPos) { maxStartPos = startPos; } int endPos = currentSeq.getEndPos(); if(endPos < minEndPos) { minEndPos = endPos; } if (currentSeq.getNumBases() == 0) { m->mothurOut("[WARNING]: " + currentSeq.getName() + " is blank, ignoring.\n"); } else { referenceSeqs.push_back(currentSeq); } } referenceFile.close(); numRefs = referenceSeqs.size(); for(int i=0;imothurOut("[WARNING]: " + toString(numAmbigSeqs) + " reference sequences have ambiguous bases, these bases will be ignored.\n"); } return referenceSeqs; } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "getReferences"); exit(1); } } 
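// A brief worked example of the error-rate bookkeeping in process() and execute() above (the totals below are
// hypothetical, chosen only to illustrate the arithmetic; they are not taken from a real run):
//   - each query whose errorRate is <= threshold, and which is not skipped as a chimera (numParentSeqs > 1 with
//     ignorechimeras=T), contributes weight*total to totalBases and weight*matches to totalMatches, where weight
//     comes from the name or count file (or 1 if neither is given);
//   - execute() then reports the overall error rate as (totalBases - totalMatches) / totalBases; for example,
//     totalBases = 1,000,000 and totalMatches = 999,700 give 0.0003, i.e. 0.03% sequencing error after
//     multiplying by 100 as the printed note suggests.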
//*************************************************************************************************************** void SeqErrorCommand::printSubMatrix(vector >& substitutionMatrix){ try { string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(queryFileName)); map variables; variables["[filename]"] = fileNameRoot; string subMatrixFileName = getOutputFileName("errormatrix",variables); ofstream subMatrixFile; util.openOutputFile(subMatrixFileName, subMatrixFile); outputNames.push_back(subMatrixFileName); outputTypes["errormatrix"].push_back(subMatrixFileName); vector bases(6); bases[0] = "A"; bases[1] = "T"; bases[2] = "G"; bases[3] = "C"; bases[4] = "Gap"; bases[5] = "N"; vector refSums(5,1); for(int i=0;i<5;i++){ subMatrixFile << "\tr" << bases[i]; for(int j=0;j<6;j++){ refSums[i] += substitutionMatrix[i][j]; } } subMatrixFile << endl; for(int i=0;i<6;i++){ subMatrixFile << 'q' << bases[i]; for(int j=0;j<5;j++){ subMatrixFile << '\t' << substitutionMatrix[j][i]; } subMatrixFile << endl; } subMatrixFile << "total"; for(int i=0;i<5;i++){ subMatrixFile << '\t' << refSums[i]; } subMatrixFile << endl; subMatrixFile.close(); } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "printSubMatrix"); exit(1); } } //*************************************************************************************************************** void SeqErrorCommand::printErrorFRFile(map >& errorForward, map >& errorReverse){ try{ string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(queryFileName)); map variables; variables["[filename]"] = fileNameRoot; string errorForwardFileName = getOutputFileName("errorforward",variables); ofstream errorForwardFile; util.openOutputFile(errorForwardFileName, errorForwardFile); outputNames.push_back(errorForwardFileName); outputTypes["errorforward"].push_back(errorForwardFileName); errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl; for(int i=0;ierrorOut(e, "SeqErrorCommand", "printErrorFRFile"); exit(1); } } //*************************************************************************************************************** void SeqErrorCommand::printErrorQuality(map >& qScoreErrorMap){ try{ string fileNameRoot = outputdir + util.getRootName(util.getSimpleName(queryFileName)); map variables; variables["[filename]"] = fileNameRoot; string errorQualityFileName = getOutputFileName("errorquality",variables); ofstream errorQualityFile; util.openOutputFile(errorQualityFileName, errorQualityFile); outputNames.push_back(errorQualityFileName); outputTypes["errorquality"].push_back(errorQualityFileName); errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl; for(int i=0;i<101;i++){ errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl; } errorQualityFile.close(); } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "printErrorQuality"); exit(1); } } //*************************************************************************************************************** void SeqErrorCommand::printQualityFR(vector >& qualForwardMap, vector >& qualReverseMap){ try{ int numRows = 0; int numColumns = qualForwardMap[0].size(); for(int i=0;i variables; variables["[filename]"] = fileNameRoot; string qualityForwardFileName = getOutputFileName("errorqualforward",variables); ofstream qualityForwardFile; util.openOutputFile(qualityForwardFileName, qualityForwardFile); 
outputNames.push_back(qualityForwardFileName); outputTypes["errorqualforward"].push_back(qualityForwardFileName); for(int i=0;ierrorOut(e, "SeqErrorCommand", "printQualityFR"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/seqerrorcommand.h000077500000000000000000000051721424121717000216040ustar00rootroot00000000000000#ifndef SEQERRORCOMMAND #define SEQERRORCOMMAND /* * seqerrorcommand.h * Mothur * * Created by Pat Schloss on 7/15/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "sequence.hpp" #include "counttable.h" struct Compare { Compare(){ AA=0; AT=0; AG=0; AC=0; TA=0; TT=0; TG=0; TC=0; GA=0; GT=0; GG=0; GC=0; CA=0; CT=0; CG=0; CC=0; NA=0; NT=0; NG=0; NC=0; Ai=0; Ti=0; Gi=0; Ci=0; Ni=0; dA=0; dT=0; dG=0; dC=0; refName = ""; queryName = ""; weight = 1; matches = 0; mismatches = 0; total = 0; errorRate = 1.0000; sequence = ""; } ~Compare(){} int AA, AT, AG, AC, TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC; string refName, queryName, sequence; double errorRate; int weight, matches, mismatches, total; }; class SeqErrorCommand : public Command { public: SeqErrorCommand(string); ~SeqErrorCommand(){} vector setParameters(); string getCommandName() { return "seq.error"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Gevers D, Westcott SL (2011). Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies. PLoS ONE. 6:e27310.\nhttp://www.mothur.org/wiki/Seq.error"; } string getDescription() { return "seq.error"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector getReferences(string); void printSubMatrix(vector >& substitutionMatrix); void printErrorFRFile(map >& errorForward, map >& errorReverse); void printErrorQuality(map >&); void printQualityFR(vector >& qualForwardMap, vector >& qualReverseMap); long long process(string, string, string, string, string, string, vector&); string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, countfile; double threshold; bool ignoreChimeras, aligned, abort; int maxLength, totalBases, totalMatches, numRefs; vector outputNames; vector > substitutionMatrix; vector > qualForwardMap; vector > qualReverseMap; vector misMatchCounts; map > qScoreErrorMap; map > errorForward; map > errorReverse; vector megaAlignVector; }; #endif mothur-1.48.0/source/commands/seqsummarycommand.cpp000066400000000000000000000430271424121717000225010ustar00rootroot00000000000000/* * seqcoordcommand.cpp * Mothur * * Created by Pat Schloss on 5/30/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. 
* */ #include "seqsummarycommand.h" #include "counttable.h" #include "summary.hpp" //********************************************************************************************************************** vector SeqSummaryCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "FastaReport", "none", "none","summary",false,true,true); parameters.push_back(pfasta); CommandParameter psummary("summary", "InputTypes", "", "", "FastaReport", "none", "none","",false,false,true); parameters.push_back(psummary); CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "FastaReport", "none", "none","",false,false,true); parameters.push_back(pcontigsreport); CommandParameter palignreport("alignreport", "InputTypes", "", "", "FastaReport", "none", "none","",false,false,true); parameters.push_back(palignreport); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SeqSummaryCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SeqSummaryCommand::getHelpString(){ try { string helpString = ""; helpString += "The summary.seqs command reads a fastafile, summary, contigsreport or alignreport file and summarizes it.\n"; helpString += "The summary.seqs command parameters are fasta, name, count, summary, contigsreport, alignreport and processors, fasta, contigsreport, alignreport or summary is required, unless you have a valid current files.\n"; helpString += "The name parameter allows you to enter a name file associated with your fasta file. \n"; helpString += "The count parameter allows you to enter a count file associated with your fasta file. 
\n"; helpString += "The summary.seqs command should be in the following format: \n"; helpString += "summary.seqs(fasta=yourFastaFile, processors=2) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "SeqSummaryCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SeqSummaryCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SeqSummaryCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** SeqSummaryCommand::SeqSummaryCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } summaryfile = validParameter.validFile(parameters, "summary"); if (summaryfile == "not open") { abort = true; } else if (summaryfile == "not found") { summaryfile = ""; } else { current->setSummaryFile(summaryfile); } contigsfile = validParameter.validFile(parameters, "contigsreport"); if (contigsfile == "not open") { abort = true; } else if (contigsfile == "not found") { contigsfile = ""; } else { current->setContigsReportFile(contigsfile); } alignfile = validParameter.validFile(parameters, "alignreport"); if (alignfile == "not open") { abort = true; } else if (alignfile == "not found") { alignfile = ""; } if ((summaryfile == "") && (fastafile == "") && (contigsfile == "") && (alignfile == "")) { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { summaryfile = current->getSummaryFile(); if (summaryfile != "") { m->mothurOut("Using " + summaryfile + " as input file for the summary parameter.\n"); } else { contigsfile = current->getContigsReportFile(); if (contigsfile != "") { m->mothurOut("Using " + contigsfile + " as input file for the contigsreport parameter.\n"); } else { m->mothurOut("You have no current fasta, summary, contigsreport or alignreport file, one is required.\n"); abort = true; } } } } if (((fastafile != "") && ((summaryfile != "") || (contigsfile != "") || (alignfile != ""))) || ((summaryfile != "") && ((fastafile != "") || (contigsfile != "") || (alignfile != ""))) || ((contigsfile != "") && ((summaryfile != "") || (fastafile != "") || (alignfile != ""))) || ((alignfile != "") && ((summaryfile != "") || (contigsfile != "") || (fastafile != "")))) { m->mothurOut("[ERROR]: you may only use one of the following: fasta, summary, contigsreport or alignreport.\n"); abort = true; } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = 
validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name.\n"); abort = true; } if (outputdir == ""){ outputdir += util.hasPath(fastafile); } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); } } catch(exception& e) { m->errorOut(e, "SeqSummaryCommand", "SeqSummaryCommand"); exit(1); } } //*************************************************************************************************************** int SeqSummaryCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outputFile = getOutputFileName("summary",variables); string nameOrCount = countfile; if (namefile != "") { nameOrCount = namefile; } Summary sum(processors); if (fastafile != "") { sum.summarizeFasta(fastafile, nameOrCount, outputFile); } else if (summaryfile != "") { sum.summarizeFastaSummary(summaryfile, nameOrCount); } else if (contigsfile != "") { sum.summarizeContigsSummary(contigsfile, nameOrCount); } else if (alignfile != "") { sum.summarizeAlignSummary(alignfile, nameOrCount); } else { m->mothurOut("[ERROR]: Unknown type: you may only use one of the following: fasta, summary, contigsreport or alignreport.\n"); m->setControl_pressed(true); } if (m->getControl_pressed()) { util.mothurRemove(outputFile); return 0; } long long size = sum.getTotalSeqs(); long long numUniques = sum.getUniqueSeqs(); vector ptiles = sum.getDefaults(); if ((fastafile != "") || (summaryfile != "")) { vector starts = sum.getStart(); vector ends = sum.getEnd(); vector ambigs = sum.getAmbig(); vector lengths = sum.getLength(); vector homops = sum.getHomop(); m->mothurOutEndLine(); m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer\tNumSeqs\n"); m->mothurOut("Minimum:\t" + toString(starts[0]) + "\t" + toString(ends[0]) + "\t" + toString(lengths[0]) + "\t" + toString(ambigs[0]) + "\t" + toString(homops[0]) + "\t" + toString(ptiles[0])); m->mothurOutEndLine(); m->mothurOut("2.5%-tile:\t" + toString(starts[1]) + "\t" + toString(ends[1]) + "\t" + toString(lengths[1]) + "\t" + toString(ambigs[1]) + "\t" + toString(homops[1]) + "\t" + toString(ptiles[1])); m->mothurOutEndLine(); m->mothurOut("25%-tile:\t" + toString(starts[2]) + "\t" + toString(ends[2]) + "\t" + toString(lengths[2]) + "\t" + toString(ambigs[2]) + "\t" + toString(homops[2]) + "\t" + toString(ptiles[2])); m->mothurOutEndLine(); m->mothurOut("Median: \t" + toString(starts[3]) + "\t" + toString(ends[3]) + "\t" + toString(lengths[3]) + "\t" + toString(ambigs[3]) + "\t" + toString(homops[3]) + "\t" + toString(ptiles[3])); m->mothurOutEndLine(); m->mothurOut("75%-tile:\t" + toString(starts[4]) + "\t" + toString(ends[4]) + "\t" + toString(lengths[4]) + "\t" + toString(ambigs[4]) + "\t" + toString(homops[4]) + "\t" + toString(ptiles[4])); m->mothurOutEndLine(); m->mothurOut("97.5%-tile:\t" + toString(starts[5]) + "\t" + toString(ends[5]) + "\t" + toString(lengths[5]) + "\t" + toString(ambigs[5]) + "\t" + toString(homops[5]) + "\t" + toString(ptiles[5])); m->mothurOutEndLine(); m->mothurOut("Maximum:\t" + toString(starts[6]) + "\t" + toString(ends[6]) + "\t" + 
toString(lengths[6]) + "\t" + toString(ambigs[6]) + "\t" + toString(homops[6]) + "\t" + toString(ptiles[6])); m->mothurOutEndLine(); m->mothurOut("Mean:\t" + toString(starts[7]) + "\t" + toString(ends[7]) + "\t" + toString(lengths[7]) + "\t" + toString(ambigs[7]) + "\t" + toString(homops[7])); m->mothurOutEndLine(); }else if (contigsfile != "") { vector ostarts = sum.getOStart(); vector oends = sum.getOEnd(); vector length = sum.getLength(); vector olengths = sum.getOLength(); vector numns = sum.getNumNs(); vector mismatches = sum.getMisMatches(); m->mothurOutEndLine(); m->mothurOut("\t\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\tNumSeqs\n"); m->mothurOut("Minimum:\t" + toString(length[0]) + "\t" + toString(olengths[0]) + "\t" + toString(ostarts[0]) + "\t" + toString(oends[0]) + "\t" + toString(mismatches[0]) + "\t" + toString(numns[0]) + "\t" + toString(ptiles[0])); m->mothurOutEndLine(); m->mothurOut("2.5%-tile:\t" + toString(length[1]) + "\t" + toString(olengths[1]) + "\t" + toString(ostarts[1]) + "\t" + toString(oends[1]) + "\t" + toString(mismatches[1]) + "\t" + toString(numns[1]) + "\t" + toString(ptiles[1])); m->mothurOutEndLine(); m->mothurOut("25%-tile:\t" + toString(length[2]) + "\t" + toString(olengths[2]) + "\t" + toString(ostarts[2]) + "\t" + toString(oends[2]) + "\t" + toString(mismatches[2]) + "\t" + toString(numns[2]) + "\t" + toString(ptiles[2])); m->mothurOutEndLine(); m->mothurOut("Median: \t" + toString(length[3]) + "\t" + toString(olengths[3]) + "\t" + toString(ostarts[3]) + "\t" + toString(oends[3]) + "\t" + toString(mismatches[3]) + "\t" + toString(numns[3]) + "\t" + toString(ptiles[3])); m->mothurOutEndLine(); m->mothurOut("75%-tile:\t" + toString(length[4]) + "\t" + toString(olengths[4]) + "\t" + toString(ostarts[4]) + "\t" + toString(oends[4]) + "\t" + toString(mismatches[4]) + "\t" + toString(numns[4]) + "\t" + toString(ptiles[4])); m->mothurOutEndLine(); m->mothurOut("97.5%-tile:\t" + toString(length[5]) + "\t" + toString(olengths[5]) + "\t" + toString(ostarts[5]) + "\t" + toString(oends[5]) + "\t" + toString(mismatches[5]) + "\t" + toString(numns[5]) + "\t" + toString(ptiles[5])); m->mothurOutEndLine(); m->mothurOut("Maximum:\t" + toString(length[6]) + "\t" + toString(olengths[6]) + "\t" + toString(ostarts[6]) + "\t" + toString(oends[6]) + "\t" + toString(mismatches[6]) + "\t" + toString(numns[6]) + "\t" + toString(ptiles[6])); m->mothurOutEndLine(); m->mothurOut("Mean:\t" + toString(length[7]) + "\t" + toString(olengths[7]) + "\t" + toString(ostarts[7]) + "\t" + toString(oends[7]) + "\t" + toString(mismatches[7]) + "\t" + toString(numns[7]) ); m->mothurOutEndLine(); }else if (alignfile != "") { vector sims = sum.getSims(); vector scores = sum.getScores(); vector inserts = sum.getNumInserts(); vector length = sum.getLength(); m->mothurOutEndLine(); m->mothurOut("\t\tLength\tSimBtwnQueryTemplate\tLongestInsert\tSearchScore\tNumSeqs\n"); m->mothurOut("Minimum:\t" + toString(length[0]) + "\t" + toString(sims[0]) + "\t" + toString(inserts[0]) + "\t" + toString(scores[0]) + "\t" + toString(ptiles[0])); m->mothurOutEndLine(); m->mothurOut("2.5%-tile:\t" + toString(length[1]) + "\t" + toString(sims[1]) + "\t" + toString(inserts[1]) + "\t" + toString(scores[1]) + "\t" + toString(ptiles[1])); m->mothurOutEndLine(); m->mothurOut("25%-tile:\t" + toString(length[2]) + "\t" + toString(sims[2]) + "\t" + toString(inserts[2]) + "\t" + toString(scores[2]) + "\t" + toString(ptiles[2])); m->mothurOutEndLine(); m->mothurOut("Median: \t" + 
toString(length[3]) + "\t" + toString(sims[3]) + "\t" + toString(inserts[3]) + "\t" + toString(scores[3]) + "\t" + toString(ptiles[3])); m->mothurOutEndLine(); m->mothurOut("75%-tile:\t" + toString(length[4]) + "\t" + toString(sims[4]) + "\t" + toString(inserts[4]) + "\t" + toString(scores[4]) + "\t" + toString(ptiles[4])); m->mothurOutEndLine(); m->mothurOut("97.5%-tile:\t" + toString(length[5]) + "\t" + toString(sims[5]) + "\t" + toString(inserts[5]) + "\t" + toString(scores[5]) + "\t" + toString(ptiles[5])); m->mothurOutEndLine(); m->mothurOut("Maximum:\t" + toString(length[6]) + "\t" + toString(sims[6]) + "\t" + toString(inserts[6]) + "\t" + toString(scores[6]) + "\t" + toString(ptiles[6])); m->mothurOutEndLine(); m->mothurOut("Mean:\t" + toString(length[7]) + "\t" + toString(sims[7]) + "\t" + toString(inserts[7]) + "\t" + toString(scores[7])); m->mothurOutEndLine(); } if (m->getControl_pressed()) { util.mothurRemove(outputFile); return 0; } if ((namefile == "") && (countfile == "") && (summaryfile == "")) { m->mothurOut("# of Seqs:\t" + toString(numUniques)); m->mothurOutEndLine(); } else { m->mothurOut("# of unique seqs:\t" + toString(numUniques)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(size)); m->mothurOutEndLine(); } if (((namefile == "") && (countfile == "")) && (summaryfile == "")) { m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to summarize " + toString(numUniques) + " sequences.\n"); } else{ m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to summarize " + toString(size) + " sequences.\n"); } m->mothurOut("\nOutput File Names:\n"); if ((summaryfile == "") && (contigsfile == "") && (alignfile == "")) { m->mothurOut(outputFile); m->mothurOutEndLine(); outputNames.push_back(outputFile); outputTypes["summary"].push_back(outputFile); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("summary"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSummaryFile(currentName); } } } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SeqSummaryCommand", "execute"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/seqsummarycommand.h000077500000000000000000000022551424121717000221470ustar00rootroot00000000000000#ifndef SEQSUMMARYCOMMAND_H #define SEQSUMMARYCOMMAND_H /* * seqcoordcommand.h * Mothur * * Created by Pat Schloss on 5/30/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. 
* */ #include "command.hpp" #include "sequence.hpp" /**************************************************************************************************/ class SeqSummaryCommand : public Command { public: SeqSummaryCommand(string); ~SeqSummaryCommand(){} vector setParameters(); string getCommandName() { return "summary.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Summary.seqs"; } string getDescription() { return "summarize the quality of sequences in an unaligned or aligned fasta file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; string fastafile, namefile, countfile, summaryfile, contigsfile, alignfile; int processors; vector outputNames; }; #endif /**************************************************************************************************/ mothur-1.48.0/source/commands/setcurrentcommand.cpp000077500000000000000000000635341424121717000225010ustar00rootroot00000000000000/* * setcurrentcommand.cpp * Mothur * * Created by westcott on 3/16/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "setcurrentcommand.h" //********************************************************************************************************************** vector SetCurrentCommand::setParameters(){ try { CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pflow); CommandParameter pfile("file", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pfile); CommandParameter psample("sample", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(psample); CommandParameter pbiom("biom", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pbiom); CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pphylip); CommandParameter pcolumn("column", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcolumn); CommandParameter psummary("summary", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(psummary); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pname); CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pgroup); CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(plist); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(ptaxonomy); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pconstaxonomy); CommandParameter pcontigsreport("contigsreport", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontigsreport); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pqfile); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", 
"none","",false,false); parameters.push_back(paccnos); CommandParameter prabund("rabund", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(psabund); CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pdesign); CommandParameter porder("order", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(porder); CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(ptree); CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pshared); CommandParameter pclr("clr", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pclr); CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pordergroup); CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcount); CommandParameter pcurrent("current", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcurrent); CommandParameter prelabund("relabund", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(prelabund); CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(psff); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos); CommandParameter pclear("clear", "String", "", "", "", "", "","",false,false); parameters.push_back(pclear); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["summary"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SetCurrentCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SetCurrentCommand::getHelpString(){ try { string helpString = ""; helpString += "The set.current command allows you to set the current files saved by mothur.\n"; helpString += "The set.current command parameters are: current, clear, phylip, column, list, rabund, sabund, name, group, design, order, tree, shared, ordergroup, relabund, clr, fasta, qfile, sff, oligos, accnos, biom, count, summary, file, contigsreport, constaxonomy, taxonomy and sample.\n"; helpString += "The current parameter is used to input the output file from get.current. This function is intended to allow you to input filenames from previous instances on mothur. NOTE: If you have a current file set in the file *.current_files.summary file, and also set a value for that file type, the value set takes precedence. 
For example, if you run set.current(current=current_files.summary, fasta=abrecovery.fasta) and your have fasta=final.fasta in the *.current_files.summary file the current fasta file will be set to abrecovery.fasta.\n"; helpString += "The clear parameter is used to indicate which file types you would like to clear values for, multiple types can be separated by dashes.\n"; helpString += "The set.current command should be in the following format: \n"; helpString += "set.current(fasta=yourFastaFile) or set.current(fasta=amazon.fasta, clear=name-accnos)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SetCurrentCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SetCurrentCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],current_files.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SetCurrentCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SetCurrentCommand::SetCurrentCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; currentFile = validParameter.validFile(parameters, "current"); if (currentFile == "not open") { m->mothurOut("Ignoring: " + parameters["current"]); m->mothurOutEndLine(); currentFile = ""; } else if (currentFile == "not found") { currentFile = ""; } if (currentFile != "") { readCurrentFiles(); } //setting variables overwrites the settings in the file. 
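//
//every file-type option below is resolved with the same three-way idiom around
//ValidParameters::validFile, which returns "not open" when the named file could not be
//opened (the value is reported and ignored), "not found" when the option was not given,
//or the resolved file name, which is then stored as the new current file of that type.
//A minimal sketch of the repeated pattern, using phylip as the example:
//
//    phylipfile = validParameter.validFile(parameters, "phylip");
//    if (phylipfile == "not open")       { /* report "Ignoring: ..." */ phylipfile = ""; }
//    else if (phylipfile == "not found") { phylipfile = ""; }
//    if (phylipfile != "") { current->setPhylipFile(phylipfile); }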
//check for parameters phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { m->mothurOut("Ignoring: " + parameters["phylip"]); m->mothurOutEndLine(); phylipfile = ""; } else if (phylipfile == "not found") { phylipfile = ""; } if (phylipfile != "") { current->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { m->mothurOut("Ignoring: " + parameters["column"]); m->mothurOutEndLine(); columnfile = ""; } else if (columnfile == "not found") { columnfile = ""; } if (columnfile != "") { current->setColumnFile(columnfile); } listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { m->mothurOut("Ignoring: " + parameters["list"]); m->mothurOutEndLine(); listfile = ""; } else if (listfile == "not found") { listfile = ""; } if (listfile != "") { current->setListFile(listfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { m->mothurOut("Ignoring: " + parameters["rabund"]); m->mothurOutEndLine(); rabundfile = ""; } else if (rabundfile == "not found") { rabundfile = ""; } if (rabundfile != "") { current->setRabundFile(rabundfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { m->mothurOut("Ignoring: " + parameters["sabund"]); m->mothurOutEndLine(); sabundfile = ""; } else if (sabundfile == "not found") { sabundfile = ""; } if (sabundfile != "") { current->setSabundFile(sabundfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { m->mothurOut("Ignoring: " + parameters["name"]); m->mothurOutEndLine(); namefile = ""; } else if (namefile == "not found") { namefile = ""; } if (namefile != "") { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { m->mothurOut("Ignoring: " + parameters["group"]); m->mothurOutEndLine(); groupfile = ""; } else if (groupfile == "not found") { groupfile = ""; } if (groupfile != "") { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { m->mothurOut("Ignoring: " + parameters["count"]); m->mothurOutEndLine(); countfile = ""; } else if (countfile == "not found") { countfile = ""; } if (countfile != "") { current->setCountFile(countfile); } designfile = validParameter.validFile(parameters, "design"); if (designfile == "not open") { m->mothurOut("Ignoring: " + parameters["design"]); m->mothurOutEndLine(); designfile = ""; } else if (designfile == "not found") { designfile = ""; } if (designfile != "") { current->setDesignFile(designfile); } orderfile = validParameter.validFile(parameters, "order"); if (orderfile == "not open") { m->mothurOut("Ignoring: " + parameters["order"]); m->mothurOutEndLine(); orderfile = ""; } else if (orderfile == "not found") { orderfile = ""; } if (orderfile != "") { current->setOrderFile(orderfile); } treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { m->mothurOut("Ignoring: " + parameters["tree"]); m->mothurOutEndLine(); treefile = ""; } else if (treefile == "not found") { treefile = ""; } if (treefile != "") { current->setTreeFile(treefile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { m->mothurOut("Ignoring: " + parameters["shared"]); m->mothurOutEndLine(); sharedfile = ""; } else if (sharedfile == "not found") { sharedfile = ""; } if (sharedfile != "") { 
current->setSharedFile(sharedfile); } clrfile = validParameter.validFile(parameters, "clr"); if (clrfile == "not open") { m->mothurOut("Ignoring: " + parameters["clr"]); m->mothurOutEndLine(); clrfile = ""; } else if (clrfile == "not found") { clrfile = ""; } if (clrfile != "") { current->setCLRFile(clrfile); } ordergroupfile = validParameter.validFile(parameters, "ordergroup"); if (ordergroupfile == "not open") { m->mothurOut("Ignoring: " + parameters["ordergroup"]); m->mothurOutEndLine(); ordergroupfile = ""; } else if (ordergroupfile == "not found") { ordergroupfile = ""; } if (ordergroupfile != "") { current->setOrderGroupFile(ordergroupfile); } relabundfile = validParameter.validFile(parameters, "relabund"); if (relabundfile == "not open") { m->mothurOut("Ignoring: " + parameters["relabund"]); m->mothurOutEndLine(); relabundfile = ""; } else if (relabundfile == "not found") { relabundfile = ""; } if (relabundfile != "") { current->setRelAbundFile(relabundfile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { m->mothurOut("Ignoring: " + parameters["fasta"]); m->mothurOutEndLine(); fastafile = ""; } else if (fastafile == "not found") { fastafile = ""; } if (fastafile != "") { current->setFastaFile(fastafile); } qualfile = validParameter.validFile(parameters, "qfile"); if (qualfile == "not open") { m->mothurOut("Ignoring: " + parameters["qfile"]); m->mothurOutEndLine(); qualfile = ""; } else if (qualfile == "not found") { qualfile = ""; } if (qualfile != "") { current->setQualFile(qualfile); } sfffile = validParameter.validFile(parameters, "sff"); if (sfffile == "not open") { m->mothurOut("Ignoring: " + parameters["sff"]); m->mothurOutEndLine(); sfffile = ""; } else if (sfffile == "not found") { sfffile = ""; } if (sfffile != "") { current->setSFFFile(sfffile); } oligosfile = validParameter.validFile(parameters, "oligos"); if (oligosfile == "not open") { m->mothurOut("Ignoring: " + parameters["oligos"]); m->mothurOutEndLine(); oligosfile = ""; } else if (oligosfile == "not found") { oligosfile = ""; } if (oligosfile != "") { current->setOligosFile(oligosfile); } accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { m->mothurOut("Ignoring: " + parameters["accnos"]); m->mothurOutEndLine(); accnosfile = ""; } else if (accnosfile == "not found") { accnosfile = ""; } if (accnosfile != "") { current->setAccnosFile(accnosfile); } taxonomyfile = validParameter.validFile(parameters, "taxonomy"); if (taxonomyfile == "not open") { m->mothurOut("Ignoring: " + parameters["taxonomy"]); m->mothurOutEndLine(); taxonomyfile = ""; } else if (taxonomyfile == "not found") { taxonomyfile = ""; } if (taxonomyfile != "") { current->setTaxonomyFile(taxonomyfile); } contigsreportfile = validParameter.validFile(parameters, "contigsreport"); if (contigsreportfile == "not open") { m->mothurOut("Ignoring: " + parameters["contigsreport"]); m->mothurOutEndLine(); contigsreportfile = ""; } else if (contigsreportfile == "not found") { contigsreportfile = ""; } if (contigsreportfile != "") { current->setContigsReportFile(contigsreportfile); } constaxonomyfile = validParameter.validFile(parameters, "constaxonomy"); if (constaxonomyfile == "not open") { m->mothurOut("Ignoring: " + parameters["constaxonomy"]); m->mothurOutEndLine(); constaxonomyfile = ""; } else if (constaxonomyfile == "not found") { constaxonomyfile = ""; } if (constaxonomyfile != "") { current->setConsTaxonomyFile(constaxonomyfile); } flowfile = 
validParameter.validFile(parameters, "flow"); if (flowfile == "not open") { m->mothurOut("Ignoring: " + parameters["flow"]); m->mothurOutEndLine(); flowfile = ""; } else if (flowfile == "not found") { flowfile = ""; } if (flowfile != "") { current->setFlowFile(flowfile); } biomfile = validParameter.validFile(parameters, "biom"); if (biomfile == "not open") { m->mothurOut("Ignoring: " + parameters["biom"]); m->mothurOutEndLine(); biomfile = ""; } else if (biomfile == "not found") { biomfile = ""; } if (biomfile != "") { current->setBiomFile(biomfile); } summaryfile = validParameter.validFile(parameters, "summary"); if (summaryfile == "not open") { m->mothurOut("Ignoring: " + parameters["summary"]); m->mothurOutEndLine(); summaryfile = ""; } else if (summaryfile == "not found") { summaryfile = ""; } if (summaryfile != "") { current->setSummaryFile(summaryfile); } filefile = validParameter.validFile(parameters, "file"); if (filefile == "not open") { m->mothurOut("Ignoring: " + parameters["file"]); m->mothurOutEndLine(); filefile = ""; } else if (filefile == "not found") { filefile = ""; } if (filefile != "") { current->setFileFile(filefile); } samplefile = validParameter.validFile(parameters, "sample"); if (samplefile == "not open") { m->mothurOut("Ignoring: " + parameters["sample"]); m->mothurOutEndLine(); samplefile = ""; } else if (samplefile == "not found") { samplefile = ""; } if (samplefile != "") { current->setSampleFile(samplefile); } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } current->setProcessors(temp); clearTypes = validParameter.valid(parameters, "clear"); if (clearTypes == "not found") { clearTypes = ""; } else { util.splitAtDash(clearTypes, types); } } } catch(exception& e) { m->errorOut(e, "SetCurrentCommand", "SetCurrentCommand"); exit(1); } } //********************************************************************************************************************** int SetCurrentCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //user wants to clear a type if (types.size() != 0) { for (int i = 0; i < types.size(); i++) { if (m->getControl_pressed()) { break; } //look for file types if (types[i] == "fasta") { current->setFastaFile(""); }else if (types[i] == "qfile") { current->setQualFile(""); }else if (types[i] == "phylip") { current->setPhylipFile(""); }else if (types[i] == "column") { current->setColumnFile(""); }else if (types[i] == "list") { current->setListFile(""); }else if (types[i] == "rabund") { current->setRabundFile(""); }else if (types[i] == "sabund") { current->setSabundFile(""); }else if (types[i] == "name") { current->setNameFile(""); }else if (types[i] == "group") { current->setGroupFile(""); }else if (types[i] == "order") { current->setOrderFile(""); }else if (types[i] == "ordergroup") { current->setOrderGroupFile(""); }else if (types[i] == "tree") { current->setTreeFile(""); }else if (types[i] == "shared") { current->setSharedFile(""); }else if (types[i] == "relabund") { current->setRelAbundFile(""); }else if (types[i] == "clr") { current->setCLRFile(""); }else if (types[i] == "design") { current->setDesignFile(""); }else if (types[i] == "sff") { current->setSFFFile(""); }else if (types[i] == "oligos") { current->setOligosFile(""); }else if (types[i] == "accnos") { current->setAccnosFile(""); }else if (types[i] == "taxonomy") { current->setTaxonomyFile(""); }else if (types[i] == "constaxonomy") { current->setConsTaxonomyFile(""); }else if (types[i] == 
"contigsreport") { current->setContigsReportFile(""); }else if (types[i] == "flow") { current->setFlowFile(""); }else if (types[i] == "biom") { current->setBiomFile(""); }else if (types[i] == "count") { current->setCountFile(""); }else if (types[i] == "summary") { current->setSummaryFile(""); }else if (types[i] == "file") { current->setFileFile(""); }else if (types[i] == "sample") { current->setSampleFile(""); }else if (types[i] == "processors") { current->setProcessors("1"); }else if (types[i] == "all") { current->clearCurrentFiles(); }else { m->mothurOut("[ERROR]: mothur does not save a current file for " + types[i] + "\n"); } } } m->mothurOutEndLine(); m->mothurOut("Current files saved by mothur:\n"); if (current->hasCurrentFiles()) { map variables; variables["[filename]"] = util.getFullPathName(outputdir); string filename = getOutputFileName("summary", variables); current->printCurrentFiles(filename); outputNames.push_back(filename); outputTypes["summary"].push_back(filename); m->mothurOutEndLine(); m->mothurOut("Output File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); } return 0; } catch(exception& e) { m->errorOut(e, "SetCurrentCommand", "execute"); exit(1); } } //********************************************************************************************************************** int SetCurrentCommand::readCurrentFiles(){ try{ ifstream in; util.openInputFile(currentFile, in); while(!in.eof()) { if (m->getControl_pressed()) { break; } string line = util.getline(in); gobble(in); vector pieces; util.splitAtChar(line, pieces, '='); if (pieces.size() != 2) { m->mothurOut("[ERROR]: " + util.getStringFromVector(pieces, ",") + " line is not in the correct format. Did you edit the file? Mothur expects tag=filename. 
Example: fasta=final.fasta\n"); m->setControl_pressed(true); } else{ //look for file types if (pieces[0] == "fasta") { current->setFastaFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "qfile") { current->setQualFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "phylip") { current->setPhylipFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "column") { current->setColumnFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "list") { current->setListFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "rabund") { current->setRabundFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "sabund") { current->setSabundFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "name") { current->setNameFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "group") { current->setGroupFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "order") { current->setOrderFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "ordergroup") { current->setOrderGroupFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "tree") { current->setTreeFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "shared") { current->setSharedFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "relabund") { current->setRelAbundFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "clr") { current->setCLRFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "design") { current->setDesignFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "sff") { current->setSFFFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "oligos") { current->setOligosFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "accnos") { current->setAccnosFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "taxonomy") { current->setTaxonomyFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "flow") { current->setFlowFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "biom") { current->setBiomFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "count") { current->setCountFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "summary") { current->setSummaryFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "file") { current->setFileFile(util.getFullPathName(pieces[1])); }else if (pieces[0] == "processors") { current->setProcessors(pieces[1]); }else { m->mothurOut("[ERROR]: mothur does not save a current file for " + util.getFullPathName(pieces[1])); m->mothurOutEndLine(); } } } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "SetCurrentCommand", "readCurrentFiles"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/setcurrentcommand.h000077500000000000000000000023711424121717000221360ustar00rootroot00000000000000#ifndef SETCURRENTCOMMAND_H #define SETCURRENTCOMMAND_H /* * setcurrentcommand.h * Mothur * * Created by westcott on 3/16/11. * Copyright 2011 Schloss Lab. All rights reserved. 
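 *
 *  set.current lets the user set (or clear) the files mothur remembers as "current"
 *  (fasta, name, group, count, taxonomy, shared, etc.), either by naming them directly
 *  or by reading a *.current_files.summary file written by get.current (the current
 *  parameter); values supplied explicitly take precedence over values read from that
 *  file. execute() then writes an updated current_files.summary listing the settings.
 *  e.g. set.current(current=current_files.summary, fasta=abrecovery.fasta) or
 *  set.current(clear=all).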
* */ #include "command.hpp" class SetCurrentCommand : public Command { public: SetCurrentCommand(string); ~SetCurrentCommand() = default; vector setParameters(); string getCommandName() { return "set.current"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Set.current"; } string getDescription() { return "set current files for mothur"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames; bool abort; string clearTypes; vector types; string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile, countfile, summaryfile, currentFile, samplefile; string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile, filefile, constaxonomyfile, contigsreportfile, clrfile; int readCurrentFiles(); }; #endif mothur-1.48.0/source/commands/setdircommand.cpp000077500000000000000000000366331424121717000215750ustar00rootroot00000000000000/* * setoutdircommand.cpp * Mothur * * Created by westcott on 1/21/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "setdircommand.h" //********************************************************************************************************************** vector SetDirectoryCommand::setParameters(){ try { CommandParameter ptempdefault("mothurfiles", "String", "", "", "", "", "","",false,false); parameters.push_back(ptempdefault); CommandParameter ptools("tools", "String", "", "", "", "", "","",false,false); parameters.push_back(ptools); CommandParameter pdebug("debug", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdebug); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pmodnames("modifynames", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmodnames); CommandParameter pinput("input", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pinput); CommandParameter poutput("output", "String", "", "", "", "", "","",false,false,true); parameters.push_back(poutput); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SetDirectoryCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SetDirectoryCommand::getHelpString(){ try { string helpString = ""; helpString += "The set.dir command can be used to direct the output files generated by mothur to a specific place.\n"; helpString += "The set.dir command can also be used to specify the directory where your input files are located, the directory must exist.\n"; helpString += "The set.dir command can also be used to override or set the default location mothur will look for files if it is unable to find them, the directory must exist.\n"; helpString += "The set.dir command can also be used to set the location of the directory containing mothur's external tools, i.e. 
vsearch, uchime, prefetch, fasterqdump, if other than mothur's executable location. You can set this to /usr/bin for example. \n"; helpString += "The set.dir command can also be used to run mothur in debug mode.\n"; helpString += "The set.dir command can also be used to seed random.\n"; helpString += "The set.dir command can also be used to set the modifynames parameter. Default=t, meaning if your sequence names contain ':' change them to '_' to avoid issues while making trees. modifynames=F will leave sequence names as they are.\n"; helpString += "The set.dir command parameters are input, output, mothurfiles and debug and one is required.\n"; helpString += "To run mothur in debug mode set debug=true. Default debug=false.\n"; helpString += "To seed random set seed=yourRandomValue. By default mothur seeds random with the start time.\n"; helpString += "To return the output to the same directory as the input files you may enter: output=clear.\n"; helpString += "To return the input to the current working directory you may enter: input=clear.\n"; helpString += "To set the output to the directory where mothur.exe is located you may enter: output=default.\n"; helpString += "To set the input to the directory where mothur.exe is located you may enter: input=default.\n"; helpString += "To return the mothurfiles location to the default you provided at compile time you may enter: mothurfiles=clear.\n"; helpString += "To set the mothurfiles to the directory where mothur.exe is located you may enter: mothurfiles=default.\n"; helpString += "The set.dir command should be in the following format: set.dir(output=yourOutputDirectory, input=yourInputDirectory, mothurfiles=yourTempDefault).\n"; helpString += "Example set.outdir(output=/Users/lab/desktop/outputs, input=/Users/lab/desktop/inputs).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SetDirectoryCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** SetDirectoryCommand::SetDirectoryCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; output = validParameter.validPath(parameters, "output"); if (output == "not found") { output = ""; } input = validParameter.validPath(parameters, "input"); if (input == "not found") { input = ""; } mothurfiles = validParameter.validPath(parameters, "mothurfiles"); if (mothurfiles == "not found") { mothurfiles = ""; } toolsLocation = validParameter.validPath(parameters, "tools"); if (toolsLocation == "not found") { toolsLocation = ""; } bool debug = false; bool nodebug = false; debugorSeedOnly = false; string temp = validParameter.valid(parameters, "debug"); if (temp == "not found") { debug = false; nodebug=true; } else { debug = util.isTrue(temp); } m->setDebug(debug); bool nomod = false; temp = validParameter.valid(parameters, "modifynames"); if (temp == "not found") { modifyNames = true; nomod=true; } else { modifyNames = util.isTrue(temp); } m->setChangedSeqNames(modifyNames); bool seed = false; temp = validParameter.valid(parameters, "seed"); if (temp == "not found") { random = 0; } else { if (util.isInteger(temp)) { util.mothurConvert(temp, random); seed = true; } else { 
m->mothurOut("[ERROR]: Seed must be an integer for the set.dir command.\n"); abort = true; } } if (debug) { m->mothurOut("Setting [DEBUG] flag.\n"); } if (seed) { m->setRandomSeed(random); m->mothurOut("Setting random seed to " + toString(random) + ".\n\n"); } if ((input == "") && (output == "") && (mothurfiles == "") && (toolsLocation == "")&& nodebug && nomod && !seed) { m->mothurOut("[ERROR]: You must provide either an input, output, mothurfiles, tools, debug or modifynames for the set.dir command.\n"); abort = true; }else if((input == "") && (output == "") && (mothurfiles == "") && (toolsLocation == "")) { debugorSeedOnly = true; } } } catch(exception& e) { m->errorOut(e, "SetDirectoryCommand", "SetDirectoryCommand"); exit(1); } } //********************************************************************************************************************** int SetDirectoryCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (debugorSeedOnly) { } else { m->mothurOut("Mothur's directories:\n"); //redirect output if ((output == "clear") || (output == "")) { output = ""; current->setOutputDir(output); } else if (output == "default") { string output = current->getProgramPath(); m->mothurOut("outputDir=" + output + "\n"); current->setOutputDir(output); }else { output = util.removeQuotes(output); if (util.mkDir(output)) { m->mothurOut("outputDir=" + output + "\n"); current->setOutputDir(output); } } //redirect input if ((input == "clear") || (input == "")) { input = ""; current->setInputDir(nullVector); } else if (input == "default") { string input = current->getProgramPath(); //input = exepath.substr(0, (exepath.find_last_of('m'))); m->mothurOut("inputDir=" + input+ "\n"); vector temp; temp.push_back(input); current->setInputDir(temp); }else { input = util.removeQuotes(input); vector inputPaths; vector temp; util.splitAtChar(input, temp, ';'); for (int i = 0; i < temp.size(); i++) { string inputPath = util.removeQuotes(temp[i]); //add / to name if needed string lastChar = inputPath.substr(inputPath.length()-1); if (lastChar != PATH_SEPARATOR) { inputPath += PATH_SEPARATOR; } inputPath = util.getFullPathName(inputPath); if (util.dirCheckExists(inputPath)) { inputPaths.push_back(inputPath); } } if (inputPaths.size() != 0) { m->mothurOut("inputdir=\n"); for (int i = 0; i < inputPaths.size(); i++) { m->mothurOut("\t" + inputPaths[i] + "\n"); } m->mothurOutEndLine(); current->setInputDir(inputPaths); } } //set default location of mothurs files if (mothurfiles == "clear") { #ifdef MOTHUR_FILES string defaultPath = MOTHUR_FILES; vector defaultPaths; vector temp; util.splitAtChar(defaultPath, temp, ';'); for (int i = 0; i < temp.size(); i++) { string defaultPath = util.removeQuotes(temp[i]); //add / to name if needed string lastChar = defaultPath.substr(defaultPath.length()-1); if (lastChar != PATH_SEPARATOR) { defaultPath += PATH_SEPARATOR; } defaultPath = util.getFullPathName(defaultPath); defaultPaths.push_back(defaultPath); } if (defaultPaths.size() != 0) { m->mothurOut("mothurfiles=\n"); for (int i = 0; i < defaultPaths.size(); i++) { m->mothurOut("\t" + defaultPaths[i] + "\n"); } m->mothurOutEndLine(); } current->setDefaultPath(defaultPaths); #else m->mothurOut("No default directory defined at compile time.\n"); current->setDefaultPath(nullVector); #endif }else if (mothurfiles == "") { //do nothing }else if (mothurfiles == "default") { string tempdefault = current->getProgramPath(); m->mothurOut("mothurfiles=" + tempdefault+ "\n"); vector temp; 
temp.push_back(tempdefault); current->setDefaultPath(temp); }else { mothurfiles = util.removeQuotes(mothurfiles); vector defaultPaths; vector temp; util.splitAtChar(mothurfiles, temp, ';'); for (int i = 0; i < temp.size(); i++) { string defaultPath = util.removeQuotes(temp[i]); //add / to name if needed string lastChar = defaultPath.substr(defaultPath.length()-1); if (lastChar != PATH_SEPARATOR) { defaultPath += PATH_SEPARATOR; } defaultPath = util.getFullPathName(defaultPath); if (util.mkDir(defaultPath)) { defaultPaths.push_back(defaultPath); } } if (defaultPaths.size() != 0) { m->mothurOut("mothurfiles=\n"); for (int i = 0; i < defaultPaths.size(); i++) { m->mothurOut("\t" + defaultPaths[i] + "\n"); } m->mothurOutEndLine(); current->setDefaultPath(defaultPaths); } } //set default if ((toolsLocation == "default") || (toolsLocation == "clear")){ #ifdef MOTHUR_TOOLS string toolsPath = MOTHUR_TOOLS; vector toolsPaths; vector temp; util.splitAtChar(toolsPath, temp, ';'); for (int i = 0; i < temp.size(); i++) { string defaultPath = util.removeQuotes(temp[i]); //add / to name if needed string lastChar = defaultPath.substr(defaultPath.length()-1); if (lastChar != PATH_SEPARATOR) { defaultPath += PATH_SEPARATOR; } defaultPath = util.getFullPathName(defaultPath); toolsPaths.push_back(defaultPath); } if (toolsPaths.size() != 0) { m->mothurOut("mothurtools=\n"); for (int i = 0; i < toolsPaths.size(); i++) { m->mothurOut("\t" + toolsPaths[i] + "\n"); } m->mothurOutEndLine(); } current->setDefaultPath(toolsPaths); #else string temp = current->getProgramPath(); m->mothurOut("tools=" + temp+ "\n"); vector temps; temps.push_back(temp); current->setToolsPath(temps); #endif }else if (toolsLocation == "") { //do nothing }else { toolsLocation = util.removeQuotes(toolsLocation); vector defaultPaths; vector temp; util.splitAtChar(toolsLocation, temp, ';'); for (int i = 0; i < temp.size(); i++) { string defaultPath = util.removeQuotes(temp[i]); //add / to name if needed string lastChar = defaultPath.substr(defaultPath.length()-1); if (lastChar != PATH_SEPARATOR) { defaultPath += PATH_SEPARATOR; } defaultPath = util.getFullPathName(defaultPath); if (util.mkDir(defaultPath)) { defaultPaths.push_back(defaultPath); } } if (defaultPaths.size() != 0) { m->mothurOut("mothurtools=\n"); for (int i = 0; i < defaultPaths.size(); i++) { m->mothurOut("\t" + defaultPaths[i] + "\n"); } m->mothurOutEndLine(); current->setToolsPath(defaultPaths); } } } return 0; } catch(exception& e) { m->errorOut(e, "SetDirectoryCommand", "execute"); exit(1); } } //**********************************************************************************************************************/ mothur-1.48.0/source/commands/setdircommand.h000077500000000000000000000020461424121717000212310ustar00rootroot00000000000000#ifndef SETDIRCOMMAND_H #define SETDIRCOMMAND_H /* * setoutdircommand.h * Mothur * * Created by westcott on 1/21/10. * Copyright 2010 Schloss Lab. All rights reserved. 
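 *
 *  set.dir controls where mothur reads and writes files: the input, output, mothurfiles
 *  (the default location mothur searches when it cannot find a file) and tools
 *  directories, plus the debug flag, the random seed and the modifynames behavior.
 *  Directories may be given explicitly, set to "default" (the directory containing the
 *  mothur executable) or set to "clear".
 *  e.g. set.dir(output=/Users/lab/desktop/outputs, input=/Users/lab/desktop/inputs).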
* */ #include "command.hpp" #include "commandfactory.hpp" /**********************************************************/ class SetDirectoryCommand : public Command { public: SetDirectoryCommand(string); ~SetDirectoryCommand(){} vector setParameters(); string getCommandName() { return "set.dir"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string){ return ""; } string getCitation() { return "http://www.mothur.org/wiki/Set.dir"; } string getDescription() { return "set various directories"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string output, input, mothurfiles, toolsLocation; bool abort, debugorSeedOnly, modifyNames; int random; vector outputNames; }; /**********************************************************/ #endif mothur-1.48.0/source/commands/setlogfilecommand.cpp000077500000000000000000000075421424121717000224350ustar00rootroot00000000000000/* * setlogfilecommand.cpp * Mothur * * Created by westcott on 4/27/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "setlogfilecommand.h" //********************************************************************************************************************** vector SetLogFileCommand::setParameters(){ try { CommandParameter pappend("append", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pappend); CommandParameter pname("name", "String", "", "", "", "", "","",false,true,true); parameters.push_back(pname); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SetLogFileCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SetLogFileCommand::getHelpString(){ try { string helpString = ""; helpString += "The set.logfile command can be used to provide a specific name for your logfile and/or to append the log generated by mothur to an existing file.\n"; helpString += "The set.logfile command parameters are name and append, name is required. 
Append is set to false by default.\n"; helpString += "The set.logfile command should be in the following format: set.logfile(name=yourLogFileName, append=T).\n"; helpString += "Example set.logfile(name=/Users/lab/desktop/output.txt, append=T).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SetLogFileCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** SetLogFileCommand::SetLogFileCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; name = validParameter.validPath(parameters, "name"); if (name == "not found") { m->mothurOut("name is a required parameter for the set.logfile command."); abort = true; } string temp = validParameter.valid(parameters, "append"); if (temp == "not found") { temp = "F"; } append = util.isTrue(temp); if (outputdir == ""){ outputdir = util.hasPath(name); } } } catch(exception& e) { m->errorOut(e, "SetLogFileCommand", "SetLogFileCommand"); exit(1); } } //********************************************************************************************************************** int SetLogFileCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } string directory = util.hasPath(name); if (directory == "") { m->setLogFileName(outputdir+name, append); }else if (util.dirCheckWritable(directory)) { m->setLogFileName(name, append); } m->mothurOut("\nSetting logfile name to " + m->getLogFileName() + "\n\n"); return 0; } catch(exception& e) { m->errorOut(e, "SetLogFileCommand", "execute"); exit(1); } } //**********************************************************************************************************************/ mothur-1.48.0/source/commands/setlogfilecommand.h000077500000000000000000000017371424121717000221020ustar00rootroot00000000000000#ifndef SETLOGFILECOMMAND_H #define SETLOGFILECOMMAND_H /* * setlogfilecommand.h * Mothur * * Created by westcott on 4/27/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "commandfactory.hpp" /**********************************************************/ class SetLogFileCommand : public Command { public: SetLogFileCommand(string); ~SetLogFileCommand(){} vector setParameters(); string getCommandName() { return "set.logfile"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string){ return ""; } string getCitation() { return "http://www.mothur.org/wiki/Set.logfile"; } string getDescription() { return "set logfile name"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string name; bool abort, append; vector outputNames; }; /**********************************************************/ #endif mothur-1.48.0/source/commands/setseedcommand.cpp000066400000000000000000000072061424121717000217260ustar00rootroot00000000000000// // setseedcommand.cpp // Mothur // // Created by Sarah Westcott on 3/24/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
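//
//  set.seed(seed=yourRandomValue) seeds mothur's random number generator so results are
//  reproducible between runs; seed=clear reseeds from the current time instead (see the
//  constructor below). Example from the help text: set.seed(seed=12345).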
// #include "setseedcommand.h" //********************************************************************************************************************** vector SetSeedCommand::setParameters(){ try { CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SetSeedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SetSeedCommand::getHelpString(){ try { string helpString = ""; helpString += "The set.seed command is used to seed random.\n"; helpString += "The set.seed command parameter is seed, and it is required.\n"; helpString += "To seed random set seed=yourRandomValue. By default mothur seeds random with the start time.\n"; helpString += "Example set.seed(seed=12345).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SetSeedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** SetSeedCommand::SetSeedCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; bool seed = false; string temp = validParameter.valid(parameters, "seed"); if (temp == "not found") { random = 0; m->mothurOut("[ERROR]: You must provide a seed value or set seed to clear.\n"); abort = true;} else if (temp == "clear") { random = time(nullptr); seed = true; }else { if (util.isInteger(temp)) { util.mothurConvert(temp, random); seed = true; } else { m->mothurOut("[ERROR]: Seed must be an integer for the set.dir command.\n"); abort = true; } } } } catch(exception& e) { m->errorOut(e, "SetSeedCommand", "SetSeedCommand"); exit(1); } } //********************************************************************************************************************** int SetSeedCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } m->setRandomSeed(random); m->mothurOut("Setting random seed to " + toString(random) + ".\n\n"); return 0; } catch(exception& e) { m->errorOut(e, "SetSeedCommand", "execute"); exit(1); } } //**********************************************************************************************************************/ mothur-1.48.0/source/commands/setseedcommand.h000077500000000000000000000020671424121717000213760ustar00rootroot00000000000000// // setseedcommand.h // Mothur // // Created by Sarah Westcott on 3/24/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. 
// #ifndef __Mothur__setseedcommand__ #define __Mothur__setseedcommand__ #include "command.hpp" #include "commandfactory.hpp" /**********************************************************/ class SetSeedCommand : public Command { public: SetSeedCommand(string); ~SetSeedCommand(){} vector setParameters(); string getCommandName() { return "set.seed"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string){ return ""; } string getCitation() { return "http://www.mothur.org/wiki/Set.seed"; } string getDescription() { return "set random seed"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; int random; vector outputNames; }; /**********************************************************/ #endif /* defined(__Mothur__setseedcommand__) */ mothur-1.48.0/source/commands/sffinfocommand.cpp000077500000000000000000001316251424121717000217320ustar00rootroot00000000000000/* * sffinfocommand.cpp * Mothur * * Created by westcott on 7/7/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "sffinfocommand.h" #include "endiannessmacros.h" #include "trimoligos.h" #include "sequence.hpp" #include "qualityscores.h" //************************************************************************************** vector SffInfoCommand::setParameters(){ try { CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff); CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos); CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt); CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow); CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim); CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta); CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs); CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; hasAccnos = false; hasOligos = false; hasGroup = 
false; split = 1; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["sfftxt"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["sff"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "setParameters"); exit(1); } } //**************************************************************************************** string SffInfoCommand::getHelpString(){ try { string helpString = ""; helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sff file.\n"; helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs, checkorient and trim. sff is required. \n"; helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.\n"; helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"; helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"; helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n"; helpString += "The group parameter allows you to provide a group file to split your sff file into separate sff files by group. \n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n"; helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n"; helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"; helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n"; helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. 
\n"; helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted.\n"; helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "getHelpString"); exit(1); } } //************************************************************************************ string SffInfoCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],fasta-[filename],[tag],fasta"; } else if (type == "flow") { pattern = "[filename],flow"; } else if (type == "sfftxt") { pattern = "[filename],sff.txt"; } else if (type == "sff") { pattern = "[filename],[group],sff"; } else if (type == "qfile") { pattern = "[filename],qual-[filename],[tag],qual"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "getOutputPattern"); exit(1); } } //******************************************************************************* SffInfoCommand::SffInfoCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; string inputDir = validParameter.validPath(parameters, "inputdir"); if (inputDir == "not found"){ inputDir = ""; } sffFilename = validParameter.validFile(parameters, "sff"); if (sffFilename == "not found") { sffFilename = current->getSFFFile(); if (sffFilename != "") { m->mothurOut("Using " + sffFilename + " as input file for the sff parameter.\n"); } else { m->mothurOut("[ERROR]: You have no current sff file and the sff parameter is required.\n"); abort = true; } } else if (sffFilename == "not open") { abort = true; } else { current->setSFFFile(sffFilename); } accnosName = validParameter.validFile(parameters, "accnos"); if (accnosName == "not found") { accnosName = ""; } else if (accnosName == "not open") { accnosName = ""; abort = true; } else { current->setAccnosFile(accnosName); hasAccnos = true; } oligosfile = validParameter.validFile(parameters, "oligos"); if (oligosfile == "not found") { oligosfile = ""; } else if (oligosfile == "not open") { oligosfile = ""; abort = true; } else { current->setOligosFile(oligosfile); hasOligos = true; } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not found") { groupfile = ""; } else if (groupfile == "not open") { groupfile = ""; abort = true; } else { current->setGroupFile(groupfile); hasGroup = true; } sfftxtFilename = validParameter.valid(parameters, "sfftxt"); if (sfftxtFilename == "not found") { sfftxt = false; sfftxtFilename = ""; } else if (util.isTrue(sfftxtFilename)) { sfftxt = true; sfftxtFilename = ""; } else { //you are a filename if (inputDir != "") { map::iterator it = parameters.find("sfftxt"); //user has given a template file if(it != parameters.end()){ string path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. 
if (path == "") { parameters["sfftxt"] = inputDir + it->second; } } } sfftxtFilename = validParameter.validFile(parameters, "sfftxt"); if (sfftxtFilename == "not found") { sfftxtFilename = ""; } else if (sfftxtFilename == "not open") { sfftxtFilename = ""; abort = true; } } if ((hasGroup) || (hasOligos)) { split = 2; } if (hasGroup && hasOligos) { m->mothurOut("[ERROR]: You may enter ONLY ONE of the following: oligos or group.\n"); abort = true; } string temp = validParameter.valid(parameters, "qfile"); if (temp == "not found"){ temp = "T"; } qual = util.isTrue(temp); temp = validParameter.valid(parameters, "fasta"); if (temp == "not found"){ temp = "T"; } fasta = util.isTrue(temp); temp = validParameter.valid(parameters, "flow"); if (temp == "not found"){ temp = "T"; } flow = util.isTrue(temp); temp = validParameter.valid(parameters, "trim"); if (temp == "not found"){ temp = "T"; } trim = util.isTrue(temp); temp = validParameter.valid(parameters, "bdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, bdiffs); temp = validParameter.valid(parameters, "pdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, pdiffs); temp = validParameter.valid(parameters, "ldiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, ldiffs); temp = validParameter.valid(parameters, "sdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, sdiffs); temp = validParameter.valid(parameters, "tdiffs"); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); } util.mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } temp = validParameter.valid(parameters, "checkorient"); if (temp == "not found") { temp = "F"; } reorient = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "SffInfoCommand"); exit(1); } } //******************************************************************************** int SffInfoCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } long start = time(nullptr); sffFilename = util.getFullPathName(sffFilename); m->mothurOut("Extracting info from " + sffFilename + " ...\n" ); string oligos = ""; if (hasOligos) { oligos = oligosfile; } if (hasGroup) { oligos = groupfile; } int numReads = extractSffInfo(sffFilename, accnosName, oligos); m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to extract " + toString(numReads) + ".\n"); if (sfftxtFilename != "") { parseSffTxt(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); } } itTypes = outputTypes.find("flow"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFlowFile(currentName); } } //report output filenames m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { 
m->mothurOut(outputNames[i]+"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "execute"); exit(1); } } //********************************************************************************** int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){ try { oligosObject = new Oligos(); currentFileName = input; if (outputdir == "") { outputdir += util.hasPath(input); } if (accnos != "") { seqNames.clear(); seqNames = util.readAccnos(accnos); } else { seqNames.clear(); } TrimOligos* trimOligos = nullptr; TrimOligos* rtrimOligos = nullptr; if (hasOligos) { readOligos(oligos); split = 2; if (m->getControl_pressed()) { delete oligosObject; return 0; } trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligosObject->getPrimers(), oligosObject->getBarcodes(), oligosObject->getReversePrimers(), oligosObject->getLinkers(), oligosObject->getSpacers()); numFPrimers = oligosObject->getPrimers().size(); numBarcodes = oligosObject->getBarcodes().size(); if (reorient) { rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligosObject->getReorientedPairedPrimers(), oligosObject->getReorientedPairedBarcodes(), false); numBarcodes = oligosObject->getReorientedPairedBarcodes().size(); } } if (hasGroup) { readGroup(oligos); split = 2; } ofstream outSfftxt, outFasta, outQual, outFlow; string outFastaFileName, outQualFileName; string rootName = outputdir + util.getRootName(util.getSimpleName(input)); if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; } map variables; variables["[filename]"] = rootName; string sfftxtFileName = getOutputFileName("sfftxt",variables); string outFlowFileName = getOutputFileName("flow",variables); if (!trim) { variables["[tag]"] = "raw"; } outFastaFileName = getOutputFileName("fasta",variables); outQualFileName = getOutputFileName("qfile",variables); if (sfftxt) { util.openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); outputTypes["sfftxt"].push_back(sfftxtFileName); } if (fasta) { util.openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } if (qual) { util.openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); } if (flow) { util.openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); } ifstream in; util.openInputFileBinary(input, in); SffCommonHeader* header = new SffCommonHeader(); bool goodHeader = header->read(in); if (!goodHeader) { delete oligosObject; if (hasOligos) { delete trimOligos; if (reorient) { delete rtrimOligos; } } return 0; } //print common header if (sfftxt) { header->printSFFTxt(outSfftxt); } if (flow) { outFlow << header->getNumFlows() << endl; } //read through the sff file int count = 0; int numFlows = header->getNumFlows(); while (!in.eof()) { bool print = true; SffRead* read = new SffRead(in, numFlows); if (!read->isOkay()) { break; } if (split > 1) { assignToSample(read, trimOligos, rtrimOligos); } //if you have provided an accosfile and this seq is not in it, then dont print if (seqNames.size() != 0) { if (seqNames.count(read->getName()) == 0) { print = false; } } //print if (print) { if (sfftxt) { read->printSffTxt(outSfftxt); } if (fasta) { 
read->printFasta(outFasta, trim); } if (qual) { read->printQuality(outQual, trim); } if (flow) { read->printFlow(outFlow); } } count++; delete read; //report progress if((count+1) % 10000 == 0){ m->mothurOutJustToScreen(toString(count+1)+"\n"); } if (m->getControl_pressed()) { count = 0; break; } if (count >= header->getNumReads()) { break; } //if (count >= 10000) { break; } //debug } //report progress if (!m->getControl_pressed()) { if((count) % 10000 != 0){ m->mothurOutJustToScreen(toString(count)+"\n"); } } in.close(); if (sfftxt) { outSfftxt.close(); } if (fasta) { outFasta.close(); } if (qual) { outQual.close(); } if (flow) { outFlow.close(); } if (split > 1) { //create new common headers for each file with the correct number of reads adjustCommonHeader(header); if (hasGroup) { delete groupMap; } map::iterator it; set namesToRemove; for(int i=0;ierrorOut(e, "SffInfoCommand", "extractSffInfo"); exit(1); } } //**************************************************************************************** int SffInfoCommand::adjustCommonHeader(SffCommonHeader*& header){ try { string endian = util.findEdianness(); for (int i = 0; i < filehandlesHeaders.size(); i++) { for (int j = 0; j < filehandlesHeaders[i].size(); j++) { ofstream out; util.openOutputFileBinaryAppend(filehandlesHeaders[i][j], out); header->printSampleCommonHeader(out, numSplitReads[i][j]); out.close(); } } ofstream outNoMatchHeader; string tempNoHeader = "tempNoMatchHeader"; util.openOutputFileBinary(tempNoHeader, outNoMatchHeader); header->printSampleCommonHeader(outNoMatchHeader, numNoMatch); outNoMatchHeader.close(); util.appendSFFFiles(noMatchFile, tempNoHeader); util.renameFile(tempNoHeader, noMatchFile); util.mothurRemove(tempNoHeader); return 0; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "adjustCommonHeader"); exit(1); } } //*********************************************************************************************** void SffInfoCommand::assignToSample(SffRead*& read, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){ try { int barcodeIndex, primerIndex, trashCodeLength; trashCodeLength = 0; primerIndex = 0; barcodeIndex = 0; if (hasOligos) { trashCodeLength = findGroup(read, barcodeIndex, primerIndex, trimOligos, rtrimOligos); } else if (hasGroup) { trashCodeLength = findGroup(read, barcodeIndex, primerIndex, "groupMode"); } else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); } if(trashCodeLength == 0){ ofstream out; util.openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out); read->printSff(out); out.close(); numSplitReads[barcodeIndex][primerIndex]++; } else{ ofstream out; util.openOutputFileBinaryAppend(noMatchFile, out); read->printSff(out); out.close(); numNoMatch++; } } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "assignToSample"); exit(1); } } //*************************************************************************************************** int SffInfoCommand::findGroup(SffRead*& read, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos) { try { int success = 1; string trashCode = ""; int currentSeqsDiffs = 0; string seq = read->getBases(); int readLength = read->getBases().length(); unsigned short clipLeft = read->getClipQualLeft(); unsigned short clipRight = read->getClipQualRight(); for (int i = 0; i < readLength; i++) { seq[i] = toupper(seq[i]); } if (trim) { if(clipRight < clipLeft){ //don't trim right if (clipRight == 0) { seq = seq.substr(clipLeft-1); } else { seq = "NNNN"; } } else if((clipRight != 0) && 
((clipRight-clipLeft) >= 0)){ seq = seq.substr((clipLeft-1), (clipRight-clipLeft+1)); } else { seq = seq.substr(clipLeft-1); } } Sequence currSeq(read->getName(), seq); Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); if(numLinkers != 0){ success = trimOligos->stripLinker(currSeq); if(success > ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } } if(numBarcodes != 0){ vector results = trimOligos->stripBarcode(currSeq, barcode); if (pairedOligos) { success = results[0] + results[2]; } else { success = results[0]; } if(success > bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } if(numSpacers != 0){ success = trimOligos->stripSpacer(currSeq); if(success > sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } } if(numFPrimers != 0){ vector results = trimOligos->stripForward(currSeq, primer); if (pairedOligos) { success = results[0] + results[2]; } else { success = results[0]; } if(success > pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } if(numRPrimers != 0){ vector results = trimOligos->stripReverse(currSeq); success = results[0]; if(success > pdiffs) { trashCode += 'r'; } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > tdiffs) { trashCode += 't'; } if (reorient && (trashCode != "")) { //if you failed and want to check the reverse int thisSuccess = 0; string thisTrashCode = ""; int thisCurrentSeqsDiffs = 0; int thisBarcodeIndex = 0; int thisPrimerIndex = 0; if(numBarcodes != 0){ vector results = rtrimOligos->stripBarcode(savedSeq, thisBarcodeIndex); if (pairedOligos) { thisSuccess = results[0] + results[2]; } else { thisSuccess = results[0]; } if(thisSuccess > bdiffs) { thisTrashCode += "b"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if(numFPrimers != 0){ vector results = rtrimOligos->stripForward(savedSeq, thisPrimerIndex); if (pairedOligos) { thisSuccess = results[0] + results[2]; } else { thisSuccess = results[0]; } if(thisSuccess > pdiffs) { thisTrashCode += "f"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } if (thisTrashCode == "") { trashCode = thisTrashCode; success = thisSuccess; currentSeqsDiffs = thisCurrentSeqsDiffs; barcode = thisBarcodeIndex; primer = thisPrimerIndex; }else { trashCode += "(" + thisTrashCode + ")"; } } if (trashCode.length() == 0) { //is this sequence in the ignore group string thisGroup = oligosObject->getGroupName(barcode, primer); int pos = thisGroup.find("ignore"); if (pos != string::npos) { trashCode += "i"; } } return trashCode.length(); } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "findGroup"); exit(1); } } //*************************************************************************************** int SffInfoCommand::findGroup(SffRead*& read, int& barcode, int& primer, string groupMode) { try { string trashCode = ""; primer = 0; string group = groupMap->getGroup(read->getName()); if (group == "not found") { trashCode += "g"; } //scrap for group else { barcode = GroupToFile[group]; } return trashCode.length(); } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "findGroup"); exit(1); } } //*********************************************************************************** int SffInfoCommand::parseSffTxt() { try { ifstream inSFF; util.openInputFile(sfftxtFilename, inSFF); if (outputdir == "") { outputdir += util.hasPath(sfftxtFilename); } //output file names ofstream outFasta, outQual, outFlow; string outFastaFileName, outQualFileName; string fileRoot = 
util.getRootName(util.getSimpleName(sfftxtFilename)); if (fileRoot.length() > 0) { //rip off last . fileRoot = fileRoot.substr(0, fileRoot.length()-1); fileRoot = util.getRootName(fileRoot); } map variables; variables["[filename]"] = fileRoot; string sfftxtFileName = getOutputFileName("sfftxt",variables); string outFlowFileName = getOutputFileName("flow",variables); if (!trim) { variables["[tag]"] = "raw"; } outFastaFileName = getOutputFileName("fasta",variables); outQualFileName = getOutputFileName("qfile",variables); if (fasta) { util.openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); } if (qual) { util.openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); } if (flow) { util.openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); } //read common header string commonHeader = util.getline(inSFF); string magicNumber = util.getline(inSFF); string version = util.getline(inSFF); string indexOffset = util.getline(inSFF); string indexLength = util.getline(inSFF); int numReads = parseHeaderLineToInt(inSFF); string headerLength = util.getline(inSFF); string keyLength = util.getline(inSFF); int numFlows = parseHeaderLineToInt(inSFF); string flowgramCode = util.getline(inSFF); string flowChars = util.getline(inSFF); string keySequence = util.getline(inSFF); gobble(inSFF); string seqName; if (flow) { outFlow << numFlows << endl; } for(int i=0;imothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + ".\n"); break; } SffRead read(numFlows); //parse read header inSFF >> seqName; seqName = seqName.substr(1); gobble(inSFF); read.setName(seqName); string runPrefix = parseHeaderLineToString(inSFF); read.setTimeStamp(runPrefix); string regionNumber = parseHeaderLineToString(inSFF); read.setRegion(regionNumber); string xyLocation = parseHeaderLineToString(inSFF); read.setXY(xyLocation); gobble(inSFF); string runName = parseHeaderLineToString(inSFF); string analysisName = parseHeaderLineToString(inSFF); string fullPath = parseHeaderLineToString(inSFF); gobble(inSFF); unsigned short readHeaderLen = parseHeaderLineToShort(inSFF); read.setHeaderLength(readHeaderLen); unsigned short nameLength = parseHeaderLineToShort(inSFF); read.setNameLength(nameLength); int numBases = parseHeaderLineToInt(inSFF); read.setNumBases(numBases); unsigned short clipQualLeft = parseHeaderLineToShort(inSFF); read.setClipQualLeft(clipQualLeft); unsigned short clipQualRight = parseHeaderLineToShort(inSFF); read.setClipQualRight(clipQualRight); unsigned short clipAdapLeft = parseHeaderLineToShort(inSFF); read.setClipAdapterLeft(clipAdapLeft); unsigned short clipAdapRight = parseHeaderLineToShort(inSFF); read.setClipAdapterRight(clipAdapRight); gobble(inSFF); //parse read vector flowVector = parseHeaderLineToFloatVector(inSFF, numFlows); read.setFlowgrams(flowVector); vector flowIndices = parseHeaderLineToIntVector(inSFF, numBases); //adjust for print vector flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]); for (int j = 1; j < flowIndices.size(); j++) { flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]); } read.setFlowIndex(flowIndicesAdjusted); string bases = parseHeaderLineToString(inSFF); read.setBases(bases); vector qualityScores = 
parseHeaderLineToIntVector(inSFF, numBases); read.setQualScores(qualityScores); gobble(inSFF); //if you have provided an accnos file and this seq is not in it, then don't print bool print = true; if (seqNames.size() != 0) { if (seqNames.count(read.getName()) == 0) { print = false; } } //print if (print) { if (fasta) { read.printFasta(outFasta, trim); } if (qual) { read.printQuality(outQual, trim); } if (flow) { read.printFlow(outFlow); } } //report progress if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)+"\n"); } if (m->getControl_pressed()) { break; } } //report progress if (!m->getControl_pressed()) { if((numReads) % 10000 != 0){ m->mothurOut(toString(numReads)+"\n"); } } inSFF.close(); if (fasta) { outFasta.close(); } if (qual) { outQual.close(); } if (flow) { outFlow.close(); } return 0; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "parseSffTxt"); exit(1); } } //***************************************************************************** int SffInfoCommand::parseHeaderLineToInt(ifstream& file){ try { int number = 0; while (!file.eof()) { char c = file.get(); if (c == ':'){ file >> number; break; } } gobble(file); return number; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt"); exit(1); } } //***************************************************************************** unsigned short SffInfoCommand::parseHeaderLineToShort(ifstream& file){ try { string text; while (!file.eof()) { char c = file.get(); if (c == ':'){ file >> text; break; } } gobble(file); unsigned short value; util.mothurConvert(text, value); return value; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "parseHeaderLineToShort"); exit(1); } } //***************************************************************************** string SffInfoCommand::parseHeaderLineToString(ifstream& file){ try { string text; while (!file.eof()) { char c = file.get(); if (c == ':'){ file >> text; break; } } gobble(file); return text; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString"); exit(1); } } //******************************************************************************************** vector<float> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){ try { vector<float> floatVector(length); while (!file.eof()) { char c = file.get(); if (c == ':'){ float temp; for(int i=0;i<length;i++){ file >> temp; floatVector[i] = temp * 100; } break; } } gobble(file); return floatVector; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector"); exit(1); } } //****************************************************************************************** vector<int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){ try { vector<int> intVector(length); while (!file.eof()) { char c = file.get(); if (c == ':'){ for(int i=0;i<length;i++){ file >> intVector[i]; } break; } } gobble(file); return intVector; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector"); exit(1); } } //*********************************************************************************************** bool SffInfoCommand::readOligos(string oligoFile){ try { filehandles.clear(); numSplitReads.clear(); filehandlesHeaders.clear(); bool allBlank = false; oligosObject->read(oligoFile); if (m->getControl_pressed()) { return false; } //error in reading oligos if (oligosObject->hasPairedPrimers() || oligosObject->hasPairedBarcodes()) { pairedOligos = true; m->mothurOut("[ERROR]: sffinfo does not support paired barcodes and primers, aborting.\n"); m->setControl_pressed(true);
return true; }else { pairedOligos = false; numFPrimers = oligosObject->getPrimers().size(); numBarcodes = oligosObject->getBarcodes().size(); } numLinkers = oligosObject->getLinkers().size(); numSpacers = oligosObject->getSpacers().size(); numRPrimers = oligosObject->getReversePrimers().size(); vector<string> groupNames = oligosObject->getGroupNames(); if (groupNames.size() == 0) { allBlank = true; } filehandles.resize(oligosObject->getBarcodeNames().size()); for(int i=0;i<filehandles.size();i++){ for(int j=0;j<oligosObject->getPrimerNames().size();j++){ filehandles[i].push_back(""); } } if(split > 1){ set<string> uniqueNames; //used to cleanup outputFileNames map<string, int> barcodes = oligosObject->getBarcodes() ; map<string, int> primers = oligosObject->getPrimers(); for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ string primerName = oligosObject->getPrimerName(itPrimer->second); string barcodeName = oligosObject->getBarcodeName(itBar->second); if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing else if ((primerName == "") && (barcodeName == "")) { } //do nothing else { string comboGroupName = ""; string comboName = ""; if(primerName == ""){ comboGroupName = barcodeName; }else{ if(barcodeName == ""){ comboGroupName = primerName; } else{ comboGroupName = barcodeName + "." + primerName; } } if(itPrimer->first == ""){ comboName = itBar->first; }else{ if(itBar->first == ""){ comboName = itPrimer->first; } else{ comboName = itBar->first + "." + itPrimer->first; } } if (comboName != "") { comboGroupName += "_" + comboName; } ofstream temp; map<string, string> variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(currentFileName)); variables["[group]"] = comboGroupName; string thisFilename = getOutputFileName("sff",variables); if (uniqueNames.count(thisFilename) == 0) { outputNames.push_back(thisFilename); uniqueNames.insert(thisFilename); } filehandles[itBar->second][itPrimer->second] = thisFilename; util.openOutputFileBinary(thisFilename, temp); temp.close(); } } } } map<string, string> variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(currentFileName)); variables["[group]"] = "scrap"; noMatchFile = getOutputFileName("sff",variables); util.mothurRemove(noMatchFile); numNoMatch = 0; filehandlesHeaders.resize(filehandles.size()); numSplitReads.resize(filehandles.size()); for (int i = 0; i < filehandles.size(); i++) { numSplitReads[i].resize(filehandles[i].size(), 0); for (int j = 0; j < filehandles[i].size(); j++) { filehandlesHeaders[i].push_back(filehandles[i][j]+"headers"); ofstream temp; util.openOutputFileBinary(filehandles[i][j]+"headers", temp); temp.close(); } } if (allBlank) { m->mothurOut("[WARNING]: your oligos file does not contain any group names.
mothur will not create a split the sff file.\n"); split = 1; return false; } return true; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "readOligos"); exit(1); } } //*************************************************************************************************** bool SffInfoCommand::readGroup(string oligoFile){ try { filehandles.clear(); numSplitReads.clear(); filehandlesHeaders.clear(); groupMap = new GroupMap(); groupMap->readMap(oligoFile); //like barcodeNameVector - no primer names vector groups = groupMap->getNamesOfGroups(); filehandles.resize(groups.size()); for (int i = 0; i < filehandles.size(); i++) { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(currentFileName)); variables["[group]"] = groups[i]; string thisFilename = getOutputFileName("sff",variables); outputNames.push_back(thisFilename); ofstream temp; util.openOutputFileBinary(thisFilename, temp); temp.close(); filehandles[i].push_back(thisFilename); GroupToFile[groups[i]] = i; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(currentFileName)); variables["[group]"] = "scrap"; noMatchFile = getOutputFileName("sff",variables); util.mothurRemove(noMatchFile); numNoMatch = 0; filehandlesHeaders.resize(groups.size()); numSplitReads.resize(filehandles.size()); for (int i = 0; i < filehandles.size(); i++) { numSplitReads[i].resize(filehandles[i].size(), 0); for (int j = 0; j < filehandles[i].size(); j++) { string thisHeader = filehandles[i][j]+"headers"; filehandlesHeaders[i].push_back(thisHeader); ofstream temp; util.openOutputFileBinary(thisHeader, temp); temp.close(); } } return true; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "readGroup"); exit(1); } } //********************************************************************/ mothur-1.48.0/source/commands/sffinfocommand.h000077500000000000000000000046561424121717000214020ustar00rootroot00000000000000#ifndef SFFINFOCOMMAND_H #define SFFINFOCOMMAND_H /* * sffinfocommand.h * Mothur * * Created by westcott on 7/7/10. * Copyright 2010 Schloss Lab. All rights reserved. 
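 *
 *  Declares SffInfoCommand, which extracts fasta, qual, flow and sff.txt data from a binary sff
 *  file and can optionally split the reads into per-sample sff files using an oligos or group file.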
* */ #include "command.hpp" #include "groupmap.h" #include "oligos.h" #include "trimoligos.h" #include "sffread.hpp" #include "sffheader.hpp" /**********************************************************/ class SffInfoCommand : public Command { public: SffInfoCommand(string); ~SffInfoCommand(){} vector setParameters(); string getCommandName() { return "sff.info"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Sffinfo"; } string getDescription() { return "extract sequences reads from a .sff file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string sffFilename, sfftxtFilename, accnosName, currentFileName, oligosfile, noMatchFile, groupfile; vector outputNames; bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup, reorient, pairedOligos; int mycount, split, numBarcodes, numFPrimers, numLinkers, numSpacers, numRPrimers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch; unordered_set seqNames; GroupMap* groupMap; map GroupToFile; vector > numSplitReads; vector > filehandles; vector > filehandlesHeaders; Oligos* oligosObject; //extract sff file functions int extractSffInfo(string, string, string); //main function void assignToSample(SffRead*&, TrimOligos*&, TrimOligos*&); bool readOligos(string oligosFile); bool readGroup(string oligosFile); //assign read to sample when splitting int findGroup(SffRead*& read, int& barcode, int& primer, TrimOligos*&, TrimOligos*&); int findGroup(SffRead*&, int& barcode, int& primer, string); //common header functions int adjustCommonHeader(SffCommonHeader*&); //parsesfftxt file functions int parseSffTxt(); int parseHeaderLineToInt(ifstream&); unsigned short parseHeaderLineToShort(ifstream& file); vector parseHeaderLineToFloatVector(ifstream&, int); vector parseHeaderLineToIntVector(ifstream&, int); string parseHeaderLineToString(ifstream&); }; /**********************************************************/ #endif mothur-1.48.0/source/commands/sffmultiplecommand.cpp000077500000000000000000001330061424121717000226250ustar00rootroot00000000000000// // sffmultiplecommand.cpp // Mothur // // Created by Sarah Westcott on 8/14/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
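//
// sff.multiple reads a file listing sff files (each optionally paired with an oligos file) and runs
// every pair through sffinfo, trim.flows, shhh.flows and trim.seqs, merging the resulting fasta,
// name and group files across samples (see driverSFFMultiple below).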
// #include "sffmultiplecommand.h" //********************************************************************************************************************** vector SffMultipleCommand::setParameters(){ try { CommandParameter pfile("file", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfile); //sffinfo CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim); //trim.flows CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop); CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows); CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal); CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise); CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder); //shhh.flows CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(plookup); CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(pcutoff); CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(pmaxiter); CommandParameter plarge("large", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(plarge); CommandParameter psigma("sigma", "Number", "", "60", "", "", "","",false,false); parameters.push_back(psigma); CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "","",false,false); parameters.push_back(pmindelta); //trim.seqs parameters CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles); CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip); CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig); CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength); CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength); CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward); CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst); CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast); CommandParameter pprocessors("processors", "Number", "", "1", "", 
"", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; abort = false; calledHelp = false; append=false; makeGroup=false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SffMultipleCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SffMultipleCommand::getHelpString(){ try { string helpString = ""; helpString += "The sff.multiple command reads a file containing sff filenames and optional oligos filenames. It runs the files through sffinfo, trim.flows, shhh.flows and trim.seqs combining the results.\n"; helpString += "The sff.multiple command parameters are: "; vector parameters = setParameters(); for (int i = 0; i < parameters.size()-1; i++) { helpString += parameters[i] + ", "; } helpString += parameters[parameters.size()-1] + ".\n"; helpString += "The file parameter allows you to enter the a file containing the list of sff files and optional oligos files.\n"; helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores generated by sffinfo trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"; helpString += "The maxambig parameter allows you to set the maximum number of ambiguous bases allowed. The default is -1.\n"; helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n"; helpString += "The minlength parameter allows you to set and minimum sequence length. \n"; helpString += "The maxlength parameter allows you to set and maximum sequence length. \n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n"; helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. 
\n"; helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n"; helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"; helpString += "Example sff.multiple(file=mySffOligosFile.txt, trim=F).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SffMultipleCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SffMultipleCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],fasta"; } else if (type == "name") { pattern = "[filename],names"; } else if (type == "group") { pattern = "[filename],groups"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SffMultipleCommand", "getOutputPattern"); exit(1); } } 
//********************************************************************************************************************** SffMultipleCommand::SffMultipleCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; filename = validParameter.validFile(parameters, "file"); if (filename == "not open") { filename = ""; abort = true; } else if (filename == "not found") { filename = ""; } string temp; temp = validParameter.valid(parameters, "trim"); if (temp == "not found"){ temp = "T"; } trim = util.isTrue(temp); temp = validParameter.valid(parameters, "minflows"); if (temp == "not found") { temp = "450"; } util.mothurConvert(temp, minFlows); temp = validParameter.valid(parameters, "maxflows"); if (temp == "not found") { temp = "450"; } util.mothurConvert(temp, maxFlows); temp = validParameter.valid(parameters, "maxhomop"); if (temp == "not found"){ temp = "9"; } util.mothurConvert(temp, maxHomoP); temp = validParameter.valid(parameters, "signal"); if (temp == "not found"){ temp = "0.50"; } util.mothurConvert(temp, signal); temp = validParameter.valid(parameters, "noise"); if (temp == "not found"){ temp = "0.70"; } util.mothurConvert(temp, noise); temp = validParameter.valid(parameters, "bdiffs"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, bdiffs); temp = validParameter.valid(parameters, "pdiffs"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, pdiffs); temp = validParameter.valid(parameters, "ldiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, ldiffs); temp = validParameter.valid(parameters, "sdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, sdiffs); temp = validParameter.valid(parameters, "tdiffs"); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); } util.mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "order"); if (temp == "not found"){ temp = "A"; } if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. 
A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; } else { if (toupper(temp[0]) == 'A') { flowOrder = "A"; } else if(toupper(temp[0]) == 'B'){ flowOrder = "B"; } else if(toupper(temp[0]) == 'I'){ flowOrder = "I"; } else { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. 
A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; } } temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found"){ temp = "0.01"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "mindelta"); if (temp == "not found"){ temp = "0.000001"; } minDelta = temp; temp = validParameter.valid(parameters, "maxiter"); if (temp == "not found"){ temp = "1000"; } util.mothurConvert(temp, maxIters); temp = validParameter.valid(parameters, "large"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, largeSize); if (largeSize != 0) { large = true; } else { large = false; } if (largeSize < 0) { m->mothurOut("The value of the large cannot be negative.\n"); } temp = validParameter.valid(parameters, "sigma");if (temp == "not found") { temp = "60"; } util.mothurConvert(temp, sigma); temp = validParameter.valid(parameters, "flip"); if (temp == "not found") { flip = 0; } else { flip = util.isTrue(temp); } temp = validParameter.valid(parameters, "maxambig"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxAmbig); temp = validParameter.valid(parameters, "minlength"); if 
(temp == "not found") { temp = "0"; } util.mothurConvert(temp, minLength); temp = validParameter.valid(parameters, "maxlength"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, maxLength); temp = validParameter.valid(parameters, "keepfirst"); if (temp == "not found") { temp = "0"; } convert(temp, keepFirst); temp = validParameter.valid(parameters, "removelast"); if (temp == "not found") { temp = "0"; } convert(temp, removeLast); temp = validParameter.valid(parameters, "allfiles"); if (temp == "not found") { temp = "F"; } allFiles = util.isTrue(temp); temp = validParameter.valid(parameters, "keepforward"); if (temp == "not found") { temp = "F"; } keepforward = util.isTrue(temp); temp = validParameter.validFile(parameters, "lookup"); if (temp == "not found") { string path = current->getProgramPath(); //string tempPath = path; //for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } //path = path.substr(0, (tempPath.find_last_of('m'))); #if defined NON_WINDOWS path += "lookupFiles/"; #else path += "lookupFiles\\"; #endif lookupFileName = util.getFullPathName(path) + "LookUp_Titanium.pat"; bool ableToOpen = util.checkLocations(lookupFileName, current->getLocations()); if (!ableToOpen) { abort=true; } }else if(temp == "not open") { lookupFileName = validParameter.validPath(parameters, "lookup"); //if you can't open it its not inputDir, try mothur excutable location string exepath = current->getProgramPath(); //string tempPath = exepath; //for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); } //exepath = exepath.substr(0, (tempPath.find_last_of('m'))); string tryPath = util.getFullPathName(exepath) + util.getSimpleName(lookupFileName); m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine(); ifstream in2; bool ableToOpen = util.openInputFile(tryPath, in2, "noerror"); in2.close(); lookupFileName = tryPath; if (!ableToOpen) { m->mothurOut("Unable to open " + lookupFileName + ".\n"); abort=true; } }else { lookupFileName = temp; } } } catch(exception& e) { m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand"); exit(1); } } //********************************************************************************************************************** int SffMultipleCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } vector sffFiles, oligosFiles; readFile(sffFiles, oligosFiles); string thisOutputDir = outputdir; if (thisOutputDir == "") { thisOutputDir = util.hasPath(filename); } string fileroot = thisOutputDir + util.getRootName(util.getSimpleName(filename)); map variables; variables["[filename]"] = fileroot; string fasta = getOutputFileName("fasta",variables); string name = getOutputFileName("name",variables); string group = getOutputFileName("group",variables); if (m->getControl_pressed()) { return 0; } if (sffFiles.size() < processors) { processors = sffFiles.size(); m->mothurOut("Reducing processors to " + toString(sffFiles.size()) + ".\n"); } createProcesses(sffFiles, oligosFiles, fasta, name, group); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (append) { outputNames.push_back(fasta); outputTypes["fasta"].push_back(fasta); current->setFastaFile(fasta); outputNames.push_back(name); outputTypes["name"].push_back(name); current->setNameFile(name); if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); 
current->setGroupFile(group); } } current->setProcessors(toString(processors)); //report output filenames m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SffMultipleCommand", "execute"); exit(1); } } //********************************************************************************************************************** int SffMultipleCommand::readFile(vector& sffFiles, vector& oligosFiles){ try { ifstream in; util.openInputFile(filename, in); bool allBlank = true; bool allFull = true; string oligos, sff; while (!in.eof()) { if (m->getControl_pressed()) { break; } in >> sff; //ignore file pairing if(sff[0] == '#'){ while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } gobble(in); } else { //check for oligos file bool ableToOpenSff = util.checkLocations(sff, current->getLocations()); oligos = ""; // get rest of line in case there is a oligos filename while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { oligos += c; } } if (ableToOpenSff) { sffFiles.push_back(sff); if (oligos != "") { bool ableToOpenOligos = util.checkLocations(oligos, current->getLocations()); if (ableToOpenOligos) { allBlank = false; } else { m->mothurOut("Can not find " + oligos + ". Ignoring.\n"); oligos = ""; } } if (oligos == "") { allFull = false; } oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file }else { m->mothurOut("Can not find " + sff + ". Ignoring.\n"); } } gobble(in); } in.close(); if (allBlank || allFull) { append = true; } if (allFull) { makeGroup = true; } return 0; } catch(exception& e) { m->errorOut(e, "SffMultipleCommand", "readFile"); exit(1); } } //********************************************************************************************************************** void mergeOutputFileList(map >& files, map >& temp){ map >::iterator it; for (it = temp.begin(); it != temp.end(); it++) { map >::iterator it2 = files.find(it->first); if (it2 == files.end()) { //we do not already have this type so just add it files[it->first] = it->second; }else { //merge them for (int i = 0; i < (it->second).size(); i++) { files[it->first].push_back((it->second)[i]); } } } } /**************************************************************************************************/ struct sffMultipleData { string fasta, name, group; vector sffFiles, oligosFiles; int start, end; MothurOut* m; Utils util; int count; string flowOrder, lookupFileName, minDelta; bool trim, large, flip, allFiles, keepforward, append, makeGroup; int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs; int maxIters, largeSize; float signal, noise, cutoff, sigma; int keepFirst, removeLast, maxAmbig; vector outputNames; map > outputTypes; sffMultipleData(){} sffMultipleData(vector sFiles, vector oFiles, string fa, string nm, string grp, int st, int en) { sffFiles = sFiles; oligosFiles = oFiles; fasta = fa; name = nm; group = grp; start = st; end = en; m = MothurOut::getInstance(); count = 0; } void setVariables(bool tr, bool lg, bool alf, bool flp, bool kpfo, bool mkg, bool app, int lgs, int bd, int td, int pd, int sd, int ld, int mxf, int mnf, int mnl, int mxl, int mxh, int mxi, int kpf, int rml, float sgn, float n, float cu, float sig, string fo, string lkf, string mnd) { trim = tr; large = lg; allFiles = alf; keepforward 
= kpfo; flip = flp; makeGroup = mkg; append = app; largeSize = lgs; tdiffs = td; bdiffs = bd; pdiffs = pd; sdiffs = sd; ldiffs = ld; maxFlows = mxf; minFlows = mnf; minLength = mnl; maxLength = mxl; maxHomoP = mxh; maxIters = mxi; signal = sgn; noise = n; cutoff = cu; sigma = sig; flowOrder = fo; lookupFileName = lkf; minDelta = mnd; keepFirst = kpf; removeLast = rml; } }; //********************************************************************************************************************** //runs sffinfo, summary.seqs, trim.flows, shhh.flows, trim.seqs, summary.seqs for each sff file. void driverSFFMultiple(sffMultipleData* params){ try { params->util.mothurRemove(params->fasta); params->util.mothurRemove(params->name); params->util.mothurRemove(params->group); params->count = 0; for (int s = params->start; s < params->end; s++) { string sff = params->sffFiles[s]; string oligos = params->oligosFiles[s]; params->m->mothurOut("\n>>>>>\tProcessing " + sff + " (file " + toString(s+1) + " of " + toString(params->sffFiles.size()) + ")\t<<<<<\n"); //run sff.info string inputString = "sff=" + sff + ", flow=T"; if (params->trim) { inputString += ", trim=T"; } params->m->mothurOut("/******************************************/\n"); params->m->mothurOut("Running command: sffinfo(" + inputString + ")\n"); Command* sffCommand = new SffInfoCommand(inputString); sffCommand->execute(); if (params->m->getControl_pressed()){ break; } map > filenames = sffCommand->getOutputFiles(); delete sffCommand; params->m->mothurOutEndLine(); //run summary.seqs on the fasta file string fastaFile = ""; map >::iterator it = filenames.find("fasta"); if (it != filenames.end()) { if ((it->second).size() != 0) { fastaFile = (it->second)[0]; } } else { params->m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); params->m->setControl_pressed(true); break; } inputString = "fasta=" + fastaFile + ", processors=1"; params->m->mothurOut("\nRunning command: summary.seqs(" + inputString + ")\n"); Command* summarySeqsCommand = new SeqSummaryCommand(inputString); summarySeqsCommand->execute(); if (params->m->getControl_pressed()){ break; } map > temp = summarySeqsCommand->getOutputFiles(); mergeOutputFileList(filenames, temp); delete summarySeqsCommand; params->m->mothurOutEndLine(); //run trim.flows on the fasta file string flowFile = ""; it = filenames.find("flow"); if (it != filenames.end()) { if ((it->second).size() != 0) { flowFile = (it->second)[0]; } } else { params->m->mothurOut("[ERROR]: sffinfo did not create a flow file, quitting.\n"); params->m->setControl_pressed(true); break; } inputString = "flow=" + flowFile; if (oligos != "") { inputString += ", oligos=" + oligos; } inputString += ", maxhomop=" + toString(params->maxHomoP) + ", maxflows=" + toString(params->maxFlows) + ", minflows=" + toString(params->minFlows); inputString += ", pdiffs=" + toString(params->pdiffs) + ", bdiffs=" + toString(params->bdiffs) + ", ldiffs=" + toString(params->ldiffs) + ", sdiffs=" + toString(params->sdiffs); inputString += ", tdiffs=" + toString(params->tdiffs) + ", signal=" + toString(params->signal) + ", noise=" + toString(params->noise) + ", order=" + params->flowOrder + ", processors=1"; params->m->mothurOut("\nRunning command: trim.flows(" + inputString + ")\n"); Command* trimFlowCommand = new TrimFlowsCommand(inputString); trimFlowCommand->execute(); if (params->m->getControl_pressed()){ break; } temp = trimFlowCommand->getOutputFiles(); mergeOutputFileList(filenames, temp); delete trimFlowCommand; string 
fileFileName = ""; flowFile = ""; if (oligos != "") { it = temp.find("file"); if (it != temp.end()) { if ((it->second).size() != 0) { fileFileName = (it->second)[0]; } } else { params->m->mothurOut("[ERROR]: trim.flows did not create a file file, quitting.\n"); params->m->setControl_pressed(true); break; } }else { vector flowFiles; it = temp.find("flow"); if (it != temp.end()) { if ((it->second).size() != 0) { flowFiles = (it->second); } } else { params->m->mothurOut("[ERROR]: trim.flows did not create a flow file, quitting.\n"); params->m->setControl_pressed(true); break; } for (int i = 0; i < flowFiles.size(); i++) { string end = flowFiles[i].substr(flowFiles[i].length()-9); if (end == "trim.flow") { flowFile = flowFiles[i]; i+=flowFiles.size(); //if we found the trim.flow file stop looking } } } if ((fileFileName == "") && (flowFile == "")) { params->m->mothurOut("[ERROR]: trim.flows did not create a file file or a trim.flow file, quitting.\n"); params->m->setControl_pressed(true); break; } if (fileFileName != "") { inputString = "file=" + fileFileName; } else { inputString = "flow=" + flowFile; } inputString += ", lookup=" + params->lookupFileName + ", cutoff=" + toString(params->cutoff); + ", maxiters=" + toString(params->maxIters); if (params->large) { inputString += ", large=" + toString(params->largeSize); } inputString += ", sigma=" +toString(params->sigma); inputString += ", mindelta=" + toString(params->minDelta); inputString += ", order=" + params->flowOrder; //run shhh.flows params->m->mothurOut("\nRunning command: shhh.flows(" + inputString + ")\n"); Command* shhhFlowCommand = new ShhherCommand(inputString); shhhFlowCommand->execute(); if (params->m->getControl_pressed()){ break; } temp = shhhFlowCommand->getOutputFiles(); mergeOutputFileList(filenames, temp); delete shhhFlowCommand; vector fastaFiles; vector nameFiles; it = temp.find("fasta"); if (it != temp.end()) { if ((it->second).size() != 0) { fastaFiles = (it->second); } } else { params->m->mothurOut("[ERROR]: shhh.flows did not create a fasta file, quitting.\n"); params->m->setControl_pressed(true); break; } it = temp.find("name"); if (it != temp.end()) { if ((it->second).size() != 0) { nameFiles = (it->second); } } else { params->m->mothurOut("[ERROR]: shhh.flows did not create a name file, quitting.\n"); params->m->setControl_pressed(true); break; } //find fasta and name files with the shortest name. This is because if there is a composite name it will be the shortest. 
fastaFile = fastaFiles[0]; for (int i = 1; i < fastaFiles.size(); i++) { if (fastaFiles[i].length() < fastaFile.length()) { fastaFile = fastaFiles[i]; } } string nameFile = nameFiles[0]; for (int i = 1; i < nameFiles.size(); i++) { if (nameFiles[i].length() < nameFile.length()) { nameFile = nameFiles[i]; } } inputString = "fasta=" + fastaFile + ", name=" + nameFile; if (oligos != "") { inputString += ", oligos=" + oligos; } if (params->allFiles) { inputString += ", allfiles=t"; } else { inputString += ", allfiles=f"; } if (params->flip) { inputString += ", flip=t"; } else { inputString += ", flip=f"; } if (params->keepforward) { inputString += ", keepforward=t"; } else { inputString += ", keepforward=f"; } inputString += ", pdiffs=" + toString(params->pdiffs) + ", bdiffs=" + toString(params->bdiffs) + ", ldiffs=" + toString(params->ldiffs) + ", sdiffs=" + toString(params->sdiffs); inputString += ", tdiffs=" + toString(params->tdiffs) + ", maxambig=" + toString(params->maxAmbig) + ", minlength=" + toString(params->minLength) + ", maxlength=" + toString(params->maxLength); if (params->keepFirst != 0) { inputString += ", keepfirst=" + toString(params->keepFirst); } if (params->removeLast != 0) { inputString += ", removelast=" + toString(params->removeLast); } inputString += ", processors=1"; //run trim.seqs params->m->mothurOut("\nRunning command: trim.seqs(" + inputString + ")\n"); Command* trimseqsCommand = new TrimSeqsCommand(inputString); trimseqsCommand->execute(); if (params->m->getControl_pressed()){ break; } temp = trimseqsCommand->getOutputFiles(); mergeOutputFileList(filenames, temp); delete trimseqsCommand; it = temp.find("fasta"); if (it != temp.end()) { if ((it->second).size() != 0) { fastaFiles = (it->second); } } else { params->m->mothurOut("[ERROR]: trim.seqs did not create a fasta file, quitting.\n"); params->m->setControl_pressed(true); break; } for (int i = 0; i < fastaFiles.size(); i++) { string end = fastaFiles[i].substr(fastaFiles[i].length()-10); if (end == "trim.fasta") { fastaFile = fastaFiles[i]; i+=fastaFiles.size(); //if we found the trim.fasta file stop looking } } it = temp.find("name"); if (it != temp.end()) { if ((it->second).size() != 0) { nameFiles = (it->second); } } else { params->m->mothurOut("[ERROR]: trim.seqs did not create a name file, quitting.\n"); params->m->setControl_pressed(true); break; } for (int i = 0; i < nameFiles.size(); i++) { string end = nameFiles[i].substr(nameFiles[i].length()-10); if (end == "trim.names") { nameFile = nameFiles[i]; i+=nameFiles.size(); //if we found the trim.names file stop looking } } vector groupFiles; string groupFile = ""; if (params->makeGroup) { it = temp.find("group"); if (it != temp.end()) { if ((it->second).size() != 0) { groupFiles = (it->second); } } //find group file with the shortest name. This is because if there is a composite group file it will be the shortest. 
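            //(Illustrative sketch, not part of the original source; assumes <algorithm> is available.)
            //The shortest-name scans in this block are equivalent to a std::min_element call, e.g.:
            //    groupFile = *min_element(groupFiles.begin(), groupFiles.end(),
            //                  [](const string& a, const string& b) { return a.length() < b.length(); });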
groupFile = groupFiles[0]; for (int i = 1; i < groupFiles.size(); i++) { if (groupFiles[i].length() < groupFile.length()) { groupFile = groupFiles[i]; } } } inputString = "fasta=" + fastaFile + ", processors=1, name=" + nameFile; params->m->mothurOut("\nRunning command: summary.seqs(" + inputString + ")\n"); summarySeqsCommand = new SeqSummaryCommand(inputString); summarySeqsCommand->execute(); if (params->m->getControl_pressed()){ break; } temp = summarySeqsCommand->getOutputFiles(); mergeOutputFileList(filenames, temp); delete summarySeqsCommand; params->m->mothurOut("\n/******************************************/\n"); if (params->append) { params->util.appendFiles(fastaFile, params->fasta); params->util.appendFiles(nameFile, params->name); if (params->makeGroup) { params->util.appendFiles(groupFile, params->group); } } for (it = filenames.begin(); it != filenames.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { params->outputNames.push_back((it->second)[i]); params->outputTypes[it->first].push_back((it->second)[i]); } } params->count++; } } catch(exception& e) { params->m->errorOut(e, "SffMultipleCommand", "driver"); exit(1); } } //********************************************************************************************************************** long long SffMultipleCommand::createProcesses(vector sffFiles, vector oligosFiles, string fasta, string name, string group){ try { #if defined NON_WINDOWS #else //trim.flows, shhh.flows cannot handle multiple processors for windows. processors = 1; m->mothurOut("This command can only use 1 processor on Windows platforms, using 1 processors.\n\n"); #endif current->setMothurCalling(true); //divide the groups between the processors vector lines; vector numFilesToComplete; int numFilesPerProcessor = sffFiles.size() / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numFilesPerProcessor; int endIndex = (i+1) * numFilesPerProcessor; if(i == (processors - 1)){ endIndex = sffFiles.size(); } lines.push_back(linePair(startIndex, endIndex)); numFilesToComplete.push_back((endIndex-startIndex)); } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { string extension = toString(i+1); sffMultipleData* dataBundle = new sffMultipleData(sffFiles, oligosFiles, fasta+extension, name+extension, group+extension, lines[i+1].start, lines[i+1].end); dataBundle->setVariables(trim, large, allFiles, flip, keepforward, makeGroup, append, largeSize, bdiffs, tdiffs, pdiffs, sdiffs, ldiffs, maxFlows, minFlows, minLength, maxLength, maxHomoP, maxIters, keepFirst, removeLast, signal, noise, cutoff, sigma, flowOrder, lookupFileName, minDelta); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverSFFMultiple, dataBundle)); } sffMultipleData* dataBundle = new sffMultipleData(sffFiles, oligosFiles, fasta, name, group, lines[0].start, lines[0].end); dataBundle->setVariables(trim, large, allFiles, flip, keepforward, makeGroup, append, largeSize, bdiffs, tdiffs, pdiffs, sdiffs, ldiffs, maxFlows, minFlows, minLength, maxLength, maxHomoP, maxIters, keepFirst, removeLast, signal, noise, cutoff, sigma, flowOrder, lookupFileName, minDelta); driverSFFMultiple(dataBundle); long long num = dataBundle->count; outputNames = dataBundle->outputNames; outputTypes = dataBundle->outputTypes; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; outputNames.insert(outputNames.end(), data[i]->outputNames.begin(), 
data[i]->outputNames.end()); outputTypes.insert(data[i]->outputTypes.begin(), data[i]->outputTypes.end()); if (append) { string extension = toString(i+1); util.appendFiles(fasta+extension, fasta); util.mothurRemove(fasta+extension); util.appendFiles(name+extension, name); util.mothurRemove(name+extension); if (makeGroup) { util.appendFiles(group+extension, group); util.mothurRemove(group+extension); } } delete data[i]; delete workerThreads[i]; } delete dataBundle; current->setMothurCalling(false); return num; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/sffmultiplecommand.h000077500000000000000000000031041424121717000222650ustar00rootroot00000000000000#ifndef Mothur_sffmultiplecommand_h #define Mothur_sffmultiplecommand_h // // sffmultiplecommand.h // Mothur // // Created by Sarah Westcott on 8/14/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "command.hpp" #include "sffinfocommand.h" #include "seqsummarycommand.h" #include "trimflowscommand.h" #include "shhhercommand.h" #include "trimseqscommand.h" class SffMultipleCommand : public Command { public: SffMultipleCommand(string); ~SffMultipleCommand(){} vector setParameters(); string getCommandName() { return "sff.multiple"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Sff.multiple"; } string getDescription() { return "run multiple sff files through, sffinfo, trim.flow, shhh.flows and trim.seqs combining the results"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string inputDir; string filename, flowOrder, lookupFileName, minDelta; vector outputNames; bool abort, trim, large, flip, allFiles, keepforward, append, makeGroup; int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs; int processors, maxIters, largeSize; float signal, noise, cutoff, sigma; int keepFirst, removeLast, maxAmbig; int readFile(vector& sffFiles, vector& oligosFiles); long long createProcesses(vector sffFiles, vector oligosFiles, string, string, string); }; #endif mothur-1.48.0/source/commands/shhhercommand.cpp000077500000000000000000002127671424121717000215700ustar00rootroot00000000000000/* * shhher.cpp * Mothur * * Created by Pat Schloss on 12/27/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "shhhercommand.h" //********************************************************************************************************************** vector ShhherCommand::setParameters(){ try { CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none","fasta-name-group-counts-qfile",false,false,true); parameters.push_back(pflow); CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none","fasta-name-group-counts-qfile",false,false,true); parameters.push_back(pfile); CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(plookup); CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(pcutoff); CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(pmaxiter); CommandParameter plarge("large", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(plarge); CommandParameter psigma("sigma", "Number", "", "60", "", "", "","",false,false); parameters.push_back(psigma); CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "","",false,false); parameters.push_back(pmindelta); CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["counts"] = tempOutNames; outputTypes["qfile"] = tempOutNames; abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ShhherCommand::getHelpString(){ try { string helpString = ""; helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n"; helpString += "The shhh.flows command parameters are flow, file, lookup, cutoff, processors, large, maxiter, sigma, mindelta and order.\n"; helpString += "The flow parameter is used to input your flow file.\n"; helpString += "The file parameter is used to input the *flow.files file created by trim.flows.\n"; helpString += "The lookup parameter is used specify the lookup file you would like to use. http://www.mothur.org/wiki/Lookup_files.\n"; helpString += "The order parameter options are A, B or I. Default=A. 
A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ShhherCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],shhh.fasta"; } else if (type == "name") { pattern = "[filename],shhh.names"; } else if (type == "group") { pattern = "[filename],shhh.groups"; } else if (type == "counts") { pattern = "[filename],shhh.counts"; } else if (type == "qfile") { pattern = "[filename],shhh.qual"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ShhherCommand::ShhherCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } 
else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters flowFileName = validParameter.validFile(parameters, "flow"); flowFilesFileName = validParameter.validFile(parameters, "file"); if (flowFileName == "not found" && flowFilesFileName == "not found") { m->mothurOut("values for either flow or file must be provided for the shhh.flows command.\n"); abort = true; } else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; } if(flowFileName != "not found"){ compositeFASTAFileName = ""; compositeNamesFileName = ""; } else{ ofstream temp; string thisoutputDir = outputdir; if (outputdir == "") { thisoutputDir = util.hasPath(flowFilesFileName); } //we want to rip off .files, and also .flow if its there string fileroot = util.getRootName(util.getSimpleName(flowFilesFileName)); if (fileroot[fileroot.length()-1] == '.') { fileroot = fileroot.substr(0, fileroot.length()-1); } //rip off dot string extension = util.getExtension(fileroot); if (extension == ".flow") { fileroot = util.getRootName(fileroot); } else { fileroot += "."; } //add back if needed compositeFASTAFileName = thisoutputDir + fileroot + "shhh.fasta"; util.openOutputFile(compositeFASTAFileName, temp); temp.close(); compositeNamesFileName = thisoutputDir + fileroot + "shhh.names"; util.openOutputFile(compositeNamesFileName, temp); temp.close(); } if(flowFilesFileName != "not found"){ ifstream flowFilesFile; util.openInputFile(flowFilesFileName, flowFilesFile); while(flowFilesFile){ string fName = util.getline(flowFilesFile); gobble(flowFilesFile); //check to make sure both are able to be opened bool ableToOpen = util.checkLocations(fName, current->getLocations()); if (ableToOpen) { if (util.isBlank(fName)) { m->mothurOut("[WARNING]: " + fName + " is blank, disregarding.\n"); } else { flowFileVector.push_back(fName); } }else { m->mothurOut("Unable to open " + fName + ". Disregarding.\n"); } } flowFilesFile.close(); if (flowFileVector.size() == 0) { m->mothurOut("[ERROR]: no valid files.\n"); abort = true; } } else{ if (outputdir == "") { outputdir = util.hasPath(flowFileName); } flowFileVector.push_back(flowFileName); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
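            //(Illustrative sketch, not part of the original source: every optional numeric parameter
            // below follows the same read-default-convert idiom; for a hypothetical "minsignal"
            // option it would look like:
            //    string t = validParameter.valid(parameters, "minsignal");
            //    if (t == "not found") { t = "0.5"; }   //fall back to a default value
            //    util.mothurConvert(t, minSignal);      //minSignal would be a new float member
            // "minsignal" and minSignal are made-up names used only to show the pattern.)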
string temp; temp = validParameter.validFile(parameters, "lookup"); if (temp == "not found") { string path = current->getProgramPath(); #if defined NON_WINDOWS path += "lookupFiles/"; #else path += "lookupFiles\\"; #endif lookupFileName = util.getFullPathName(path) + "LookUp_Titanium.pat"; //check to make sure both are able to be opened bool ableToOpen = util.checkLocations(lookupFileName, current->getLocations()); if (ableToOpen) { if (util.isBlank(lookupFileName)) { m->mothurOut("[ERROR]: " + lookupFileName + " is blank, aborting.\n"); abort=true; } }else { m->mothurOut("[ERROR]: Unable to open " + lookupFileName + ".\n"); abort=true; } } else if(temp == "not open") { lookupFileName = validParameter.validPath(parameters, "lookup"); //check to make sure both are able to be opened bool ableToOpen = util.checkLocations(lookupFileName, current->getLocations()); if (ableToOpen) { if (util.isBlank(lookupFileName)) { m->mothurOut("[ERROR]: " + lookupFileName + " is blank, aborting.\n"); abort=true; } }else { m->mothurOut("[ERROR]: Unable to open " + lookupFileName + ".\n"); abort=true; } }else { lookupFileName = temp; } temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found"){ temp = "0.01"; } util.mothurConvert(temp, cutoff); temp = validParameter.valid(parameters, "mindelta"); if (temp == "not found"){ temp = "0.000001"; } util.mothurConvert(temp, minDelta); temp = validParameter.valid(parameters, "maxiter"); if (temp == "not found"){ temp = "1000"; } util.mothurConvert(temp, maxIters); temp = validParameter.valid(parameters, "large"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, largeSize); if (largeSize != 0) { large = true; } else { large = false; } if (largeSize < 0) { m->mothurOut("The value of the large cannot be negative.\n"); } temp = validParameter.valid(parameters, "sigma"); if (temp == "not found") { temp = "60"; } util.mothurConvert(temp, sigma); temp = validParameter.valid(parameters, "order"); if (temp == "not found"){ temp = "A"; } if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. 
A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; } else { if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; } else if(toupper(temp[0]) == 'B'){ flowOrder = 
"TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; } else if(toupper(temp[0]) == 'I'){ flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; } else { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. 
A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; } } } } catch(exception& e) { m->errorOut(e, "ShhherCommand", "ShhherCommand"); exit(1); } } //********************************************************************************************************************** int ShhherCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } int startTime = time(nullptr); getSingleLookUp(); if (m->getControl_pressed()) { return 0; } getJointLookUp(); if (m->getControl_pressed()) { return 0; } driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName); if(compositeFASTAFileName != ""){ outputNames.push_back(compositeFASTAFileName); outputTypes["fasta"].push_back(compositeFASTAFileName); outputNames.push_back(compositeNamesFileName); outputTypes["name"].push_back(compositeNamesFileName); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) 
{ if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } } m->mothurOut("It took " + toString(time(nullptr) - startTime) + " secs to de-noise your sequences.\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "execute"); exit(1); } } //******************************************************************************************************************** //sorts biggest to smallest inline bool compareFileSizes(string left, string right){ FILE * pFile; long leftsize = 0; //get num bytes in file string filename = left; pFile = fopen (filename.c_str(),"rb"); string error = "Error opening " + filename; if (pFile==nullptr) perror (error.c_str()); else{ fseek (pFile, 0, SEEK_END); leftsize=ftell (pFile); fclose (pFile); } FILE * pFile2; long rightsize = 0; //get num bytes in file filename = right; pFile2 = fopen (filename.c_str(),"rb"); error = "Error opening " + filename; if (pFile2==nullptr) perror (error.c_str()); else{ fseek (pFile2, 0, SEEK_END); rightsize=ftell (pFile2); fclose (pFile2); } return (leftsize > rightsize); } /**************************************************************************************************/ vector ShhherCommand::parseFlowFiles(string filename){ try { vector files; int count = 0; ifstream in; util.openInputFile(filename, in); int thisNumFLows = 0; in >> thisNumFLows; gobble(in); while (!in.eof()) { if (m->getControl_pressed()) { break; } ofstream out; string outputFileName = filename + toString(count) + ".temp"; util.openOutputFile(outputFileName, out); out << thisNumFLows << endl; files.push_back(outputFileName); int numLinesWrote = 0; for (int i = 0; i < largeSize; i++) { if (in.eof()) { break; } string line = util.getline(in); gobble(in); out << line << endl; numLinesWrote++; } out.close(); if (numLinesWrote == 0) { util.mothurRemove(outputFileName); files.pop_back(); } count++; } in.close(); if (m->getControl_pressed()) { for (int i = 0; i < files.size(); i++) { util.mothurRemove(files[i]); } files.clear(); } m->mothurOut("\nDivided " + filename + " into " + toString(files.size()) + " files.\n\n"); return files; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "parseFlowFiles"); exit(1); } } /**************************************************************************************************/ int ShhherCommand::driver(vector filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName){ try { int numCompleted = 0; for(int i=0;igetControl_pressed()) { break; } vector theseFlowFileNames; theseFlowFileNames.push_back(filenames[i]); if (large) { theseFlowFileNames = parseFlowFiles(filenames[i]); } if (m->getControl_pressed()) { break; } double begClock = clock(); unsigned long long begTime; string fileNameForOutput = filenames[i]; for (int g = 0; g < theseFlowFileNames.size(); g++) { string flowFileName = theseFlowFileNames[g]; m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(i+1) + " of " + toString(filenames.size()) + ")\t<<<<<\n"); m->mothurOut("Reading flowgrams...\n"); vector seqNameVector; vector lengths; vector flowDataIntI; vector flowDataPrI; map nameMap; vector uniqueFlowgrams; vector uniqueCount; vector mapSeqToUnique; vector mapUniqueToSeq; vector uniqueLengths; int numFlowCells; if (m->getDebug()) { m->mothurOut("[DEBUG]: About to read flowgrams.\n"); } int numSeqs = 
getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells); if (m->getControl_pressed()) { break; } m->mothurOut("Identifying unique flowgrams...\n"); int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI); if (m->getControl_pressed()) { break; } m->mothurOut("Calculating distances between flowgrams...\n"); string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist"; begTime = time(nullptr); flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI); m->mothurOutEndLine(); m->mothurOut("Total time: " + toString(time(nullptr) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n'); string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names"; createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq); if (m->getControl_pressed()) { break; } m->mothurOut("\nClustering flowgrams...\n"); string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list"; cluster(listFileName, distFileName, namesFileName); if (m->getControl_pressed()) { break; } vector otuData; vector cumNumSeqs; vector nSeqsPerOTU; vector > aaP; //tMaster->aanP: each row is a different otu / each col contains the sequence indices vector > aaI; //tMaster->aanI: that are in each otu - can't differentiate between aaP and aaI vector seqNumber; //tMaster->anP: the sequence id number sorted by OTU vector seqIndex; //tMaster->anI; the index that corresponds to seqNumber int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap); if (m->getControl_pressed()) { break; } util.mothurRemove(distFileName); util.mothurRemove(namesFileName); util.mothurRemove(listFileName); vector dist; //adDist - distance of sequences to centroids vector change; //did the centroid sequence change? 
0 = no; 1 = yes vector centroids; //the representative flowgram for each cluster m vector weight; vector singleTau; //tMaster->adTau: 1-D Tau vector (1xnumSeqs) vector nSeqsBreaks; vector nOTUsBreaks; if (m->getDebug()) { m->mothurOut("[DEBUG]: numSeqs = " + toString(numSeqs) + " numOTUS = " + toString(numOTUs) + " about to alloc a dist vector with size = " + toString((numSeqs * numOTUs)) + ".\n"); } dist.assign(numSeqs * numOTUs, 0); change.assign(numOTUs, 1); centroids.assign(numOTUs, -1); weight.assign(numOTUs, 0); singleTau.assign(numSeqs, 1.0); nSeqsBreaks.assign(2, 0); nOTUsBreaks.assign(2, 0); nSeqsBreaks[0] = 0; nSeqsBreaks[1] = numSeqs; nOTUsBreaks[1] = numOTUs; if (m->getDebug()) { m->mothurOut("[DEBUG]: done allocating memory, about to denoise.\n"); } if (m->getControl_pressed()) { break; } double maxDelta = 0; int iter = 0; begClock = clock(); begTime = time(nullptr); m->mothurOut("\nDenoising flowgrams...\n"); m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n"); while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){ if (m->getControl_pressed()) { break; } double cycClock = clock(); unsigned long long cycTime = time(nullptr); fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI); if (m->getControl_pressed()) { break; } calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber); if (m->getControl_pressed()) { break; } maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight); if (m->getControl_pressed()) { break; } double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); if (m->getControl_pressed()) { break; } checkCentroids(numOTUs, centroids, weight); if (m->getControl_pressed()) { break; } calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU, dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths); if (m->getControl_pressed()) { break; } iter++; m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(nullptr) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n'); } if (m->getControl_pressed()) { break; } m->mothurOut("\nFinalizing...\n"); fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI); if (m->getDebug()) { m->mothurOut("[DEBUG]: done fill().\n"); } if (m->getControl_pressed()) { break; } setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI); if (m->getDebug()) { m->mothurOut("[DEBUG]: done setOTUs().\n"); } if (m->getControl_pressed()) { break; } vector otuCounts(numOTUs, 0); for(int j=0;jgetDebug()) { m->mothurOut("[DEBUG]: done calcCentroidsDriver().\n"); } if (m->getControl_pressed()) { break; } if ((large) && (g == 0)) { flowFileName = filenames[i]; theseFlowFileNames[0] = filenames[i]; } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = util.hasPath(flowFileName); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(flowFileName)); string qualityFileName = getOutputFileName("qfile",variables); string fastaFileName = getOutputFileName("fasta",variables); string nameFileName = getOutputFileName("name",variables); string otuCountsFileName = getOutputFileName("counts",variables); string fileRoot = 
util.getRootName(util.getSimpleName(flowFileName)); int pos = fileRoot.find_first_of('.'); string fileGroup = fileRoot; if (pos != string::npos) { fileGroup = fileRoot.substr(pos+1, (fileRoot.length()-1-(pos+1))); } string groupFileName = getOutputFileName("group",variables); writeQualities(numOTUs, numFlowCells, qualityFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); if (m->getControl_pressed()) { break; } writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, fastaFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);if (m->getControl_pressed()) { break; } writeNames(thisCompositeNamesFileName, numOTUs, nameFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU); if (m->getControl_pressed()) { break; } writeClusters(otuCountsFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI); if (m->getControl_pressed()) { break; } writeGroups(groupFileName, fileGroup, numSeqs, seqNameVector); if (m->getControl_pressed()) { break; } if (large) { if (g > 0) { variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(theseFlowFileNames[0])); util.appendFiles(qualityFileName, getOutputFileName("qfile",variables)); util.mothurRemove(qualityFileName); util.appendFiles(fastaFileName, getOutputFileName("fasta",variables)); util.mothurRemove(fastaFileName); util.appendFiles(nameFileName, getOutputFileName("name",variables)); util.mothurRemove(nameFileName); util.appendFiles(otuCountsFileName, getOutputFileName("counts",variables)); util.mothurRemove(otuCountsFileName); util.appendFiles(groupFileName, getOutputFileName("group",variables)); util.mothurRemove(groupFileName); } util.mothurRemove(theseFlowFileNames[g]); } } numCompleted++; m->mothurOut("Total time to process " + fileNameForOutput + ":\t" + toString(time(nullptr) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n'); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } return numCompleted; }catch(exception& e) { m->errorOut(e, "ShhherCommand", "driver"); exit(1); } } /**************************************************************************************************/ int ShhherCommand::getFlowData(string filename, vector& thisSeqNameVector, vector& thisLengths, vector& thisFlowDataIntI, map& thisNameMap, int& numFlowCells){ try{ ifstream flowFile; util.openInputFile(filename, flowFile); string seqName; int currentNumFlowCells; float intensity; thisSeqNameVector.clear(); thisLengths.clear(); thisFlowDataIntI.clear(); thisNameMap.clear(); string numFlowTest; flowFile >> numFlowTest; if (!util.isContainingOnlyDigits(numFlowTest)) { m->mothurOut("[ERROR]: expected a number and got " + numFlowTest + ", quitting. 
Did you use the flow parameter instead of the file parameter?\n"); exit(1); } else { convert(numFlowTest, numFlowCells); } if (m->getDebug()) { m->mothurOut("[DEBUG]: numFlowCells = " + toString(numFlowCells) + ".\n"); } int index = 0;//pcluster while(!flowFile.eof()){ if (m->getControl_pressed()) { break; } flowFile >> seqName >> currentNumFlowCells; thisLengths.push_back(currentNumFlowCells); thisSeqNameVector.push_back(seqName); thisNameMap[seqName] = index++;//pcluster if (m->getDebug()) { m->mothurOut("[DEBUG]: seqName = " + seqName + " length = " + toString(currentNumFlowCells) + " index = " + toString(index) + "\n"); } for(int i=0;i> intensity; if(intensity > 9.99) { intensity = 9.99; } int intI = int(100 * intensity + 0.0001); thisFlowDataIntI.push_back(intI); } gobble(flowFile); } flowFile.close(); int numSeqs = thisSeqNameVector.size(); for(int i=0;igetControl_pressed()) { break; } int iNumFlowCells = i * numFlowCells; for(int j=thisLengths[i];jerrorOut(e, "ShhherCommand", "getFlowData"); exit(1); } } /**************************************************************************************************/ int ShhherCommand::flowDistParentFork(int numFlowCells, string distFileName, int stopSeq, vector& mapUniqueToSeq, vector& mapSeqToUnique, vector& lengths, vector& flowDataPrI, vector& flowDataIntI){ try{ ostringstream outStream; outStream.setf(ios::fixed, ios::floatfield); outStream.setf(ios::dec, ios::basefield); outStream.setf(ios::showpoint); outStream.precision(6); int begTime = time(nullptr); double begClock = clock(); for(int i=0;igetControl_pressed()) { break; } for(int j=0;jmothurOutJustToScreen(toString(i) + "\t" + toString(time(nullptr) - begTime)); m->mothurOutJustToScreen("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC)+"\n"); } } ofstream distFile(distFileName.c_str()); distFile << outStream.str(); distFile.close(); if (m->getControl_pressed()) {} else { m->mothurOutJustToScreen(toString(stopSeq-1) + "\t" + toString(time(nullptr) - begTime)); m->mothurOutJustToScreen("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC)+"\n"); } return 0; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "flowDistParentFork"); exit(1); } } /**************************************************************************************************/ float ShhherCommand::calcPairwiseDist(int numFlowCells, int seqA, int seqB, vector& mapSeqToUnique, vector& lengths, vector& flowDataPrI, vector& flowDataIntI){ try{ int minLength = lengths[mapSeqToUnique[seqA]]; if(lengths[seqB] < minLength){ minLength = lengths[mapSeqToUnique[seqB]]; } int ANumFlowCells = seqA * numFlowCells; int BNumFlowCells = seqB * numFlowCells; float dist = 0; for(int i=0;igetControl_pressed()) { break; } int flowAIntI = flowDataIntI[ANumFlowCells + i]; float flowAPrI = flowDataPrI[ANumFlowCells + i]; int flowBIntI = flowDataIntI[BNumFlowCells + i]; float flowBPrI = flowDataPrI[BNumFlowCells + i]; dist += jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI; } dist /= (float) minLength; return dist; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "calcPairwiseDist"); exit(1); } } /**************************************************************************************************/ int ShhherCommand::getUniques(int numSeqs, int numFlowCells, vector& uniqueFlowgrams, vector& uniqueCount, vector& uniqueLengths, vector& mapSeqToUnique, vector& mapUniqueToSeq, vector& lengths, vector& flowDataPrI, vector& flowDataIntI){ try{ int numUniques = 0; uniqueFlowgrams.assign(numFlowCells * numSeqs, -1); 
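        //(Illustrative note, not part of the original source.) getFlowData() above caps each raw flow
        //intensity at 9.99 and stores it as an integer, int(100 * intensity + 0.0001), so the stored
        //values fall in [0, 999] and can index the HOMOPS x NUMBINS (10 x 1000) lookup tables directly,
        //e.g. singleLookUp[homopolymerLength * NUMBINS + intIntensity]. For example, a raw intensity of
        //1.04 is stored as 104 and 2.98 as 298. The dereplication loop below compares these integer
        //flow values to find reads with effectively identical flowgrams.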
uniqueCount.assign(numSeqs, 0); // anWeights uniqueLengths.assign(numSeqs, 0); mapSeqToUnique.assign(numSeqs, -1); mapUniqueToSeq.assign(numSeqs, -1); vector uniqueFlowDataIntI(numFlowCells * numSeqs, -1); for(int i=0;igetControl_pressed()) { break; } int index = 0; vector current(numFlowCells); for(int j=0;j uniqueLengths[j]) { uniqueLengths[j] = lengths[i]; } break; } index++; } if(index == numUniques){ uniqueLengths[numUniques] = lengths[i]; uniqueCount[numUniques] = 1; mapSeqToUnique[i] = numUniques;//anMap mapUniqueToSeq[numUniques] = i;//anF for(int k=0;kgetControl_pressed()) { break; } flowDataPrI[i] = getProbIntensity(flowDataIntI[i]); } return numUniques; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "getUniques"); exit(1); } } /**************************************************************************************************/ int ShhherCommand::createNamesFile(int numSeqs, int numUniques, string filename, vector& seqNameVector, vector& mapSeqToUnique, vector& mapUniqueToSeq){ try{ vector duplicateNames(numUniques, ""); for(int i=0;igetControl_pressed()) { break; } // nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl; nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl; } nameFile.close(); return 0; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "createNamesFile"); exit(1); } } //********************************************************************************************************************** int ShhherCommand::cluster(string filename, string distFileName, string namesFileName){ try { ReadMatrix* read = new ReadColumnMatrix(distFileName); read->setCutoff(cutoff); NameAssignment* clusterNameMap = new NameAssignment(namesFileName); clusterNameMap->readMap(); read->read(clusterNameMap); ListVector* list = read->getListVector(); SparseDistanceMatrix* matrix = read->getDMatrix(); delete read; delete clusterNameMap; RAbundVector* rabund = new RAbundVector(list->getRAbundVector()); float adjust = -1.0; Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest", adjust); string tag = cluster->getTag(); double clusterCutoff = cutoff; while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){ if (m->getControl_pressed()) { break; } cluster->update(clusterCutoff); } list->setLabel(toString(cutoff)); ofstream listFile; util.openOutputFile(filename, listFile); list->print(listFile, true); listFile.close(); delete matrix; delete cluster; delete rabund; delete list; return 0; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "cluster"); exit(1); } } /**************************************************************************************************/ int ShhherCommand::getOTUData(int numSeqs, string fileName, vector& otuData, vector& cumNumSeqs, vector& nSeqsPerOTU, vector >& aaP, //tMaster->aanP: each row is a different otu / each col contains the sequence indices vector >& aaI, //tMaster->aanI: that are in each otu - can't differentiate between aaP and aaI vector& seqNumber, //tMaster->anP: the sequence id number sorted by OTU vector& seqIndex, map& nameMap){ try { InputData input(fileName, "list", nullVector); ListVector* list = input.getListVector(); string label = list->getLabel(); int numOTUs = list->getNumBins(); if (m->getDebug()) { m->mothurOut("[DEBUG]: Getting OTU Data...\n"); } otuData.assign(numSeqs, 0); cumNumSeqs.assign(numOTUs, 0); nSeqsPerOTU.assign(numOTUs, 0); 
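        //(Illustrative note, not part of the original source; the names and counts below are hypothetical.)
        //For a list line with two OTUs, "A,B C", and nameMap {A:0, B:1, C:2}, the loop below produces
        //roughly:
        //    nSeqsPerOTU = {2, 1};  aaP[0] = {0, 1};  aaP[1] = {2};
        //    seqNumber   = {0, 1, 2}   (sequence indices flattened in OTU order)
        //with cumNumSeqs[i] giving the offset of OTU i within that flattened vector (see fill() below).
        //This mirrors the aanP/anP description in the parameter comments above.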
aaP.clear();aaP.resize(numOTUs); seqNumber.clear(); aaI.clear(); seqIndex.clear(); for(int i=0;igetControl_pressed()) { break; } if (m->getDebug()) { m->mothurOut("[DEBUG]: processing OTU " + toString(i) + ".\n"); } string singleOTU = list->get(i); vector otuSeqs; util.splitAtComma(singleOTU, otuSeqs); for(int j=0;j::iterator nmIt = nameMap.find(seqName); int index = nmIt->second; nameMap.erase(nmIt); otuData[index] = i; nSeqsPerOTU[i]++; aaP[i].push_back(index); } sort(aaP[i].begin(), aaP[i].end()); for(int j=0;jerrorOut(e, "ShhherCommand", "getOTUData"); exit(1); } } /**************************************************************************************************/ int ShhherCommand::calcCentroidsDriver(int numOTUs, vector& cumNumSeqs, vector& nSeqsPerOTU, vector& seqIndex, vector& change, //did the centroid sequence change? 0 = no; 1 = yes vector& centroids, //the representative flowgram for each cluster m vector& singleTau, //tMaster->adTau: 1-D Tau vector (1xnumSeqs) vector& mapSeqToUnique, vector& uniqueFlowgrams, vector& flowDataIntI, vector& lengths, int numFlowCells, vector& seqNumber){ //this function gets the most likely homopolymer length at a flow position for a group of sequences //within an otu try{ for(int i=0;igetControl_pressed()) { break; } double count = 0; int position = 0; int minFlowGram = 100000000; double minFlowValue = 1e8; change[i] = 0; //FALSE for(int j=0;j 0 && count > MIN_COUNT){ vector adF(nSeqsPerOTU[i]); vector anL(nSeqsPerOTU[i]); for(int j=0;jerrorOut(e, "ShhherCommand", "calcCentroidsDriver"); exit(1); } } /**************************************************************************************************/ double ShhherCommand::getDistToCentroid(int cent, int flow, int length, vector& uniqueFlowgrams, vector& flowDataIntI, int numFlowCells){ try{ int flowAValue = cent * numFlowCells; int flowBValue = flow * numFlowCells; double dist = 0; for(int i=0;ierrorOut(e, "ShhherCommand", "getDistToCentroid"); exit(1); } } /**************************************************************************************************/ double ShhherCommand::getNewWeights(int numOTUs, vector& cumNumSeqs, vector& nSeqsPerOTU, vector& singleTau, vector& seqNumber, vector& weight){ try{ double maxChange = 0; for(int i=0;igetControl_pressed()) { break; } double difference = weight[i]; weight[i] = 0; for(int j=0;j maxChange){ maxChange = difference; } } return maxChange; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "getNewWeights"); exit(1); } } /**************************************************************************************************/ double ShhherCommand::getLikelihood(int numSeqs, int numOTUs, vector& nSeqsPerOTU, vector& seqNumber, vector& cumNumSeqs, vector& seqIndex, vector& dist, vector& weight){ try{ vector P(numSeqs, 0); int effNumOTUs = 0; for(int i=0;i MIN_WEIGHT){ effNumOTUs++; } } string hold; for(int i=0;igetControl_pressed()) { break; } for(int j=0;jerrorOut(e, "ShhherCommand", "getNewWeights"); exit(1); } } /**************************************************************************************************/ int ShhherCommand::checkCentroids(int numOTUs, vector& centroids, vector& weight){ try{ vector unique(numOTUs, 1); for(int i=0;igetControl_pressed()) { break; } if(unique[i] == 1){ for(int j=i+1;jerrorOut(e, "ShhherCommand", "checkCentroids"); exit(1); } } /**************************************************************************************************/ void ShhherCommand::calcNewDistances(int numSeqs, int numOTUs, vector& nSeqsPerOTU, vector& 
dist, vector& weight, vector& change, vector& centroids, vector >& aaP, vector& singleTau, vector >& aaI, vector& seqNumber, vector& seqIndex, vector& uniqueFlowgrams, vector& flowDataIntI, int numFlowCells, vector& lengths){ try{ int total = 0; vector newTau(numOTUs,0); vector norms(numSeqs, 0); nSeqsPerOTU.assign(numOTUs, 0); for(int i=0;igetControl_pressed()) { break; } int indexOffset = i * numOTUs; double offset = 1e8; for(int j=0;j MIN_WEIGHT && change[j] == 1){ dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells); } if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){ offset = dist[indexOffset + j]; } } for(int j=0;j MIN_WEIGHT){ newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j]; norms[i] += newTau[j]; } else{ newTau[j] = 0.0; } } for(int j=0;j MIN_TAU){ int oldTotal = total; total++; singleTau.resize(total, 0); seqNumber.resize(total, 0); seqIndex.resize(total, 0); singleTau[oldTotal] = newTau[j]; aaP[j][nSeqsPerOTU[j]] = oldTotal; aaI[j][nSeqsPerOTU[j]] = i; nSeqsPerOTU[j]++; } } } } catch(exception& e) { m->errorOut(e, "ShhherCommand", "calcNewDistances"); exit(1); } } /**************************************************************************************************/ int ShhherCommand::fill(int numOTUs, vector& seqNumber, vector& seqIndex, vector& cumNumSeqs, vector& nSeqsPerOTU, vector >& aaP, vector >& aaI){ try { int index = 0; for(int i=0;igetControl_pressed()) { return 0; } cumNumSeqs[i] = index; for(int j=0;jerrorOut(e, "ShhherCommand", "fill"); exit(1); } } /**************************************************************************************************/ void ShhherCommand::setOTUs(int numOTUs, int numSeqs, vector& seqNumber, vector& seqIndex, vector& cumNumSeqs, vector& nSeqsPerOTU, vector& otuData, vector& singleTau, vector& dist, vector >& aaP, vector >& aaI){ try { vector bigTauMatrix(numOTUs * numSeqs, 0.0000); for(int i=0;igetControl_pressed()) { break; } for(int j=0;j maxTau){ maxTau = bigTauMatrix[i * numOTUs + j]; maxOTU = j; } } otuData[i] = maxOTU; } nSeqsPerOTU.assign(numOTUs, 0); for(int i=0;ierrorOut(e, "ShhherCommand", "setOTUs"); exit(1); } } /**************************************************************************************************/ void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string qualityFileName, vector otuCounts, vector& nSeqsPerOTU, vector& seqNumber, vector& singleTau, vector& flowDataIntI, vector& uniqueFlowgrams, vector& cumNumSeqs, vector& mapUniqueToSeq, vector& seqNameVector, vector& centroids, vector >& aaI){ try { ofstream qualityFile; util.openOutputFile(qualityFileName, qualityFile); qualityFile.setf(ios::fixed, ios::floatfield); qualityFile.setf(ios::showpoint); qualityFile << setprecision(6); vector > qualities(numOTUs); vector pr(HOMOPS, 0); for(int i=0;igetControl_pressed()) { break; } int index = 0; if(nSeqsPerOTU[i] > 0){ while(index < numFlowCells){ double maxPrValue = 1e8; short maxPrIndex = -1; double count = 0.0000; pr.assign(HOMOPS, 0); for(int j=0;j MIN_COUNT){ double U = 0.0000; double norm = 0.0000; for(int s=0;s0.00){ temp = log10(U); } else{ temp = -10.1; } temp = floor(-10 * temp); value = (int)floor(temp); if(value > 100){ value = 100; } qualities[i].push_back((int)value); } }//end if index++; }//end while }//end if if(otuCounts[i] > 0){ qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl; //need to get past the first four bases for (int j = 4; j < qualities[i].size(); j++) { qualityFile 
<< qualities[i][j] << ' '; } qualityFile << endl; } }//end for qualityFile.close(); outputNames.push_back(qualityFileName); outputTypes["qfile"].push_back(qualityFileName); } catch(exception& e) { m->errorOut(e, "ShhherCommand", "writeQualities"); exit(1); } } /**************************************************************************************************/ void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string fastaFileName, vector otuCounts, vector& uniqueFlowgrams, vector& seqNameVector, vector >& aaI, vector& centroids){ try { ofstream fastaFile; util.openOutputFile(fastaFileName, fastaFile); vector names(numOTUs, ""); for(int i=0;igetControl_pressed()) { break; } int index = centroids[i]; if(otuCounts[i] > 0){ fastaFile << '>' << seqNameVector[aaI[i][0]] << endl; string newSeq = ""; for(int j=0;j= 4) { fastaFile << newSeq.substr(4) << endl; } else { fastaFile << "NNNN" << endl; } } } fastaFile.close(); outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); if(thisCompositeFASTAFileName != ""){ util.appendFiles(fastaFileName, thisCompositeFASTAFileName); } } catch(exception& e) { m->errorOut(e, "ShhherCommand", "writeSequences"); exit(1); } } /**************************************************************************************************/ void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string nameFileName, vector otuCounts, vector& seqNameVector, vector >& aaI, vector& nSeqsPerOTU){ try { ofstream nameFile; util.openOutputFile(nameFileName, nameFile); for(int i=0;igetControl_pressed()) { break; } if(otuCounts[i] > 0){ nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]]; for(int j=1;jerrorOut(e, "ShhherCommand", "writeNames"); exit(1); } } /**************************************************************************************************/ void ShhherCommand::writeGroups(string groupFileName, string fileRoot, int numSeqs, vector& seqNameVector){ try { ofstream groupFile; util.openOutputFile(groupFileName, groupFile); for(int i=0;igetControl_pressed()) { break; } groupFile << seqNameVector[i] << '\t' << fileRoot << endl; } groupFile.close(); outputNames.push_back(groupFileName); outputTypes["group"].push_back(groupFileName); } catch(exception& e) { m->errorOut(e, "ShhherCommand", "writeGroups"); exit(1); } } /**************************************************************************************************/ void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int numFlowCells, vector otuCounts, vector& centroids, vector& uniqueFlowgrams, vector& seqNameVector, vector >& aaI, vector& nSeqsPerOTU, vector& lengths, vector& flowDataIntI){ try { ofstream otuCountsFile; util.openOutputFile(otuCountsFileName, otuCountsFile); string bases = flowOrder; for(int i=0;igetControl_pressed()) { break; } //output the translated version of the centroid sequence for the otu if(otuCounts[i] > 0){ int index = centroids[i]; otuCountsFile << "ideal\t"; for(int j=8;j= 4) { otuCountsFile << newSeq.substr(4) << endl; } else { otuCountsFile << "NNNN" << endl; } } otuCountsFile << endl; } } otuCountsFile.close(); outputNames.push_back(otuCountsFileName); outputTypes["counts"].push_back(otuCountsFileName); } catch(exception& e) { m->errorOut(e, "ShhherCommand", "writeClusters"); exit(1); } } /**************************************************************************************************/ void ShhherCommand::getSingleLookUp(){ try{ // these are the -log 
probabilities that a signal corresponds to a particular homopolymer length singleLookUp.assign(HOMOPS * NUMBINS, 0); int index = 0; ifstream lookUpFile; util.openInputFile(lookupFileName, lookUpFile); for(int i=0;igetControl_pressed()) { break; } float logFracFreq; lookUpFile >> logFracFreq; for(int j=0;j> singleLookUp[index]; index++; } } lookUpFile.close(); } catch(exception& e) { m->errorOut(e, "ShhherCommand", "getSingleLookUp"); exit(1); } } /**************************************************************************************************/ void ShhherCommand::getJointLookUp(){ try{ // the most likely joint probability (-log) that two intenities have the same polymer length jointLookUp.resize(NUMBINS * NUMBINS, 0); for(int i=0;igetControl_pressed()) { break; } for(int j=0;jerrorOut(e, "ShhherCommand", "getJointLookUp"); exit(1); } } /**************************************************************************************************/ double ShhherCommand::getProbIntensity(int intIntensity){ try{ double minNegLogProb = 100000000; for(int i=0;igetControl_pressed()) { break; } float negLogProb = singleLookUp[i * NUMBINS + intIntensity]; if(negLogProb < minNegLogProb) { minNegLogProb = negLogProb; } } return minNegLogProb; } catch(exception& e) { m->errorOut(e, "ShhherCommand", "getProbIntensity"); exit(1); } } mothur-1.48.0/source/commands/shhhercommand.h000077500000000000000000000117121424121717000212200ustar00rootroot00000000000000#ifndef SHHHER_H #define SHHHER_H /* * shhher.h * Mothur * * Created by Pat Schloss on 12/27/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "command.hpp" #include "readcolumn.h" #include "readmatrix.hpp" #include "rabundvector.hpp" #include "sabundvector.hpp" #include "listvector.hpp" #include "cluster.hpp" #include "inputdata.h" #include //********************************************************************************************************************** #define NUMBINS 1000 #define HOMOPS 10 #define MIN_COUNT 0.1 #define MIN_WEIGHT 0.1 #define MIN_TAU 0.0001 #define MIN_ITER 10 //********************************************************************************************************************** class ShhherCommand : public Command { public: ShhherCommand(string); ~ShhherCommand() = default; vector setParameters(); string getCommandName() { return "shhh.flows"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Gevers D, Westcott SL (2011). Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies. PLoS ONE. 6:e27310.\nQuince C, Lanzen A, Davenport RJ, Turnbaugh PJ (2011). Removing noise from pyrosequenced amplicons. BMC Bioinformatics 12:38.\nQuince C, Lanzén A, Curtis TP, Davenport RJ, Hall N, Head IM, Read LF, Sloan WT (2009). Accurate determination of microbial diversity from 454 pyrosequencing data. Nat. 
Methods 6:639.\nhttp://www.mothur.org/wiki/Shhh.flows"; } string getDescription() { return "shhh.flows"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, large; string flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName; int maxIters, largeSize; float cutoff, sigma, minDelta; string flowOrder; vector outputNames; vector singleLookUp; vector jointLookUp; vector flowFileVector; vector parseFlowFiles(string); int driver(vector, string, string); int getFlowData(string, vector&, vector&, vector&, map&, int&); int getUniques(int, int, vector&, vector&, vector&, vector&, vector&, vector&, vector&, vector&); int flowDistParentFork(int, string, int, vector&, vector&, vector&, vector&, vector&); float calcPairwiseDist(int, int, int, vector&, vector&, vector&, vector&); int createNamesFile(int, int, string, vector&, vector&, vector&); int cluster(string, string, string); int getOTUData(int numSeqs, string, vector&, vector&, vector&, vector >&, vector >&, vector&, vector&,map&); int calcCentroidsDriver(int numOTUs, vector&, vector&, vector&, vector&, vector&, vector&, vector&, vector&, vector&, vector&, int, vector&); double getDistToCentroid(int, int, int, vector&, vector&, int); double getNewWeights(int, vector&, vector&, vector&, vector&, vector&); double getLikelihood(int, int, vector&, vector&, vector&, vector&, vector&, vector&); int checkCentroids(int, vector&, vector&); void calcNewDistances(int, int, vector& , vector&,vector& , vector& change, vector&,vector >&, vector&, vector >&, vector&, vector&, vector&, vector&, int, vector&); int fill(int, vector&, vector&, vector&, vector&, vector >&, vector >&); void setOTUs(int, int, vector&, vector&, vector&, vector&, vector&, vector&, vector&, vector >&, vector >&); void writeQualities(int, int, string, vector, vector&, vector&, vector&, vector&, vector&, vector&, vector&, vector&, vector&, vector >&); void writeSequences(string, int, int, string, vector, vector&, vector&, vector >&, vector&); void writeNames(string, int, string, vector, vector&, vector >&, vector&); void writeGroups(string, string, int, vector&); void writeClusters(string, int, int, vector, vector&, vector&, vector&, vector >&, vector&, vector&, vector&); void getSingleLookUp(); void getJointLookUp(); double getProbIntensity(int); }; //********************************************************************************************************************** #endif mothur-1.48.0/source/commands/shhhseqscommand.cpp000077500000000000000000000656611424121717000221340ustar00rootroot00000000000000/* * shhhseqscommand.cpp * Mothur * * Created by westcott on 11/8/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "shhhseqscommand.h" //********************************************************************************************************************** vector ShhhSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-map",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","name",false,true,true); parameters.push_back(pname); CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pgroup); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); CommandParameter psigma("sigma", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(psigma); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["map"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string ShhhSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The shhh.seqs command reads a fasta and name file and ....\n"; helpString += "The shhh.seqs command parameters are fasta, name, group, sigma and processors.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n"; helpString += "The group parameter allows you to provide a group file. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The sigma parameter .... The default is 0.01. 
\n"; helpString += "The shhh.seqs command should be in the following format: \n"; helpString += "shhh.seqs(fasta=yourFastaFile, name=yourNameFile) \n"; helpString += "Example: shhh.seqs(fasta=AD.align, name=AD.names) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string ShhhSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],shhh_seqs.fasta"; } else if (type == "name") { pattern = "[filename],shhh_seqs.names"; } else if (type == "map") { pattern = "[filename],shhh_seqs.map"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** ShhhSeqsCommand::ShhhSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } } else if (fastafile == "not open") { abort = true; } else { current->setFastaFile(fastafile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
namefile = validParameter.validFile(parameters, "name"); if (namefile == "not found") { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { m->mothurOut("You have no current namefile and the name parameter is required.\n"); abort = true; } } else if (namefile == "not open") { namefile = ""; abort = true; } else { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not found") { groupfile = ""; } else if (groupfile == "not open") { abort = true; groupfile = ""; } else { current->setGroupFile(groupfile); } string temp = validParameter.valid(parameters, "sigma"); if(temp == "not found"){ temp = "0.01"; } util.mothurConvert(temp, sigma); sigma = 1/sigma; temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); } } catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "ShhhSeqsCommand"); exit(1); } } //********************************************************************************************************************** int driver(seqNoise& noise, vector& sequences, vector& uniqueNames, vector& redundantNames, vector& seqFreq, string distFileName, string outputFileName, string nameFileName, string mapFileName, MothurOut* m, int sigma) { try { Utils util; double cutOff = 0.08; int minIter = 10; int maxIter = 1000; double minDelta = 1e-6; int numIters = 0; double maxDelta = MOTHURMAX; int numSeqs = sequences.size(); //run cluster command string inputString = "phylip=" + distFileName + ", method=furthest, cutoff=0.08"; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: cluster(" + inputString + ")\n"); Command* clusterCommand = new ClusterCommand(inputString); clusterCommand->execute(); map > filenames = clusterCommand->getOutputFiles(); string listFileName = filenames["list"][0]; string rabundFileName = filenames["rabund"][0]; util.mothurRemove(rabundFileName); string sabundFileName = filenames["sabund"][0]; util.mothurRemove(sabundFileName); delete clusterCommand; m->mothurOut("/******************************************/\n"); if (m->getControl_pressed()) { util.mothurRemove(distFileName); util.mothurRemove(listFileName); return 0; } vector distances(numSeqs * numSeqs); noise.getDistanceData(distFileName, distances); util.mothurRemove(distFileName); if (m->getControl_pressed()) { util.mothurRemove(listFileName); return 0; } vector otuData(numSeqs); vector otuFreq; vector > otuBySeqLookUp; noise.getListData(listFileName, cutOff, otuData, otuFreq, otuBySeqLookUp); util.mothurRemove(listFileName); if (m->getControl_pressed()) { return 0; } int numOTUs = otuFreq.size(); vector weights(numOTUs, 0); vector change(numOTUs, 1); vector centroids(numOTUs, -1); vector cumCount(numOTUs, 0); vector tau(numSeqs, 1); vector anP(numSeqs, 0); vector anI(numSeqs, 0); vector anN(numSeqs, 0); vector > aanI = otuBySeqLookUp; while(numIters < minIter || ((maxDelta > minDelta) && (numIters < maxIter))){ if (m->getControl_pressed()) { return 0; } noise.updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount); if (m->getControl_pressed()) { return 0; } maxDelta = noise.calcNewWeights(weights, seqFreq, anI, cumCount, anP, otuFreq, tau); if (m->getControl_pressed()) { return 0; } noise.calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); if (m->getControl_pressed()) { return 0; } 
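/* The responsibility (tau) update a few lines below follows the shhh.flows model:
   for sequence i and OTU j, tau_ij = exp(sigma * (offset_i - d_ij)) * weight_j,
   where offset_i is the distance from i to its nearest sufficiently weighted
   centroid. With the default sigma of 0.01 (inverted to 100 in the constructor), a
   centroid lying 0.005 further away than the nearest one is down-weighted by
   exp(100 * -0.005) ~ 0.61 before normalization -- a hypothetical worked value, for
   illustration only. Taus at or below 1e-4 are discarded. */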
noise.checkCentroids(weights, centroids); if (m->getControl_pressed()) { return 0; } otuFreq.assign(numOTUs, 0); int total = 0; for(int i=0;igetControl_pressed()) { return 0; } double offset = MOTHURMAX; double norm = 0.0000; double minWeight = 0.1; vector currentTau(numOTUs); for(int j=0;jgetControl_pressed()) { return 0; } if(weights[j] > minWeight && distances[i * numSeqs+centroids[j]] < offset){ offset = distances[i * numSeqs+centroids[j]]; } } for(int j=0;jgetControl_pressed()) { return 0; } if(weights[j] > minWeight){ currentTau[j] = exp(sigma * (-distances[(i * numSeqs + centroids[j])] + offset)) * weights[j]; norm += currentTau[j]; } else{ currentTau[j] = 0.0000; } } for(int j=0;jgetControl_pressed()) { return 0; } if(currentTau[j] > 1.0e-4){ int oldTotal = total; total++; tau.resize(oldTotal+1); tau[oldTotal] = currentTau[j]; otuBySeqLookUp[j][otuFreq[j]] = oldTotal; aanI[j][otuFreq[j]] = i; otuFreq[j]++; } } anP.resize(total); anI.resize(total); } numIters++; } noise.updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount); if (m->getControl_pressed()) { return 0; } vector percentage(numSeqs); noise.setUpOTUData(otuData, percentage, cumCount, tau, otuFreq, anP, anI); if (m->getControl_pressed()) { return 0; } noise.finishOTUData(otuData, otuFreq, anP, anI, cumCount, otuBySeqLookUp, aanI, tau); if (m->getControl_pressed()) { return 0; } change.assign(numOTUs, 1); noise.calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); if (m->getControl_pressed()) { return 0; } vector finalTau(numOTUs, 0); for(int i=0;igetControl_pressed()) { return 0; } finalTau[otuData[i]] += int(seqFreq[i]); } noise.writeOutput(outputFileName, nameFileName, mapFileName, finalTau, centroids, otuData, sequences, uniqueNames, redundantNames, seqFreq, distances); return 0; }catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "driver"); exit(1); } } //********************************************************************************************************************** int ShhhSeqsCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } if (outputdir == "") { outputdir = util.hasPath(fastafile); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outputFileName = getOutputFileName("fasta",variables); string nameFileName = getOutputFileName("name",variables); string mapFileName = getOutputFileName("map",variables); if (groupfile != "") { mapFileName = outputdir + util.getRootName(util.getSimpleName(fastafile)) + "shhh."; vector mapFileNames = createProcessesGroups(outputFileName, nameFileName, mapFileName); if (m->getControl_pressed()) { return 0; } for (int j = 0; j < mapFileNames.size(); j++) { outputNames.push_back(mapFileNames[j]); outputTypes["map"].push_back(mapFileNames[j]); } //deconvolute results by running unique.seqs deconvoluteResults(outputFileName, nameFileName); if (m->getControl_pressed()) { return 0; } }else{ vector sequences; vector uniqueNames; vector redundantNames; vector seqFreq; seqNoise noise; correctDist* correct = new correctDist(processors); //reads fasta and name file and loads them in order readData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq); if (m->getControl_pressed()) { delete correct; return 0; } //calc distances for cluster string distFileName = outputdir + util.getRootName(util.getSimpleName(fastafile)) + "shhh.dist"; correct->execute(distFileName); delete correct; if (m->getControl_pressed()) { 
util.mothurRemove(distFileName); return 0; } driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, outputFileName, nameFileName, mapFileName, m, sigma); outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName); } if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "execute"); exit(1); } } //********************************************************************************************************************** int ShhhSeqsCommand::readData(correctDist* correct, seqNoise& noise, vector& seqs, vector& uNames, vector& rNames, vector& freq) { try { map nameMap; map::iterator it; util.readNames(namefile, nameMap); bool error = false; ifstream in; util.openInputFile(fastafile, in); while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return 0; } Sequence seq(in); gobble(in); if (seq.getName() != "") { correct->addSeq(seq.getName(), seq.getAligned()); it = nameMap.find(seq.getName()); if (it != nameMap.end()) { noise.addSeq(seq.getAligned(), seqs); noise.addRedundantName(it->first, it->second, uNames, rNames, freq); }else { m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your namefile, please correct."); error = true; } } } in.close(); if (error) { m->setControl_pressed(true); } return seqs.size(); }catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "readData"); exit(1); } } //********************************************************************************************************************** int loadData(MothurOut* m, correctDist* correct, seqNoise& noise, vector& seqs, vector& uNames, vector& rNames, vector& freq, map& nameMap, vector& sequences) { try { bool error = false; map::iterator it; for (int i = 0; i < sequences.size(); i++) { if (m->getControl_pressed()) { return 0; } if (sequences[i].getName() != "") { correct->addSeq(sequences[i].getName(), sequences[i].getAligned()); it = nameMap.find(sequences[i].getName()); if (it != nameMap.end()) { noise.addSeq(sequences[i].getAligned(), seqs); noise.addRedundantName(it->first, it->second, uNames, rNames, freq); }else { m->mothurOut("[ERROR]: " + sequences[i].getName() + " is in your fasta file and not in your namefile, please correct."); error = true; } } } if (error) { m->setControl_pressed(true); } return seqs.size(); }catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "loadData"); exit(1); } } /**************************************************************************************************/ //custom data structure for threads to use. 
// This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct shhhseqsData { string fastafile; string namefile; string groupfile; string newFFile, newNFile, newMFile, extension, outputDir; MothurOut* m; int sigma, count; vector groups; vector mapfileNames; Utils util; shhhseqsData(){} shhhseqsData(string opd, string f, string n, string g, string nff, string nnf, string nmf, vector gr, int s, string ex) { outputDir = opd; fastafile = f; namefile = n; groupfile = g; newFFile = nff; newNFile = nnf; newMFile = nmf; m = MothurOut::getInstance(); sigma = s; groups = gr; extension = ex; count=0; } }; /**************************************************************************************************/ void driverShhSeqsGroups(shhhseqsData* params){ try { //Parse sequences by group //Parse sequences by group vector groups; map > group2Files; SequenceParser sparser(params->groupfile, params->fastafile, params->namefile, params->groups); groups = sparser.getNamesOfGroups(); group2Files = sparser.getFiles(); string fileroot = params->outputDir + params->util.getRootName(params->util.getSimpleName(params->fastafile)); for (map >::iterator it = group2Files.begin(); it != group2Files.end(); it++) { long start = time(nullptr); if (params->m->getControl_pressed()) { break; } string thisGroup = it->first; string lowerCaseName = thisGroup; for (int j = 0; j < lowerCaseName.length(); j++) { lowerCaseName[j] = tolower(lowerCaseName[j]); } if (lowerCaseName == "ignore") { } else { params->m->mothurOut("\nProcessing group " + thisGroup + ":\n"); map thisNameMap; params->util.readNames(it->second[1], thisNameMap); vector thisSeqs; ifstream in; params->util.openInputFile(it->second[0], in); while (!in.eof()) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { thisSeqs.push_back(seq); } } in.close(); vector sequences; vector uniqueNames; vector redundantNames; vector seqFreq; seqNoise noise; correctDist* correct = new correctDist(1); //we use one processor since we already split up the work load. 
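/* Each group is denoised independently: load the group's sequences and name counts,
   compute its pairwise distance file, run the same driver() used for the
   single-group case, then append the per-group fasta and name output to this
   worker's temp files and record the group's map file name. */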
//load this groups info in order loadData(params->m, correct, noise, sequences, uniqueNames, redundantNames, seqFreq, thisNameMap, thisSeqs); if (params->m->getControl_pressed()) { break; } //calc distances for cluster string distFileName = fileroot + thisGroup + ".shhh.dist"; correct->execute(distFileName); delete correct; if (params->m->getControl_pressed()) { params->util.mothurRemove(distFileName); break; } driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, params->newFFile+thisGroup, params->newNFile+thisGroup, params->newMFile+thisGroup+".map", params->m, params->sigma); if (params->m->getControl_pressed()) { break; } params->util.appendFiles(params->newFFile+thisGroup, params->newFFile+params->extension); params->util.mothurRemove(params->newFFile+thisGroup); params->util.appendFiles(params->newNFile+thisGroup, params->newNFile+params->extension); params->util.mothurRemove(params->newNFile+thisGroup); params->mapfileNames.push_back(params->newMFile+thisGroup+".map"); params->m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to process group " + thisGroup + ".\n"); } } } catch(exception& e) { params->m->errorOut(e, "ShhhSeqsCommand", "driverShhSeqsGroups"); exit(1); } } /**************************************************************************************************/ vector ShhhSeqsCommand::createProcessesGroups(string newFName, string newNName, string newMName) { try { GroupMap groupMap(groupfile); groupMap.readMap();vector groups = groupMap.getNamesOfGroups(); if (groups.size() < processors) { processors = groups.size(); m->mothurOut("Reducing processors to " + toString(groups.size()) + ".\n"); } //divide the groups between the processors vector > dividedGroupNames; int remainingPairs = groups.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } vector thisProcessorsGroups; for (int i = startIndex; i < (startIndex+numPairs); i++) { thisProcessorsGroups.push_back(groups[i]); } dividedGroupNames.push_back(thisProcessorsGroups); startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } vector workerThreads; vector data; for (int i = 0; i < processors-1; i++) { string extension = toString(i+1) + ".temp"; util.mothurRemove(newFName+extension); util.mothurRemove(newNName+extension); shhhseqsData* dataBundle = new shhhseqsData(outputdir, fastafile, namefile, groupfile, newFName, newNName, newMName, dividedGroupNames[i+1], sigma, extension); data.push_back(dataBundle); std::thread* thisThread = new std::thread(driverShhSeqsGroups, dataBundle); workerThreads.push_back(thisThread); } util.mothurRemove(newFName); util.mothurRemove(newNName); shhhseqsData* dataBundle = new shhhseqsData(outputdir, fastafile, namefile, groupfile, newFName, newNName, newMName, dividedGroupNames[0], sigma, ""); driverShhSeqsGroups(dataBundle); vector mapFileNames = dataBundle->mapfileNames; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); mapFileNames.insert(mapFileNames.end(), data[i]->mapfileNames.begin(), data[i]->mapfileNames.end()); util.appendFiles(data[i]->newFFile+data[i]->extension, newFName); util.mothurRemove(data[i]->newFFile+data[i]->extension); util.appendFiles(data[i]->newNFile+data[i]->extension, newNName); util.mothurRemove(data[i]->newNFile+data[i]->extension); delete data[i]; 
delete workerThreads[i]; } return mapFileNames; } catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "createProcessesGroups"); exit(1); } } //********************************************************************************************************************** int ShhhSeqsCommand::deconvoluteResults(string fastaFile, string nameFile){ try { m->mothurOutEndLine(); m->mothurOut("Deconvoluting results:\n"); m->mothurOutEndLine(); //use unique.seqs to create new name and fastafile string inputString = "fasta=" + fastaFile + ", name=" + nameFile; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: unique.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* uniqueCommand = new UniqueSeqsCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); string newnameFile = filenames["name"][0]; string newfastaFile = filenames["fasta"][0]; util.mothurRemove(fastaFile); rename(newfastaFile.c_str(), fastaFile.c_str()); if (nameFile != newnameFile) { util.mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); } return 0; } catch(exception& e) { m->errorOut(e, "ShhhSeqsCommand", "deconvoluteResults"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/shhhseqscommand.h000077500000000000000000000033011424121717000215600ustar00rootroot00000000000000#ifndef SHHHSEQSCOMMAND_H #define SHHHSEQSCOMMAND_H /* * shhhseqscommand.h * Mothur * * Created by westcott on 11/8/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "myseqdist.h" #include "seqnoise.h" #include "sequenceparser.h" #include "uniqueseqscommand.h" #include "clustercommand.h" //********************************************************************************************************************** class ShhhSeqsCommand : public Command { public: ShhhSeqsCommand(string); ~ShhhSeqsCommand() = default; vector setParameters(); string getCommandName() { return "shhh.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Schloss PD, Gevers D, Westcott SL (2011). Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies. PLoS ONE. 6:e27310.\nQuince C, Lanzen A, Davenport RJ, Turnbaugh PJ (2011). Removing noise from pyrosequenced amplicons. BMC Bioinformatics 12:38.\nhttp://www.mothur.org/wiki/Shhh.seqs"; } string getDescription() { return "shhh.seqs"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort; string fastafile, namefile, groupfile; int processors; double sigma; vector outputNames; int readData(correctDist*, seqNoise&, vector&, vector&, vector&, vector&); vector createProcessesGroups(string, string, string); int deconvoluteResults(string, string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/sortseqscommand.cpp000077500000000000000000001175351424121717000221670ustar00rootroot00000000000000// // sortseqscommand.cpp // Mothur // // Created by Sarah Westcott on 2/3/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
// #include "sortseqscommand.h" #include "sequence.hpp" #include "qualityscores.h" //********************************************************************************************************************** vector SortSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false); parameters.push_back(pfasta); CommandParameter pflow("flow", "InputTypes", "", "", "none", "FNGLT", "none","flow",false,false); parameters.push_back(pflow); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false); parameters.push_back(pname); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,false); parameters.push_back(ptaxonomy); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none","qfile",false,false); parameters.push_back(pqfile); CommandParameter plarge("large", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plarge); CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["flow"] = tempOutNames; abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SortSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The sort.seqs command puts the sequences in the same order for the following file types: accnos fasta, name, taxonomy, flow or quality file.\n"; helpString += "The sort.seqs command parameters are accnos, fasta, name, taxonomy, flow, qfile and large.\n"; helpString += "The accnos file allows you to specify the order you want the files in. 
If none is provided, mothur will use the order of the first file it reads.\n"; helpString += "The large parameters is used to indicate your files are too large to fit in RAM.\n"; helpString += "The sort.seqs command should be in the following format: sort.seqs(fasta=yourFasta).\n"; helpString += "Example sort.seqs(fasta=amazon.fasta).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SortSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],sorted,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],sorted,[extension]"; } else if (type == "name") { pattern = "[filename],sorted,[extension]"; } else if (type == "flow") { pattern = "[filename],sorted,[extension]"; } else if (type == "qfile") { pattern = "[filename],sorted,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SortSeqsCommand::SortSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for parameters accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { accnosfile = ""; abort = true; } else if (accnosfile == "not found") { accnosfile = ""; } else { current->setAccnosFile(accnosfile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } flowfile = validParameter.validFile(parameters, "flow"); if (flowfile == "not open") { flowfile = ""; abort = true; } else if (flowfile == "not found") { flowfile = ""; } else { current->setFlowFile(flowfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not open") { abort = true; } else if (taxfile == "not found") { taxfile = ""; } else { current->setTaxonomyFile(taxfile); } qualfile = validParameter.validFile(parameters, "qfile"); if (qualfile == "not open") { abort = true; } else if (qualfile == "not found") { qualfile = ""; } else { current->setQualFile(qualfile); } string temp = validParameter.valid(parameters, "large"); if (temp == "not found") { temp = "f"; } large = util.isTrue(temp); if ((fastafile == "") && (namefile == "") && (taxfile == "") && (flowfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, flow or quality.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "SortSeqsCommand"); exit(1); } } 
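/* A hypothetical invocation, for illustration only:
       sort.seqs(accnos=final.accnos, fasta=final.fasta, qfile=final.qual)
   Here the accnos file fixes the order and every other file supplied is rewritten
   to match it. Without an accnos file, the first file read (fasta, then flow,
   qfile, name, taxonomy) defines the order used for the remaining files. */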
//********************************************************************************************************************** int SortSeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } //read through the correct file and output lines you want to keep if (accnosfile != "") { vector temp; util.readAccnos(accnosfile, temp); for (int i = 0; i < temp.size(); i++) { names[temp[i]] = i; } m->mothurOut("\nUsing " + accnosfile + " to determine the order. It contains " + toString(temp.size()) + " sequences.\n"); } if (fastafile != "") { readFasta(); } if (flowfile != "") { readFlow(); } if (qualfile != "") { readQual(); } if (namefile != "") { readName(); } if (taxfile != "") { readTax(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); } } itTypes = outputTypes.find("flow"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFlowFile(currentName); } } } return 0; } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "execute"); exit(1); } } //********************************************************************************************************************** int SortSeqsCommand::readFasta(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[extension]"] = util.getExtension(fastafile); string outputFileName = getOutputFileName("fasta", variables); outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(fastafile, in); string name; if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming. //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000. //this way we only store 1000 seqs in memory at a time. 
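/* A rough sketch of the chunking scheme implemented below, assuming 'names' already
   maps every sequence name to its target output position: pass 0 rescans the fasta
   file and keeps only sequences whose position falls in [0, 1000), writes them in
   order, pass 1 keeps [1000, 2000), and so on. Each pass costs a full read of the
   file, but at most 1000 Sequence objects are held in memory at once; sequences
   present in the file but missing from 'names' are first appended to the end of the
   ordering. */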
int numNames = names.size(); int numNamesInFile = 0; //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } Sequence currSeq(in); name = currSeq.getName(); if (name != "") { numNamesInFile++; map::iterator it = names.find(name); if (it == names.end()) { names[name] = numNames; numNames++; m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n"); } } gobble(in); } in.close(); out.close(); int numLeft = names.size(); if (numNamesInFile < numLeft) { numLeft = numNamesInFile; } int size = 1000; //assume that user can hold 1000 seqs in memory if (numLeft < size) { size = numLeft; } int times = 0; vector seqs; seqs.resize(size); for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage while (numLeft > 0) { ifstream in2; util.openInputFile(fastafile, in2); if (m->getControl_pressed()) { in2.close(); util.mothurRemove(outputFileName); return 0; } int found = 0; int needToFind = size; if (numLeft < size) { needToFind = numLeft; } while(!in2.eof()){ if (m->getControl_pressed()) { in2.close(); util.mothurRemove(outputFileName); return 0; } //stop reading if we already found the seqs we are looking for if (found >= needToFind) { break; } Sequence currSeq(in2); name = currSeq.getName(); if (name != "") { map::iterator it = names.find(name); if (it != names.end()) { //we found it, so put it in the vector in the right place. //is it in the set of seqs we are looking for this time around int thisSeqsPlace = it->second; thisSeqsPlace -= (times * size); if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) { seqs[thisSeqsPlace] = currSeq; found++; } }else { m->mothurOut("[ERROR]: in logic of readFasta function.\n"); m->setControl_pressed(true); } } gobble(in2); } in2.close(); ofstream out2; util.openOutputFileAppend(outputFileName, out2); int output = seqs.size(); if (numLeft < seqs.size()) { output = numLeft; } for (int i = 0; i < output; i++) { if (seqs[i].getName() != "") { seqs[i].printSequence(out2); } } out2.close(); times++; numLeft -= output; } m->mothurOut("Ordered " + toString(numNamesInFile) + " sequences from " + fastafile + ".\n"); }else { vector seqs; seqs.resize(names.size()); for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } Sequence currSeq(in); name = currSeq.getName(); if (name != "") { map::iterator it = names.find(name); if (it != names.end()) { //we found it, so put it in the vector in the right place. if (it->second > (seqs.size()-1)) { m->mothurOut("[WARNING]: Ignoring " + name + ". 
Could you have duplicate names in your fasta file?\n"); }else { seqs[it->second] = currSeq; } }else { //if we cant find it then add it to the end names[name] = seqs.size(); seqs.push_back(currSeq); m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n"); } } gobble(in); } in.close(); int count = 0; for (int i = 0; i < seqs.size(); i++) { if (seqs[i].getName() != "") { seqs[i].printSequence(out); count++; } } out.close(); m->mothurOut("Ordered " + toString(count) + " sequences from " + fastafile + ".\n"); } }else { //read in file to fill names int count = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } Sequence currSeq(in); name = currSeq.getName(); if (name != "") { //if this name is in the accnos file names[name] = count; count++; currSeq.printSequence(out); } gobble(in); } in.close(); out.close(); m->mothurOut("\nUsing " + fastafile + " to determine the order. It contains " + toString(count) + " sequences.\n"); } return 0; } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "readFasta"); exit(1); } } //********************************************************************************************************************** int SortSeqsCommand::readFlow(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(flowfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(flowfile)); variables["[extension]"] = util.getExtension(flowfile); string outputFileName = getOutputFileName("flow", variables); outputTypes["flow"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(flowfile, in); int numFlows; string name; in >> numFlows; gobble(in); if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming. //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000. //this way we only store 1000 seqs in memory at a time. 
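/* Flow files begin with a single header value giving the number of flows per
   record; after that each record occupies one line (the sequence name followed by
   its flow intensities), so the reader below can treat everything after the name as
   an opaque string and reorder whole lines. */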
int numNames = names.size(); int numNamesInFile = 0; //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } in >> name; string rest = util.getline(in); if (name != "") { numNamesInFile++; map::iterator it = names.find(name); if (it == names.end()) { names[name] = numNames; numNames++; m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n"); } } gobble(in); } in.close(); out.close(); int numLeft = names.size(); if (numNamesInFile < numLeft) { numLeft = numNamesInFile; } int size = 1000; //assume that user can hold 1000 seqs in memory if (numLeft < size) { size = numLeft; } int times = 0; vector seqs; seqs.resize(size, ""); while (numLeft > 0) { ifstream in2; util.openInputFile(flowfile, in2); in2 >> numFlows; gobble(in2); if (m->getControl_pressed()) { in2.close(); util.mothurRemove(outputFileName); return 0; } int found = 0; int needToFind = size; if (numLeft < size) { needToFind = numLeft; } while(!in2.eof()){ if (m->getControl_pressed()) { in2.close(); util.mothurRemove(outputFileName); return 0; } //stop reading if we already found the seqs we are looking for if (found >= needToFind) { break; } in2 >> name; string rest = util.getline(in2); if (name != "") { map::iterator it = names.find(name); if (it != names.end()) { //we found it, so put it in the vector in the right place. //is it in the set of seqs we are looking for this time around int thisSeqsPlace = it->second; thisSeqsPlace -= (times * size); if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) { seqs[thisSeqsPlace] = (name +'\t' + rest); found++; } }else { m->mothurOut("[ERROR]: in logic of readFlow function.\n"); m->setControl_pressed(true); } } gobble(in2); } in2.close(); ofstream out2; util.openOutputFileAppend(outputFileName, out2); int output = seqs.size(); if (numLeft < seqs.size()) { output = numLeft; } for (int i = 0; i < output; i++) { if (seqs[i] != "") { out2 << seqs[i] << endl; } } out2.close(); times++; numLeft -= output; } m->mothurOut("Ordered " + toString(numNamesInFile) + " flows from " + flowfile + ".\n"); }else { vector seqs; seqs.resize(names.size(), ""); while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } in >> name; string rest = util.getline(in); if (name != "") { map::iterator it = names.find(name); if (it != names.end()) { //we found it, so put it in the vector in the right place. 
seqs[it->second] = (name + '\t' + rest); }else { //if we cant find it then add it to the end names[name] = seqs.size(); seqs.push_back((name + '\t' + rest)); m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n"); } } gobble(in); } in.close(); int count = 0; for (int i = 0; i < seqs.size(); i++) { if (seqs[i] != "") { out << seqs[i] << endl; count++; } } out.close(); m->mothurOut("Ordered " + toString(count) + " flows from " + flowfile + ".\n"); } }else { //read in file to fill names int count = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } in >> name; string rest = util.getline(in); if (name != "") { //if this name is in the accnos file names[name] = count; count++; out << name << '\t' << rest << endl; } gobble(in); } in.close(); out.close(); m->mothurOut("\nUsing " + flowfile + " to determine the order. It contains " + toString(count) + " flows.\n"); } return 0; } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "readFlow"); exit(1); } } //********************************************************************************************************************** int SortSeqsCommand::readQual(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(qualfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(qualfile)); variables["[extension]"] = util.getExtension(qualfile); string outputFileName = getOutputFileName("qfile", variables); outputTypes["qfile"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(qualfile, in); string name; if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have if (large) { //if the file is too large to fit in memory we can still process it, but the io will be very time consuming. //read through the file looking for 1000 seqs at a time. Once we find them output them and start looking for the next 1000. //this way we only store 1000 seqs in memory at a time. 
int numNames = names.size(); int numNamesInFile = 0; //to make sure we dont miss any seqs, add any seqs that are not in names but in the file to the end of names while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } QualityScores currQual; currQual = QualityScores(in); name = currQual.getName(); if (name != "") { numNamesInFile++; map::iterator it = names.find(name); if (it == names.end()) { names[name] = numNames; numNames++; m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n"); } } gobble(in); } in.close(); out.close(); int numLeft = names.size(); if (numNamesInFile < numLeft) { numLeft = numNamesInFile; } int size = 1000; //assume that user can hold 1000 seqs in memory if (numLeft < size) { size = numLeft; } int times = 0; vector seqs; seqs.resize(size); for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage while (numLeft > 0) { ifstream in2; util.openInputFile(qualfile, in2); if (m->getControl_pressed()) { in2.close(); util.mothurRemove(outputFileName); return 0; } int found = 0; int needToFind = size; if (numLeft < size) { needToFind = numLeft; } while(!in2.eof()){ if (m->getControl_pressed()) { in2.close(); util.mothurRemove(outputFileName); return 0; } //stop reading if we already found the seqs we are looking for if (found >= needToFind) { break; } QualityScores currQual; currQual = QualityScores(in2); name = currQual.getName(); if (name != "") { map::iterator it = names.find(name); if (it != names.end()) { //we found it, so put it in the vector in the right place. //is it in the set of seqs we are looking for this time around int thisSeqsPlace = it->second; thisSeqsPlace -= (times * size); if ((thisSeqsPlace < size) && (thisSeqsPlace >= 0)) { seqs[thisSeqsPlace] = currQual; found++; } }else { m->mothurOut("[ERROR]: in logic of readQual function.\n"); m->setControl_pressed(true); } } gobble(in2); } in2.close(); ofstream out2; util.openOutputFileAppend(outputFileName, out2); int output = seqs.size(); if (numLeft < seqs.size()) { output = numLeft; } for (int i = 0; i < output; i++) { if (seqs[i].getName() != "") { seqs[i].printQScores(out2); } } out2.close(); times++; numLeft -= output; } m->mothurOut("Ordered " + toString(numNamesInFile) + " sequences from " + qualfile + ".\n"); }else { vector seqs; seqs.resize(names.size()); for (int i = 0; i < seqs.size(); i++) { seqs[i].setName(""); } //this is so if some of the seqs are missing we dont print out garbage while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } QualityScores currQual; currQual = QualityScores(in); name = currQual.getName(); if (name != "") { map::iterator it = names.find(name); if (it != names.end()) { //we found it, so put it in the vector in the right place. 
seqs[it->second] = currQual; }else { //if we cant find it then add it to the end names[name] = seqs.size(); seqs.push_back(currQual); m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n"); } } gobble(in); } in.close(); int count = 0; for (int i = 0; i < seqs.size(); i++) { if (seqs[i].getName() != "") { seqs[i].printQScores(out); count++; } } out.close(); m->mothurOut("Ordered " + toString(count) + " sequences from " + qualfile + ".\n"); } }else { //read in file to fill names int count = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } QualityScores currQual; currQual = QualityScores(in); gobble(in); if (currQual.getName() != "") { //if this name is in the accnos file names[currQual.getName()] = count; count++; currQual.printQScores(out); } gobble(in); } in.close(); out.close(); m->mothurOut("\nUsing " + qualfile + " to determine the order. It contains " + toString(count) + " sequences.\n"); } return 0; } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "readQual"); exit(1); } } //********************************************************************************************************************** int SortSeqsCommand::readName(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(namefile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[extension]"] = util.getExtension(namefile); string outputFileName = getOutputFileName("name", variables); outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(namefile, in); string name, firstCol, secondCol; if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have vector seqs; seqs.resize(names.size(), ""); while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } in >> firstCol; gobble(in); in >> secondCol; gobble(in); if (firstCol != "") { map::iterator it = names.find(firstCol); if (it != names.end()) { //we found it, so put it in the vector in the right place. seqs[it->second] = firstCol + '\t' + secondCol; }else { //if we cant find it then add it to the end names[firstCol] = seqs.size(); seqs.push_back((firstCol + '\t' + secondCol)); m->mothurOut(firstCol + " was not in the contained the file which determined the order, adding it to the end.\n"); } } } in.close(); int count = 0; for (int i = 0; i < seqs.size(); i++) { if (seqs[i] != "") { out << seqs[i] << endl; count++; } } out.close(); m->mothurOut("Ordered " + toString(count) + " sequences from " + namefile + ".\n"); }else { //read in file to fill names int count = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } in >> firstCol; gobble(in); in >> secondCol; gobble(in); if (firstCol != "") { //if this name is in the accnos file names[firstCol] = count; count++; out << firstCol << '\t' << secondCol << endl; } gobble(in); } in.close(); out.close(); m->mothurOut("\nUsing " + namefile + " to determine the order. 
It contains " + toString(count) + " representative sequences.\n"); } return 0; } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "readName"); exit(1); } } //********************************************************************************************************************** int SortSeqsCommand::readTax(){ try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(taxfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(taxfile)); variables["[extension]"] = util.getExtension(taxfile); string outputFileName = getOutputFileName("taxonomy", variables); outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(taxfile, in); string name, tax; if (names.size() != 0) {//this is not the first file we are reading so we need to use the order we already have vector seqs; seqs.resize(names.size(), ""); while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } in >> name; gobble(in); tax = util.getline(in); gobble(in); if (name != "") { map::iterator it = names.find(name); if (it != names.end()) { //we found it, so put it in the vector in the right place. seqs[it->second] = name + '\t' + tax; }else { //if we cant find it then add it to the end names[name] = seqs.size(); seqs.push_back((name + '\t' + tax)); m->mothurOut(name + " was not in the contained the file which determined the order, adding it to the end.\n"); } } } in.close(); int count = 0; for (int i = 0; i < seqs.size(); i++) { if (seqs[i] != "") { out << seqs[i] << endl; count++; } } out.close(); m->mothurOut("Ordered " + toString(count) + " sequences from " + taxfile + ".\n"); }else { //read in file to fill names int count = 0; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); util.mothurRemove(outputFileName); return 0; } in >> name; gobble(in); tax = util.getline(in); gobble(in); if (name != "") { //if this name is in the accnos file names[name] = count; count++; out << name << '\t' << tax << endl; } gobble(in); } in.close(); out.close(); m->mothurOut("\nUsing " + taxfile + " to determine the order. It contains " + toString(count) + " sequences.\n"); } return 0; return 0; } catch(exception& e) { m->errorOut(e, "SortSeqsCommand", "readTax"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/sortseqscommand.h000077500000000000000000000022321424121717000216170ustar00rootroot00000000000000#ifndef Mothur_sortseqscommand_h #define Mothur_sortseqscommand_h // // sortseqscommand.h // Mothur // // Created by Sarah Westcott on 2/3/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
//

#include "command.hpp"
#include "counttable.h"

class SortSeqsCommand : public Command {
public:
    SortSeqsCommand(string);
    ~SortSeqsCommand(){}

    vector<string> setParameters();
    string getCommandName()     { return "sort.seqs"; }
    string getCommandCategory() { return "Sequence Processing"; }
    string getHelpString();
    string getOutputPattern(string);
    string getCitation() { return "http://www.mothur.org/wiki/Sort.seqs"; }
    string getDescription() { return "puts sequences from a fasta, name, group, quality, flow or taxonomy file in the same order"; }

    int execute();
    void help() { m->mothurOut(getHelpString()); }

private:
    map<string, int> names;
    string accnosfile, fastafile, namefile, taxfile, qualfile, flowfile;
    bool abort, large;
    vector<string> outputNames;

    int readFasta();
    int readFlow();
    int readName();
    int readTax();
    int readQual();
};

#endif
mothur-1.48.0/source/commands/sparcccommand.cpp000077500000000000000000000435201424121717000215470ustar00rootroot00000000000000//
// sparcccommand.cpp
// Mothur
//
// Created by SarahsWork on 5/10/13.
// Copyright (c) 2013 Schloss Lab. All rights reserved.
//

#include "sparcccommand.h"

//**********************************************************************************************************************
vector<string> SparccCommand::setParameters(){
    try {
        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
        CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
        CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
        CommandParameter psamplings("samplings", "Number", "", "20", "", "", "","",false,false,false); parameters.push_back(psamplings);
        CommandParameter piterations("iterations", "Number", "", "10", "", "", "","",false,false,false); parameters.push_back(piterations);
        CommandParameter ppermutations("permutations", "Number", "", "1000", "", "", "","",false,false,false); parameters.push_back(ppermutations);
        CommandParameter pmethod("method", "Multiple", "relabund-dirichlet", "dirichlet", "", "", "","",false,false); parameters.push_back(pmethod);
        CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
        //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
        CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
        CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);

        abort = false; calledHelp = false; allLines = true;

        vector<string> tempOutNames;
        outputTypes["corr"] = tempOutNames; //filetypes should be things like: shared, fasta, accnos...
outputTypes["pvalue"] = tempOutNames; outputTypes["sparccrelabund"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SparccCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SparccCommand::getHelpString(){ try { string helpString = ""; helpString += "The sparcc command allows you to ....\n"; helpString += "The sparcc command parameters are: shared, groups, label, samplings, iterations, permutations, processors and method.\n"; helpString += "The samplings parameter is used to .... Default=20.\n"; helpString += "The iterations parameter is used to ....Default=10.\n"; helpString += "The permutations parameter is used to ....Default=1000.\n"; helpString += "The method parameter is used to ....Options are relabund and dirichlet. Default=dirichlet.\n"; helpString += "The default value for groups is all the groups in your sharedfile.\n"; helpString += "The label parameter is used to analyze specific labels in your shared file.\n"; helpString += "The sparcc command should be in the following format: sparcc(shared=yourSharedFile)\n"; helpString += "sparcc(shared=final.an.shared)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SparccCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SparccCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "corr") { pattern = "[filename],[distance],sparcc_correlation"; } else if (type == "pvalue") { pattern = "[filename],[distance],sparcc_pvalue"; } else if (type == "sparccrelabund") { pattern = "[filename],[distance],sparcc_relabund"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SparccCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SparccCommand::SparccCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } normalizeMethod = validParameter.valid(parameters, "method"); if (normalizeMethod == "not found") { normalizeMethod = "dirichlet"; } if ((normalizeMethod == "dirichlet") || (normalizeMethod == "relabund")) { } else { m->mothurOut(normalizeMethod + " is not a valid method. 
Valid methods are dirichlet and relabund.\n"); abort = true; } string temp = validParameter.valid(parameters, "samplings"); if (temp == "not found"){ temp = "20"; } util.mothurConvert(temp, numSamplings); if(normalizeMethod == "relabund"){ numSamplings = 1; } temp = validParameter.valid(parameters, "iterations"); if (temp == "not found"){ temp = "10"; } util.mothurConvert(temp, maxIterations); temp = validParameter.valid(parameters, "permutations"); if (temp == "not found"){ temp = "1000"; } util.mothurConvert(temp, numPermutations); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } } } catch(exception& e) { m->errorOut(e, "SparccCommand", "SparccCommand"); exit(1); } } //********************************************************************************************************************** int SparccCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } process(lookup); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("It took " + toString(time(nullptr) - start) + " seconds to process.\n\n"); //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SparccCommand", "execute"); exit(1); } } //********************************************************************************************************************** int SparccCommand::process(SharedRAbundVectors*& shared){ try { cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); vector > sharedVector; vector otuNames = shared->getOTUNames(); vector data = shared->getSharedRAbundVectors(); //fill sharedVector to pass to CalcSparcc for (int i = 0; i < data.size(); i++) { vector abunds = data[i]->get(); vector temp; for (int j = 0; j < abunds.size(); j++) { temp.push_back((float) abunds[j]); } sharedVector.push_back(temp); } int numOTUs = (int)sharedVector[0].size(); int numGroups = data.size(); map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[distance]"] = shared->getLabel(); string relAbundFileName = getOutputFileName("sparccrelabund", variables); ofstream relAbundFile; util.openOutputFile(relAbundFileName, relAbundFile); outputNames.push_back(relAbundFileName); outputTypes["sparccrelabund"].push_back(relAbundFileName); relAbundFile << "OTU\taveRelAbund\n"; for(int 
i=0;igetControl_pressed()) { for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); relAbundFile.close(); return 0; } double relAbund = 0.0000; for(int j=0;jgetNumSeqs(); } relAbundFile << otuNames[i] <<'\t' << relAbund / (double) numGroups << endl; } relAbundFile.close(); CalcSparcc originalData(sharedVector, maxIterations, numSamplings, normalizeMethod); vector > origCorrMatrix = originalData.getRho(); string correlationFileName = getOutputFileName("corr", variables); ofstream correlationFile; util.openOutputFile(correlationFileName, correlationFile); outputNames.push_back(correlationFileName); outputTypes["corr"].push_back(correlationFileName); correlationFile.setf(ios::fixed, ios::floatfield); correlationFile.setf(ios::showpoint); correlationFile << "OTU_id"; for(int i=0;i > pValues = createProcesses(sharedVector, origCorrMatrix); if (m->getControl_pressed()) { for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); return 0; } string pValueFileName = getOutputFileName("pvalue", variables); ofstream pValueFile; util.openOutputFile(pValueFileName, pValueFile); outputNames.push_back(pValueFileName); outputTypes["pvalue"].push_back(pValueFileName); pValueFile.setf(ios::fixed, ios::floatfield); pValueFile.setf(ios::showpoint); for(int i=0;ierrorOut(e, "SparccCommand", "process"); exit(1); } } /**************************************************************************************************/ struct sparccData { MothurOut* m; vector< vector > sharedVector; vector< vector > origCorrMatrix; vector > pValues; int numSamplings, maxIterations, numPermutations, numOTUs; string normalizeMethod; Utils util; sparccData(){} sparccData(vector< vector > cs, vector< vector > co, int ns, int mi, int np, string nm) { m = MothurOut::getInstance(); sharedVector = cs; origCorrMatrix = co; numSamplings = ns; maxIterations = mi; numPermutations = np; normalizeMethod = nm; numOTUs = sharedVector[0].size(); pValues.resize(numOTUs); for(int i=0;i > shuffleSharedVector(vector >& sharedVector, MothurOut* m, Utils& util){ try { int numGroups = (int)sharedVector.size(); int numOTUs = (int)sharedVector[0].size(); vector > shuffledVector = sharedVector; for(int i=0;ierrorOut(e, "SparccCommand", "shuffleSharedVector"); exit(1); } } //********************************************************************************************************************** void driverSparcc(sparccData* params){ try { vector > sharedShuffled = params->sharedVector; for(int i=0;inumPermutations;i++){ if (params->m->getControl_pressed()) { break; } sharedShuffled = shuffleSharedVector(params->sharedVector, params->m, params->util); CalcSparcc permutedData(sharedShuffled, params->maxIterations, params->numSamplings, params->normalizeMethod); vector > permuteCorrMatrix = permutedData.getRho(); for(int j=0;jnumOTUs;j++){ for(int k=0;km->getControl_pressed()) { break; } double randValue = permuteCorrMatrix[j][k]; double observedValue = params->origCorrMatrix[j][k]; if(observedValue >= 0 && randValue > observedValue) { params->pValues[j][k]++; }//this method seems to deflate the else if(observedValue < 0 && randValue < observedValue){ params->pValues[j][k]++; }//pvalues of small rho values } } float done = ceil(params->numPermutations * 0.05); if((i+1) % (int)(done) == 0){ params->m->mothurOutJustToScreen(toString(i+1)+"\n"); } } } catch(exception& e) { params->m->errorOut(e, "SparccCommand", "driverSparcc"); exit(1); } } 
//********************************************************************************************************************** vector > SparccCommand::createProcesses(vector >& sharedVector, vector >& origCorrMatrix){ try { //divide work by number of permutations vector lines; if (processors > numPermutations) { processors = numPermutations; } //figure out how many sequences you have to process int numItersPerProcessor = numPermutations / processors; for (int i = 0; i < processors; i++) { if(i == (processors - 1)){ numItersPerProcessor = numPermutations - i * numItersPerProcessor; } lines.push_back(numItersPerProcessor); } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { sparccData* dataBundle = new sparccData(sharedVector, origCorrMatrix, numSamplings, maxIterations, lines[i+1], normalizeMethod); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverSparcc, dataBundle)); } int numOTUs = sharedVector[0].size(); sparccData* dataBundle = new sparccData(sharedVector, origCorrMatrix, numSamplings, maxIterations, lines[0], normalizeMethod); driverSparcc(dataBundle); vector > pValues = dataBundle->pValues; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); vector > thisProcessorsPValues = data[i]->pValues; for (int k = 0; k < numOTUs; k++) { for (int j = 0; j < k; j++) { pValues[k][j] += thisProcessorsPValues[k][j]; } } delete data[i]; delete workerThreads[i]; } delete dataBundle; for(int i=0;ierrorOut(e, "SparccCommand", "createProcesses"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/sparcccommand.h000077500000000000000000000036311424121717000212130ustar00rootroot00000000000000// // sparcccommand.h // Mothur // // Created by SarahsWork on 5/10/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_sparcccommand_h #define Mothur_sparcccommand_h #include "command.hpp" #include "inputdata.h" #include "calcsparcc.h" /**************************************************************************************************/ class SparccCommand : public Command { public: SparccCommand(string); ~SparccCommand(){} vector setParameters(); string getCommandName() { return "sparcc"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getOutputPattern(string); //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden string getHelpString(); string getCitation() { return "Friedman J, Alm EJ (2012) Inferring Correlation Networks from Genomic Survey Data. PLoS Comput Biol 8(9): e1002687. 
doi:10.1371/journal.pcbi.1002687 http://www.mothur.org/wiki/Sparcc"; } string getDescription() { return "Calculates correlations between OTUs using a method that is insensitive to the use of relative abundance data"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, allLines; string sharedfile, normalizeMethod; int numSamplings, maxIterations, numPermutations, processors; set labels; vector Groups; vector outputNames; int process(SharedRAbundVectors*&); vector > createProcesses(vector >&, vector >&); //vector > driver(vector >&, vector >&, int); //vector > shuffleSharedVector(vector >&); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/splitabundcommand.cpp000066400000000000000000000774741424121717000224550ustar00rootroot00000000000000/* * splitabundcommand.cpp * Mothur * * Created by westcott on 5/17/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "splitabundcommand.h" #include "getseqscommand.h" #include "getotuscommand.h" //********************************************************************************************************************** vector SplitAbundCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false); parameters.push_back(pgroup); CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "","",false,true); parameters.push_back(pcutoff); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["accnos"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["count"] = tempOutNames; abort = false; calledHelp = false; allLines = true; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SplitAbundCommand::getHelpString(){ try { string helpString = ""; helpString += "The split.abund command reads a fasta file or a list or a names or a count file splits the sequences into rare and abundant groups. 
\n"; helpString += "The split.abund command parameters are fasta, list, name, count, cutoff, group, label and cutoff.\n"; helpString += "The fasta or a list or name or count parameter are required, and you must provide a cutoff value.\n"; helpString += "The cutoff parameter is used to qualify what is abundant and rare. If cutoff < 1, mothur assumes this is a percentage. 0.02 -> rare reads represent <= 2% of total reads. \n"; helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n"; helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n"; helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n"; helpString += "If you want .abund and .rare files for all groups, set groups=all. \n"; helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n"; helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SplitAbundCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],[tag],[tag2],fasta"; } else if (type == "list") { pattern = "[filename],[tag],[tag2],list"; } else if (type == "name") { pattern = "[filename],[tag],names-[filename],[tag],[tag2],names"; } else if (type == "count") { pattern = "[filename],[tag],[tag2],count_table-[filename],[tag],count_table"; } else if (type == "group") { pattern = "[filename],[tag],[tag2],groups"; } else if (type == "accnos") { pattern = "[filename],[tag],[tag2],accnos"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SplitAbundCommand::SplitAbundCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else{ inputFile = listfile; current->setListFile(listfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else{ inputFile = namefile; current->setNameFile(namefile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if 
(groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); ct.readTable(countfile, true, false); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } //do you have all files needed if ((listfile == "") && (namefile == "") && (countfile == "") && (fastafile == "")) { namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile file, and a fasta, list, name or count file is required.\n"); abort = true; } } } } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; allLines = true; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } string temp = validParameter.valid(parameters, "accnos"); if (temp == "not found") { temp = "F"; } accnos = util.isTrue(temp); temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, cutoff); if (cutoff == 0) { m->mothurOut("[ERROR]: You must provide a cutoff to qualify what is abundant for the split.abund command. 
\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand"); exit(1); } } //********************************************************************************************************************** SplitAbundCommand::~SplitAbundCommand(){} //********************************************************************************************************************** int SplitAbundCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (listfile != "") { splitList(); } else if (namefile != "") { splitNames(); } else if (countfile != "") { splitCount(); } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "execute"); exit(1); } } /**********************************************************************************************************************/ int SplitAbundCommand::splitList() { try { InputData input(listfile, "list", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); if (cutoff < 1) { //percentage instead of raw count int total = list->getNumSeqs(); if (countfile != "") { total = ct.getNumSeqs(); } else if (namefile != "") { total = util.scanNames(namefile); } float percentage = cutoff; cutoff = int(percentage * total); m->mothurOut("\nSetting cutoff to " + toString(cutoff) + "\n"); } if (m->getControl_pressed()) { delete list; return 0; } while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } process(list); delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } return 0; } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "splitList"); exit(1); } } /**********************************************************************************************************************/ int SplitAbundCommand::process(ListVector* thisList) { try { set rareNames; set abundNames; set abundOTUs, rareOTUs; //get rareNames and abundNames for (int i = 0; i < thisList->getNumBins(); i++) { if 
(m->getControl_pressed()) { return 0; } string bin = thisList->get(i); vector names; util.splitAtComma(bin, names); //parses bin into individual sequence names int size = names.size(); //if countfile is not blank we assume the list file is unique, otherwise we assume it includes all seqs if (countfile != "") { size = 0; for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); } } if (size <= cutoff) { for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); } rareOTUs.insert(thisList->getOTUName(i)); }else{ for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); } abundOTUs.insert(thisList->getOTUName(i)); } }//end for string tag = thisList->getLabel(); vector accnosOTUs = writeAccnos(tag+"_OTUS", rareOTUs, abundOTUs); //return rare, abund accnos files map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = util.hasPath(listfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[tag]"] = tag; variables["[tag2]"] = "rare"; string rareList = getOutputFileName("list",variables); variables["[tag2]"] = "abund"; string abundList = getOutputFileName("list",variables); string inputString = "accnos=" + accnosOTUs[0] + ", list=" + listfile + ", label=" + tag; //get rare m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.otus(" + inputString + ")\n"); Command* getOTUSCommand = new GetOtusCommand(inputString); getOTUSCommand->execute(); map > filenames = getOTUSCommand->getOutputFiles(); delete getOTUSCommand; util.renameFile(filenames["list"][0], rareList); outputNames.push_back(rareList); outputTypes["list"].push_back(rareList); m->mothurOut("/******************************************/\nDone.\n"); inputString = "accnos=" + accnosOTUs[1] + ", list=" + listfile+ ", label=" + tag; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.otus(" + inputString + ")\n"); getOTUSCommand = new GetOtusCommand(inputString); getOTUSCommand->execute(); filenames = getOTUSCommand->getOutputFiles(); delete getOTUSCommand; util.renameFile(filenames["list"][0], abundList); outputNames.push_back(abundList); outputTypes["list"].push_back(abundList); m->mothurOut("/******************************************/\nDone.\n"); string rareCount, abundCount, rareName, abundName, rareFasta, abundFasta, rareGroup, abundGroup; string inputString2 = ""; if (countfile != "") { if (outputdir == "") { thisOutputDir = util.hasPath(countfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[tag]"] = tag; variables["[tag2]"] = "rare"; rareCount = getOutputFileName("count",variables); variables["[tag2]"] = "abund"; abundCount = getOutputFileName("count",variables); inputString2 += ", count=" + countfile; }else if (groupfile != "") { if (outputdir == "") { thisOutputDir = util.hasPath(groupfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[tag]"] = tag; variables["[tag2]"] = "rare"; rareGroup = getOutputFileName("group",variables); variables["[tag2]"] = "abund"; abundGroup = getOutputFileName("group",variables); inputString2 += ", group=" + groupfile; } if (fastafile != "") { if (outputdir == "") { thisOutputDir = util.hasPath(fastafile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = tag; variables["[tag2]"] = "rare"; rareFasta = 
getOutputFileName("fasta",variables); variables["[tag2]"] = "abund"; abundFasta = getOutputFileName("fasta",variables); inputString2 += ", fasta=" + fastafile; } if (namefile != "") { if (outputdir == "") { thisOutputDir = util.hasPath(namefile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[tag]"] = tag; variables["[tag2]"] = "rare"; rareName = getOutputFileName("name",variables); variables["[tag2]"] = "abund"; abundName = getOutputFileName("name",variables); inputString2 += ", name=" + namefile; } if (inputString2 != "") { vector accnosNames = writeAccnos(tag, rareNames, abundNames); //return rare, abund accnos files inputString = "dups=t, accnos=" + accnosNames[0]; //get rare m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.seqs(" + inputString + inputString2 + ")\n"); Command* getCommand = new GetSeqsCommand(inputString+ inputString2); getCommand->execute(); map > filenames = getCommand->getOutputFiles(); delete getCommand; if (countfile != "") { util.renameFile(filenames["count"][0], rareCount); outputNames.push_back(rareCount); outputTypes["count"].push_back(rareCount); } else if (groupfile != "") { util.renameFile(filenames["group"][0], rareGroup); outputNames.push_back(rareGroup); outputTypes["group"].push_back(rareGroup); } if (fastafile != "") { util.renameFile(filenames["fasta"][0], rareFasta); outputNames.push_back(rareFasta); outputTypes["fasta"].push_back(rareFasta); } if (namefile != "") { util.renameFile(filenames["name"][0], rareName); outputNames.push_back(rareName); outputTypes["name"].push_back(rareName); } m->mothurOut("/******************************************/\nDone.\n"); inputString = "dups=t, accnos=" + accnosNames[1]; //get rare m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.seqs(" + inputString + inputString2 + ")\n"); getCommand = new GetSeqsCommand(inputString+ inputString2); getCommand->execute(); filenames = getCommand->getOutputFiles(); delete getCommand; if (countfile != "") { util.renameFile(filenames["count"][0], abundCount); outputNames.push_back(abundCount); outputTypes["count"].push_back(abundCount); } else if (groupfile != "") { util.renameFile(filenames["group"][0], abundGroup); outputNames.push_back(abundGroup); outputTypes["group"].push_back(abundGroup); } if (fastafile != "") { util.renameFile(filenames["fasta"][0], abundFasta); outputNames.push_back(abundFasta); outputTypes["fasta"].push_back(abundFasta); } if (namefile != "") { util.renameFile(filenames["name"][0], abundName); outputNames.push_back(abundName); outputTypes["name"].push_back(abundName); } m->mothurOut("/******************************************/\nDone.\n"); } return 0; } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "process"); exit(1); } } /**********************************************************************************************************************/ int SplitAbundCommand::splitCount() { //countfile try { inputFile = countfile; set rareNames; set abundNames; if (cutoff < 1) { //cutoff is a percentage rather than a explicit size float percentage = cutoff; int totalSeqs = ct.getNumSeqs(); cutoff = int(totalSeqs * percentage); m->mothurOut("\nSetting cutoff to " + toString(cutoff) + "\n"); } vector allNames = ct.getNamesOfSeqs(); for (int i = 0; i < allNames.size(); i++) { if (m->getControl_pressed()) { return 0; } int size = ct.getNumSeqs(allNames[i]); if (size <= cutoff) { 
rareNames.insert(allNames[i]); }else{ abundNames.insert(allNames[i]); } } vector accnosNames = writeAccnos("", rareNames, abundNames); //return rare, abund accnos files map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = util.hasPath(fastafile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = ""; variables["[tag2]"] = "rare"; string rareFasta = getOutputFileName("fasta",variables); variables["[tag2]"] = "abund"; string abundFasta = getOutputFileName("fasta",variables); if (outputdir == "") { thisOutputDir = util.hasPath(countfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[tag]"] = ""; variables["[tag2]"] = "rare"; string rareCount = getOutputFileName("count",variables); variables["[tag2]"] = "abund"; string abundCount = getOutputFileName("count",variables); string inputString = "dups=t, accnos=" + accnosNames[0] + ", count=" + countfile; //get rare if (fastafile != "") { inputString += ", fasta=" + fastafile; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.seqs(" + inputString + ")\n"); Command* getCommand = new GetSeqsCommand(inputString); getCommand->execute(); map > filenames = getCommand->getOutputFiles(); delete getCommand; util.renameFile(filenames["count"][0], rareCount); outputNames.push_back(rareCount); outputTypes["count"].push_back(rareCount); if (fastafile != "") { util.renameFile(filenames["fasta"][0], rareFasta); outputNames.push_back(rareFasta); outputTypes["fasta"].push_back(rareFasta); } m->mothurOut("/******************************************/\nDone.\n"); inputString = "dups=t, accnos=" + accnosNames[1] + ", count=" + countfile; //get rare if (fastafile != "") { inputString += ", fasta=" + fastafile; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.seqs(" + inputString + ")\n"); getCommand = new GetSeqsCommand(inputString); getCommand->execute(); filenames = getCommand->getOutputFiles(); delete getCommand; util.renameFile(filenames["count"][0], abundCount); outputNames.push_back(abundCount); outputTypes["count"].push_back(abundCount); if (fastafile != "") { util.renameFile(filenames["fasta"][0], abundFasta); outputNames.push_back(abundFasta); outputTypes["fasta"].push_back(abundFasta); } m->mothurOut("/******************************************/\nDone.\n"); return 0; } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "splitCount"); exit(1); } } /**********************************************************************************************************************/ int SplitAbundCommand::splitNames() { //namefile try { set rareNames; set abundNames; if (cutoff < 1) { //percentage used, find cutoff value float percentage = cutoff; int totalSeqs = util.scanNames(namefile); cutoff = int(totalSeqs * percentage); m->mothurOut("\nSetting cutoff to " + toString(cutoff) + "\n"); } //open input file ifstream in; util.openInputFile(namefile, in); while (!in.eof()) { if (m->getControl_pressed()) { break; } string firstCol, secondCol; in >> firstCol; gobble(in); in >> secondCol; gobble(in); int size = util.getNumNames(secondCol); if (size <= cutoff) { rareNames.insert(firstCol); }else{ abundNames.insert(firstCol); } } in.close(); vector accnosNames = writeAccnos("", rareNames, abundNames); //return rare, abund accnos files map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = 
util.hasPath(groupfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[tag]"] = ""; variables["[tag2]"] = "rare"; string rareGroup = getOutputFileName("group",variables); variables["[tag2]"] = "abund"; string abundGroup = getOutputFileName("group",variables); if (outputdir == "") { thisOutputDir = util.hasPath(fastafile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = ""; variables["[tag2]"] = "rare"; string rareFasta = getOutputFileName("fasta",variables); variables["[tag2]"] = "abund"; string abundFasta = getOutputFileName("fasta",variables); if (outputdir == "") { thisOutputDir = util.hasPath(namefile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[tag]"] = ""; variables["[tag2]"] = "rare"; string rareName = getOutputFileName("name",variables); variables["[tag2]"] = "abund"; string abundName = getOutputFileName("name",variables); string inputString = "dups=t, accnos=" + accnosNames[0] + ", name=" + namefile; //get rare if (groupfile != "") { inputString += ", group=" + groupfile; } if (fastafile != "") { inputString += ", fasta=" + fastafile; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.seqs(" + inputString + ")\n"); Command* getCommand = new GetSeqsCommand(inputString); getCommand->execute(); map > filenames = getCommand->getOutputFiles(); delete getCommand; util.renameFile(filenames["name"][0], rareName); outputNames.push_back(rareName); outputTypes["name"].push_back(rareName); if (groupfile != "") { util.renameFile(filenames["group"][0], rareGroup); outputNames.push_back(rareGroup); outputTypes["group"].push_back(rareGroup); } if (fastafile != "") { util.renameFile(filenames["fasta"][0], rareFasta); outputNames.push_back(rareFasta); outputTypes["fasta"].push_back(rareFasta); } m->mothurOut("/******************************************/\nDone.\n"); inputString = "dups=t, accnos=" + accnosNames[1] + ", name=" + namefile; //get rare if (groupfile != "") { inputString += ", group=" + groupfile; } if (fastafile != "") { inputString += ", fasta=" + fastafile; } m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.seqs(" + inputString + ")\n"); getCommand = new GetSeqsCommand(inputString); getCommand->execute(); filenames = getCommand->getOutputFiles(); delete getCommand; util.renameFile(filenames["name"][0], abundName); outputNames.push_back(abundName); outputTypes["name"].push_back(abundName); if (groupfile != "") { util.renameFile(filenames["group"][0], abundGroup); outputNames.push_back(abundGroup); outputTypes["group"].push_back(abundGroup); } if (fastafile != "") { util.renameFile(filenames["fasta"][0], abundFasta); outputNames.push_back(abundFasta); outputTypes["fasta"].push_back(abundFasta); } m->mothurOut("/******************************************/\nDone.\n"); return 0; } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "splitNames"); exit(1); } } /**********************************************************************************************************************/ //just write the unique names - if a namesfile is given vector SplitAbundCommand::writeAccnos(string tag, set rareNames, set abundNames) { try { vector outputAccnosFiles; map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = util.hasPath(inputFile); } variables["[filename]"] = thisOutputDir + 
util.getRootName(util.getSimpleName(inputFile)); variables["[tag]"] = tag; variables["[tag2]"] = "rare"; string rare = getOutputFileName("accnos",variables); outputAccnosFiles.push_back(rare); ofstream aout, rout; util.openOutputFile(rare, rout); outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); for (set::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) { rout << (*itRare) << endl; } rout.close(); variables["[tag2]"] = "abund"; string abund = getOutputFileName("accnos",variables); util.openOutputFile(abund, aout); outputNames.push_back(abund); outputTypes["accnos"].push_back(abund); outputAccnosFiles.push_back(abund); for (set::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) { aout << (*itAbund) << endl; } aout.close(); return outputAccnosFiles; } catch(exception& e) { m->errorOut(e, "SplitAbundCommand", "writeAccnos"); exit(1); } } /**********************************************************************************************************************/ mothur-1.48.0/source/commands/splitabundcommand.h000077500000000000000000000035721424121717000221110ustar00rootroot00000000000000#ifndef SPLITABUNDCOMMAND_H #define SPLITABUNDCOMMAND_H /* * splitabundcommand.h * Mothur * * Created by westcott on 5/17/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ /* split.abund - given a list or name file and a number (cutoff), make two files - *rare* and *abund* - where rare has data for otus that have fewer sequences than the cutoff and abund has data for otus that have as many or more sequences as the cutoff. also allow an option where a user can give a group file with the list or names file and split the group file into rare and abund. */ #include "command.hpp" #include "groupmap.h" #include "inputdata.h" #include "listvector.hpp" #include "sequence.hpp" #include "counttable.h" /***************************************************************************************/ class SplitAbundCommand : public Command { public: SplitAbundCommand(string); ~SplitAbundCommand(); vector setParameters(); string getCommandName() { return "split.abund"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Split.abund"; } string getDescription() { return "split a list, name, group or fasta file based on abundance"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: int splitList(); int splitCount(); int splitNames(); int process(ListVector*); int writeList(ListVector*, string, int); vector writeAccnos(string, set, set); vector outputNames; CountTable ct; string listfile, namefile, groupfile, countfile, label, groups, fastafile, inputFile; set labels; bool abort, allLines, accnos; float cutoff; }; /***************************************************************************************/ #endif mothur-1.48.0/source/commands/splitgroupscommand.cpp000077500000000000000000001241461424121717000226730ustar00rootroot00000000000000/* * splitgroupscommand.cpp * Mothur * * Created by westcott on 9/20/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "splitgroupscommand.h" #include "sequenceparser.h" #include "counttable.h" #include "inputdata.h" //********************************************************************************************************************** vector SplitGroupCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","list",false,false,true); parameters.push_back(plist); CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","fasta",false,false,true); parameters.push_back(pflow); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","fastq",false,false,true); parameters.push_back(pfastq); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "CountGroup", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "CountGroup", "none","group",false,false,true); parameters.push_back(pgroup); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["fastq"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["list"] = tempOutNames; abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SplitGroupCommand::getHelpString(){ try { string helpString = ""; helpString += "The split.groups command reads a group or count file, and parses your files by groups. \n"; helpString += "The split.groups command parameters are fasta, fastq, flow, name, group, count, groups and processors.\n"; helpString += "The group or count parameter is required.\n"; helpString += "The groups parameter allows you to select groups to create files for. \n"; helpString += "The format parameter is used with the fastq parameter to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=illumina1.8+.\n"; helpString += "For example if you set groups=A-B-C, you will get a .A.fasta, .A.names, .B.fasta, .B.names, .C.fasta, .C.names files. \n"; helpString += "If you want .fasta and .names files for all groups, set groups=all. 
\n"; helpString += "The split.groups command should be used in the following format: split.group(fasta=yourFasta, group=yourGroupFile).\n"; helpString += "Example: split.groups(fasta=abrecovery.fasta, group=abrecovery.groups).\n"; ; return helpString; } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SplitGroupCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],[group],fasta"; } else if (type == "list") { pattern = "[filename],[group],list"; } else if (type == "fastq") { pattern = "[filename],[group],fastq"; } else if (type == "flow") { pattern = "[filename],[group],flow"; } else if (type == "name") { pattern = "[filename],[group],names"; } else if (type == "count") { pattern = "[filename],[group],count_table"; } else if (type == "group") { pattern = "[filename],[group],groups"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SplitGroupCommand::SplitGroupCommand(vector g, string ffile, string count, string outd) : Command() { try { abort = false; calledHelp = false; vector tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["fasta"] = tempOutNames; Groups = g; groupfile = ""; namefile = ""; fastafile = ffile; countfile = count; outputdir = outd; string temp = current->getProcessors(); processors = current->setProcessors(temp); if (processors > Groups.size()) { processors = Groups.size(); m->mothurOut("Reducing processors to " + toString(Groups.size()) + ".\n"); } //divide the groups between the processors int remainingPairs = Groups.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } splitFastaCount(); } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "SplitGroupCommand - mothurRun"); exit(1); } } //********************************************************************************************************************** SplitGroupCommand::SplitGroupCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } listfile = validParameter.validFile(parameters, 
"list"); if (listfile == "not open") { abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } fastqfile = validParameter.validFile(parameters, "fastq"); if (fastqfile == "not open") { abort = true; } else if (fastqfile == "not found") { fastqfile = ""; } flowfile = validParameter.validFile(parameters, "flow"); if (flowfile == "not open") { abort = true; } else if (flowfile == "not found") { flowfile = ""; } else { current->setFlowFile(flowfile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; }else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((fastafile == "") && (flowfile == "") && (fastqfile == "")) { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { flowfile = current->getFlowFile(); if (flowfile != "") { m->mothurOut("Using " + flowfile + " as input file for the flow parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { m->mothurOut("[ERROR]: You need to provide a fasta, list, fastq or flow file.\n"); abort = true; } } } } if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name.\n"); abort = true; } if ((countfile != "") && (groupfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or group.\n"); abort = true; } if ((countfile == "") && (groupfile == "")) { if (namefile == "") { //check for count then group countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { m->mothurOut("You need to provide a count or group file.\n"); abort = true; } } }else { //check for group groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { m->mothurOut("You need to provide a count or group file.\n"); abort = true; } } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); format = validParameter.valid(parameters, "format"); if (format == "not found"){ format = "illumina1.8+"; } if ((format != "sanger") && (format != "illumina") && (format != "illumina1.8+") && (format != "solexa")) { m->mothurOut(format + " is not a valid format. 
Your format choices are sanger, solexa, illumina1.8+ and illumina, aborting.\n" ); abort=true; } if (outputdir == ""){ if (groupfile != "") { outputdir = util.hasPath(groupfile); } else { outputdir = util.hasPath(countfile); } } } } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "SplitGroupCommand"); exit(1); } } //********************************************************************************************************************** int SplitGroupCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } vector namesGroups; if (groupfile != "") { GroupMap groupMap(groupfile); groupMap.readMap(); namesGroups = groupMap.getNamesOfGroups(); }else if (countfile != ""){ CountTable ct; ct.readTable(countfile, true, true, Groups); namesGroups = ct.getNamesOfGroups(); }else { m->mothurOut("[ERROR]: you must provide a count or group file to split by group. quitting... \n"); m->setControl_pressed(true); return 0; } if (Groups.size() == 0) { Groups = namesGroups; } if (processors > Groups.size()) { processors = Groups.size(); m->mothurOut("Reducing processors to " + toString(Groups.size()) + ".\n"); } //divide the groups between the processors int remainingPairs = Groups.size(); int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } if (flowfile != "") { splitFastqOrFlow(flowfile, ".flow"); } if (fastqfile != "") { splitFastqOrFlow(fastqfile, ".fastq"); } if ((fastafile != "") || (listfile != "")) { bool isCount = true; if (countfile == "" ) { isCount = false; } splitCountOrGroup(isCount); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("flow"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFlowFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "execute"); exit(1); } } //********************************************************************************************************************** int driverRunNameGroup(splitGroups2Struct* params){ try { if (params->m->getControl_pressed()) { return 0; } GroupMap groupMap; groupMap.readMap(params->groupfile, params->Groups); vector namesGroups = groupMap.getNamesOfGroups(); if (params->Groups.size() == 0) { params->Groups = namesGroups; } //GroupName -> files(fasta, list, group, name) for (int i = 0; i < params->Groups.size(); 
i++) { vector files; map >::iterator it = params->group2Files.find(params->Groups[i]); if (it != params->group2Files.end()) { files = it->second; } else { params->m->mothurOut("[ERROR]: Can find group " + params->Groups[i] + ", quitting.\n"); params->m->setControl_pressed(true); break; } params->m->mothurOut("Processing group: " + params->Groups[i] + "\n"); vector namesSeqsInThisGroup = groupMap.getNamesSeqs(params->Groups[i]); ofstream outGroup, outAccnos; params->util.openOutputFile(files[2], outGroup); params->util.openOutputFile(files[2]+".accnos", outAccnos); for (long long j = 0; j < namesSeqsInThisGroup.size(); j++) { outGroup << namesSeqsInThisGroup[j] << '\t' << params->Groups[i] << endl; outAccnos << namesSeqsInThisGroup[j] << endl; } outGroup.close(); outAccnos.close(); params->outputNames.push_back(files[2]); params->outputTypes["group"].push_back(files[2]); //use unique.seqs to create new name and fastafile string uniqueFasta = params->fastafile+params->Groups[i]; string uniqueName = params->namefile+params->Groups[i]; string uniqueList = params->listfile+params->Groups[i]; string inputString = "dups=f, accnos=" + files[2]+".accnos"; if (params->namefile != "") { inputString += ", name=" + uniqueName; params->util.copyFile(params->namefile, uniqueName); } if (params->fastafile != "") { inputString += ", fasta=" + uniqueFasta; params->util.copyFile(params->fastafile, uniqueFasta); } if (params->listfile != "") { inputString += ", list=" + uniqueList; params->util.copyFile(params->listfile, uniqueList); } params->m->mothurOut("/******************************************/\n"); params->m->mothurOut("Running command: get.seqs(" + inputString + ")\n"); Command* getCommand = new GetSeqsCommand(inputString); getCommand->execute(); map > filenames = getCommand->getOutputFiles(); delete getCommand; if (params->fastafile != "") { params->util.renameFile(filenames["fasta"][0], files[0]); params->outputNames.push_back(files[0]); params->outputTypes["fasta"].push_back(files[0]); params->util.mothurRemove(uniqueFasta); } if (params->listfile != "") { params->util.renameFile(filenames["list"][0], files[1]); params->outputNames.push_back(files[1]); params->outputTypes["list"].push_back(files[1]); params->util.mothurRemove(uniqueList); } if (params->namefile != "") { params->util.renameFile(filenames["name"][0], files[3]); params->outputNames.push_back(files[3]); params->outputTypes["name"].push_back(files[3]); } params->m->mothurOut("/******************************************/\nDone.\n"); params->util.mothurRemove(files[2]+".accnos"); params->util.mothurRemove(uniqueName); if (params->m->getControl_pressed()) { for (int i = 0; i < params->outputNames.size(); i++) { params->util.mothurRemove(params->outputNames[i]); } return 0; } } return 0; } catch(exception& e) { params->m->errorOut(e, "SplitGroupCommand", "driverRunNameGroup"); exit(1); } } //********************************************************************************************************************** int driverRunCount(splitGroups2Struct* params){ try { CountTable ct; ct.readTable(params->countfile, true, false, params->Groups); if (!ct.hasGroupInfo()) { params->m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); params->m->setControl_pressed(true); } if (params->m->getControl_pressed()) { return 0; } params->Groups = ct.getNamesOfGroups(); vector listFileOutputNames; if (params->listfile != "") { //create output file names InputData input(params->listfile, "list", nullVector); 
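//build the expected per-distance output list file names: one name per label in the input list file.
//these names are passed to get.seqs below when sequences are selected for each group.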
ListVector* list = input.getListVector(); vector files; map >::iterator it = params->group2Files.find(params->Groups[0]); if (it != params->group2Files.end()) { files = it->second; } else { params->m->mothurOut("[ERROR]: Can find group " + params->Groups[0] + ", quitting.\n"); params->m->setControl_pressed(true); } string listFileRoot = params->outputDir + params->util.getRootName(params->util.getSimpleName(files[1])); string listExt = params->util.getExtension(files[1]); while(list != nullptr) { listFileOutputNames.push_back(listFileRoot + list->getLabel() + listExt); delete list; list = input.getListVector(); } } //GroupName -> files(fasta, list, count) for (int i = 0; i < params->Groups.size(); i++) { //Groups only contains the samples assigned to this process vector files; map >::iterator it = params->group2Files.find(params->Groups[i]); if (it != params->group2Files.end()) { files = it->second; } else { params->m->mothurOut("[ERROR]: Can find group " + params->Groups[i] + ", quitting.\n"); params->m->setControl_pressed(true); break; } //print new count file string newCountFile = files[2]; vector tempGroups; tempGroups.push_back(params->Groups[i]); ct.printCompressedTable(newCountFile, tempGroups); params->outputNames.push_back(newCountFile); params->outputTypes["count"].push_back(newCountFile); vector namesOfSeqsInGroup = ct.getNamesOfSeqs(params->Groups[i]); unordered_set thisGroupsNames = params->util.mothurConvert(namesOfSeqsInGroup); //If more than one distance, use vector of list file outputs pair fastaFilePair(params->fastafile, files[0]); pair > listFilePair(params->listfile, listFileOutputNames); params->m->mothurOut("/******************************************/\n"); params->m->mothurOut("Selecting sequences for group " + params->Groups[i] + "\n\n"); Command* getCommand = new GetSeqsCommand(thisGroupsNames, fastaFilePair, listFilePair, nullStringPair, ""); delete getCommand; if (params->listfile != "") { params->outputNames.insert(params->outputNames.end(), listFileOutputNames.begin(), listFileOutputNames.end()); params->outputTypes["list"].insert(params->outputNames.end(), listFileOutputNames.begin(), listFileOutputNames.end()); } params->m->mothurOut("/******************************************/\n\n"); if (params->fastafile != "") { params->outputNames.push_back(files[0]); params->outputTypes["fasta"].push_back(files[0]); } if (params->m->getControl_pressed()) { for (int i = 0; i < params->outputNames.size(); i++) { params->util.mothurRemove(params->outputNames[i]); } break; } } return 0; } catch(exception& e) { params->m->errorOut(e, "SplitGroupCommand", "driverRunCount"); exit(1); } } //********************************************************************************************************************** //assumes nameToGroup[seq1] -> 1,3 means seq1 should be written to outputFiles[1] and outputFiles[3] int driverFastaCount(splitGroups3Struct* params){ try { CountTable ct; ct.readTable(params->countfile, true, false, params->Groups); if (!ct.hasGroupInfo()) { params->m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); params->m->setControl_pressed(true); } if (params->m->getControl_pressed()) { return 0; } params->Groups = ct.getNamesOfGroups(); int numGroups = params->Groups.size(); for (int i = 0; i < numGroups; i++) { //Groups only contains the samples assigned to this process vector outputFastaFiles; string g = ""; int count = 0; unordered_map > nameToGroup; unordered_map >::iterator it; string groupNameOutput = ""; vector 
groups; while ((count < params->splitNum) && (i < numGroups) ){ //print new count file string newCountFile = params->fileRootExts[2] + params->Groups[i] + params->fileRootExts[3]; vector tempGroups; tempGroups.push_back(params->Groups[i]); ct.printCompressedTable(newCountFile, tempGroups); params->outputNames.push_back(newCountFile); params->outputTypes["count"].push_back(newCountFile); vector namesOfSeqsInGroup = ct.getNamesOfSeqs(params->Groups[i]); string newFasta = params->fileRootExts[0] + params->Groups[i] + params->fileRootExts[1]; outputFastaFiles.push_back(newFasta); for (string name : namesOfSeqsInGroup) { it = nameToGroup.find(name); if (it != nameToGroup.end()) { (it->second).push_back(count); }else { vector thisSeqsGroups; thisSeqsGroups.push_back(count); nameToGroup[name] = thisSeqsGroups; } } groups.push_back(params->Groups[i]); groupNameOutput += params->Groups[i] + "-"; count++; i++; } i--; if (groupNameOutput.length() != 0) { groupNameOutput = groupNameOutput.substr(0, groupNameOutput.length()-1); } params->m->mothurOut("\nSelecting sequences for groups " + groupNameOutput + "\n\n"); Command* getCommand = new GetSeqsCommand(nameToGroup, params->fastafile, outputFastaFiles, groups); map > fileOutputs = getCommand->getOutputFiles(); delete getCommand; map >::iterator itOut = fileOutputs.find("fasta"); if (itOut != fileOutputs.end()) { vector o = itOut->second; for (string name : o) { params->outputNames.push_back(name); params->outputTypes["fasta"].push_back(name); } } if (params->m->getControl_pressed()) { for (int i = 0; i < params->outputNames.size(); i++) { params->util.mothurRemove(params->outputNames[i]); } break; } } return 0; } catch(exception& e) { params->m->errorOut(e, "SplitGroupCommand", "driverRunCount"); exit(1); } } //********************************************************************************************************************** void SplitGroupCommand::splitCountOrGroup(bool isCount){ try { //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { splitGroups2Struct* dataBundle = new splitGroups2Struct(groupfile, countfile, namefile, Groups, lines[i+1].start, lines[i+1].end); dataBundle->setFiles(fastafile, listfile, outputdir); data.push_back(dataBundle); if (isCount) { workerThreads.push_back(new std::thread(driverRunCount, dataBundle)); }else { workerThreads.push_back(new std::thread(driverRunNameGroup, dataBundle)); } } splitGroups2Struct* dataBundle = new splitGroups2Struct(groupfile, countfile, namefile, Groups, lines[0].start, lines[0].end); dataBundle->setFiles(fastafile, listfile, outputdir); if (isCount) { driverRunCount(dataBundle); }else { driverRunNameGroup(dataBundle); } outputNames.insert(outputNames.end(), dataBundle->outputNames.begin(), dataBundle->outputNames.end()); for (itTypes = dataBundle->outputTypes.begin(); itTypes != dataBundle->outputTypes.end(); itTypes++) { outputTypes[itTypes->first].insert(outputTypes[itTypes->first].end(), itTypes->second.begin(), itTypes->second.end()); } delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); outputNames.insert(outputNames.end(), data[i]->outputNames.begin(), data[i]->outputNames.end()); for (itTypes = data[i]->outputTypes.begin(); itTypes != data[i]->outputTypes.end(); itTypes++) { outputTypes[itTypes->first].insert(outputTypes[itTypes->first].end(), itTypes->second.begin(), itTypes->second.end()); } delete data[i]; delete workerThreads[i]; } } catch(exception& e) { m->errorOut(e, 
"SplitGroupCommand", "splitCountOrGroup"); exit(1); } } //********************************************************************************************************************** //splitGroups3Struct(string count, string fasta, string outd, vector g, int st, int en) : countfile(count), start(st), end(en) void SplitGroupCommand::splitFastaCount(){ try { //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { splitGroups3Struct* dataBundle = new splitGroups3Struct(countfile, fastafile, outputdir, Groups, lines[i+1].start, lines[i+1].end); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverFastaCount, dataBundle)); } splitGroups3Struct* dataBundle = new splitGroups3Struct(countfile, fastafile, outputdir, Groups, lines[0].start, lines[0].end); driverFastaCount(dataBundle); outputNames.insert(outputNames.end(), dataBundle->outputNames.begin(), dataBundle->outputNames.end()); for (itTypes = dataBundle->outputTypes.begin(); itTypes != dataBundle->outputTypes.end(); itTypes++) { outputTypes[itTypes->first].insert(outputTypes[itTypes->first].end(), itTypes->second.begin(), itTypes->second.end()); } delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); outputNames.insert(outputNames.end(), data[i]->outputNames.begin(), data[i]->outputNames.end()); for (itTypes = data[i]->outputTypes.begin(); itTypes != data[i]->outputTypes.end(); itTypes++) { outputTypes[itTypes->first].insert(outputTypes[itTypes->first].end(), itTypes->second.begin(), itTypes->second.end()); } delete data[i]; delete workerThreads[i]; } } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "splitFastaCount"); exit(1); } } //********************************************************************************************************************** int driverSplitFlow(splitGroupsStruct* params){ try { GroupMap* groupMap = nullptr; CountTable* ct = nullptr; vector namesGroups; if (params->groupfile != "") { groupMap = new GroupMap(params->groupfile); groupMap->readMap(); namesGroups = groupMap->getNamesOfGroups(); }else if (params->countfile != ""){ ct = new CountTable(); ct->readTable(params->countfile, true, true, params->Groups); namesGroups = ct->getNamesOfGroups(); }else { params->m->mothurOut("[ERROR]: you must provide a count or group file to split by group. quitting... 
\n"); params->m->setControl_pressed(true); } if (params->Groups.size() == 0) { params->Groups = namesGroups; } if (params->m->getControl_pressed()) { if (groupMap != nullptr) { delete groupMap; }else if (ct != nullptr) { delete ct; } return 0; } string name, flows; int count = 0; ifstream in; params->util.openInputFile(params->inputFileName, in); in >> flows; gobble(in); while (!in.eof()) { if (params->m->getControl_pressed()) { break; } in >> name; gobble(in); flows = params->util.getline(in); gobble(in); vector thisSeqsGroups; if (groupMap != nullptr) { string thisGroup = groupMap->getGroup(name); thisSeqsGroups.push_back(thisGroup); }else if (ct != nullptr) { if (ct->inTable(name)) { thisSeqsGroups = ct->getGroups(name); } } for (int i = 0; i < thisSeqsGroups.size(); i++) { map::iterator it = params->parsedFlowData.find(thisSeqsGroups[i]); if (it != params->parsedFlowData.end()) { it->second.total++; it->second.output += name + ' ' + flows + '\n'; if (it->second.total % 100 == 0) { //buffer write ofstream out; params->util.openOutputFileAppend(it->second.filename, out); out << it->second.output; it->second.output = ""; out.close(); } } //else not in the groups we are looking to parse, so ignore } count++; } //output rest for (map::iterator it = params->parsedFlowData.begin(); it != params->parsedFlowData.end(); it++) { if (params->m->getControl_pressed()) { break; } if (it->second.output != "") { //more seqs to output ofstream out; params->util.openOutputFileAppend(it->second.filename, out); out << it->second.output; it->second.output = ""; params->outputNames.push_back(it->second.filename); params->outputTypes["flow"].push_back(it->second.filename); }else if (it->second.total == 0) { //no seqs for this group, remove file params->util.mothurRemove(it->second.filename); }else { //finished writing, just add to list of output files params->outputNames.push_back(it->second.filename); params->outputTypes["flow"].push_back(it->second.filename); } } if (params->m->getControl_pressed()) { if (groupMap != nullptr) { delete groupMap; }else if (ct != nullptr) { delete ct; } return 0; } return count; } catch(exception& e) { params->m->errorOut(e, "SplitGroupCommand", "driverSplitFlow"); exit(1); } } //********************************************************************************************************************** int driverSplitFastq(splitGroupsStruct* params){ try { GroupMap* groupMap = nullptr; CountTable* ct = nullptr; vector namesGroups; if (params->groupfile != "") { groupMap = new GroupMap(params->groupfile); groupMap->readMap(); namesGroups = groupMap->getNamesOfGroups(); }else if (params->countfile != ""){ ct = new CountTable(); ct->readTable(params->countfile, true, true, params->Groups); namesGroups = ct->getNamesOfGroups(); }else { params->m->mothurOut("[ERROR]: you must provide a count or group file to split by group. quitting... 
\n"); params->m->setControl_pressed(true); return 0; } if (params->m->getControl_pressed()) { if (groupMap != nullptr) { delete groupMap; }else if (ct != nullptr) { delete ct; } return 0; } int count = 0; ifstream in; params->util.openInputFile(params->inputFileName, in); while (!in.eof()) { if (params->m->getControl_pressed()) { break; } bool ignore = false; FastqRead thisRead(in, ignore, params->format); gobble(in); string name = thisRead.getName(); vector thisSeqsGroups; if (groupMap != nullptr) { string thisGroup = groupMap->getGroup(name); thisSeqsGroups.push_back(thisGroup); }else if (ct != nullptr) { if (ct->inTable(name)) { thisSeqsGroups = ct->getGroups(name); } } for (int i = 0; i < thisSeqsGroups.size(); i++) { map::iterator it = params->parsedFastqData.find(thisSeqsGroups[i]); if (it != params->parsedFastqData.end()) { it->second.total++; it->second.output.push_back(thisRead); if (it->second.total % 500 == 0) { //buffer write ofstream out; params->util.openOutputFileAppend(it->second.filename, out); for (int j = 0; j < it->second.output.size(); j++) { it->second.output[j].printFastq(out); } it->second.output.clear(); out.close(); } } //else not in the groups we are looking to parse, so ignore } count++; } //output rest for (map::iterator it = params->parsedFastqData.begin(); it != params->parsedFastqData.end(); it++) { if (params->m->getControl_pressed()) { break; } if (it->second.output.size() != 0) { //more seqs to output ofstream out; params->util.openOutputFileAppend(it->second.filename, out); for (int j = 0; j < it->second.output.size(); j++) { it->second.output[j].printFastq(out); } it->second.output.clear(); out.close(); params->outputNames.push_back(it->second.filename); params->outputTypes["fastq"].push_back(it->second.filename); }else if (it->second.total == 0) { //no seqs for this group, remove file params->util.mothurRemove(it->second.filename); }else { //finished writing, just add to list of output files params->outputNames.push_back(it->second.filename); params->outputTypes["fastq"].push_back(it->second.filename); } } if (params->m->getControl_pressed()) { if (groupMap != nullptr) { delete groupMap; }else if (ct != nullptr) { delete ct; } return 0; } return count; } catch(exception& e) { params->m->errorOut(e, "SplitGroupCommand", "driverSplitFastq"); exit(1); } } //********************************************************************************************************************** void SplitGroupCommand::splitFastqOrFlow(string inputFile, string extension){ try { //create array of worker threads vector workerThreads; vector data; string outputfileRoot = outputdir + util.getRootName(util.getSimpleName(inputFile)); //Lauch worker threads for (int i = 0; i < processors-1; i++) { splitGroupsStruct* dataBundle = new splitGroupsStruct(groupfile, countfile, namefile, Groups, lines[i+1].start, lines[i+1].end); dataBundle->setFiles(inputFile, outputfileRoot, extension); dataBundle->setFormat(format); data.push_back(dataBundle); if (extension == ".fastq") { workerThreads.push_back(new std::thread(driverSplitFastq, dataBundle)); }else { workerThreads.push_back(new std::thread(driverSplitFlow, dataBundle)); } } splitGroupsStruct* dataBundle = new splitGroupsStruct(groupfile, countfile, namefile, Groups, lines[0].start, lines[0].end); dataBundle->setFiles(inputFile, outputfileRoot, extension); dataBundle->setFormat(format); if (extension == ".fastq") { driverSplitFastq(dataBundle); } else { driverSplitFlow(dataBundle); } outputNames.insert(outputNames.end(), 
dataBundle->outputNames.begin(), dataBundle->outputNames.end()); for (itTypes = dataBundle->outputTypes.begin(); itTypes != dataBundle->outputTypes.end(); itTypes++) { outputTypes[itTypes->first].insert(outputTypes[itTypes->first].end(), itTypes->second.begin(), itTypes->second.end()); } for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); outputNames.insert(outputNames.end(), data[i]->outputNames.begin(), data[i]->outputNames.end()); for (itTypes = data[i]->outputTypes.begin(); itTypes != data[i]->outputTypes.end(); itTypes++) { outputTypes[itTypes->first].insert(outputTypes[itTypes->first].end(), itTypes->second.begin(), itTypes->second.end()); } delete data[i]; delete workerThreads[i]; } delete dataBundle; } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "splitFastqOrFlow"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/splitgroupscommand.h000077500000000000000000000215421424121717000223340ustar00rootroot00000000000000#ifndef SPLITGROUPSCOMMAND_H #define SPLITGROUPSCOMMAND_H /* * splitgroupscommand.h * Mothur * * Created by westcott on 9/20/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ /* split.groups - given a group file, split sequences and names files in to separate files *.group1.fasta and .group1.names. */ #include "command.hpp" #include "groupmap.h" #include "sequence.hpp" #include "fastqread.h" #include "getseqscommand.h" //********************************************************************************************************************** struct flowOutput { string output; string filename; int total; flowOutput(string f) { filename = f; output = ""; total = 0; } flowOutput() { filename = ""; output = ""; total = 0; } flowOutput(string f, string o, int t) : filename(f), output(o), total(t) {} }; //********************************************************************************************************************** struct fastqOutput { vector output; string filename; int total; fastqOutput(string f) { filename = f; total = 0; } fastqOutput() { filename = ""; total = 0; } }; //********************************************************************************************************************** struct splitGroupsStruct { string groupfile, countfile, namefile, inputFileName, format; int start, end; bool isFastq; vector Groups; map parsedFastqData; map parsedFlowData; map > outputTypes; vector outputNames; MothurOut* m; Utils util; splitGroupsStruct(string group, string count, string name, vector g, int st, int en) : groupfile(group), countfile(count), namefile(name), start(st), end(en) { m = MothurOut::getInstance(); format = "illumina1.8+"; isFastq = true; for (int i = st; i < en; i++) { Groups.push_back(g[i]); } } void setFiles(string input, string outfileRoot, string ext) { inputFileName = input; int numFlows = 0; if (ext != ".fastq") { ifstream in; util.openInputFile(input, in); in >> numFlows; in.close(); } for (int i = 0; i < Groups.size(); i++) { string newFileName = outfileRoot + Groups[i] + ext; if (ext == ".fastq") { fastqOutput thisGroupsInfo(newFileName); parsedFastqData[Groups[i]] = thisGroupsInfo; ofstream out; util.openOutputFile(newFileName, out); out.close(); //clear file for append }else { flowOutput thisGroupsInfo(newFileName); parsedFlowData[Groups[i]] = thisGroupsInfo; isFastq = false; ofstream out; util.openOutputFile(newFileName, out); out << numFlows << endl; out.close(); //clear file for append } if 
(m->getControl_pressed()) { break; } } } void setFormat(string form) { format = form; } }; //********************************************************************************************************************** struct splitGroups2Struct { string groupfile, countfile, namefile, fastafile, listfile, outputDir; int start, end; vector Groups; map > group2Files; //GroupName -> files(fasta, list, count) or GroupName -> files(fasta, list, group, name) map > outputTypes; vector outputNames; MothurOut* m; Utils util; splitGroups2Struct(string group, string count, string name, vector g, int st, int en) : groupfile(group), countfile(count), namefile(name), start(st), end(en) { m = MothurOut::getInstance(); for (int i = st; i < en; i++) { Groups.push_back(g[i]); } } void setFiles(string fasta, string list, string outd) { fastafile = fasta; listfile = list; outputDir = outd; string fastaFileRoot = outputDir + util.getRootName(util.getSimpleName(fastafile)); string listFileRoot = outputDir + util.getRootName(util.getSimpleName(listfile)); string listExt = util.getExtension(listfile); string fastaExt = util.getExtension(fastafile); string countFileRoot, countExt, nameFileRoot, nameExt, groupFileRoot, groupExt; if (countfile != "") { countFileRoot = outputDir + util.getRootName(util.getSimpleName(countfile)); countExt = util.getExtension(countfile); groupFileRoot = ""; groupExt = ""; nameFileRoot = ""; nameExt = ""; }else { groupFileRoot = outputDir + util.getRootName(util.getSimpleName(groupfile)); groupExt = util.getExtension(groupfile); if (namefile != "") { nameFileRoot = outputDir + util.getRootName(util.getSimpleName(namefile)); nameExt = util.getExtension(namefile); }else { nameFileRoot = ""; nameExt = ""; } countFileRoot = ""; countExt = ""; } for (int i = 0; i < Groups.size(); i++) { if (m->getControl_pressed()) { break; } string newListFileName = listFileRoot + Groups[i] + listExt; string newFastaFileName = fastaFileRoot + Groups[i] + fastaExt; string newCountFileName = countFileRoot + Groups[i] + countExt; string newGroupFileName = groupFileRoot + Groups[i] + groupExt; string newNameFileName = nameFileRoot + Groups[i] + nameExt; vector files; if (fastafile != "") { files.push_back(newFastaFileName); }else { files.push_back(""); } if (listfile != "") { files.push_back(newListFileName); }else { files.push_back(""); } if (countfile != "") { files.push_back(newCountFileName); }else if (groupfile != "") { files.push_back(newGroupFileName); if (namefile != "") { files.push_back(newNameFileName); }//else{ files.push_back(""); } } group2Files[Groups[i]] = files; } } }; //********************************************************************************************************************** struct splitGroups3Struct { string countfile, fastafile, outputDir; int start, end, splitNum; vector Groups; vector< string > fileRootExts; // fastafileRoot, fastaFileExtentsions, countfileRoot, countFileExtension, map > outputTypes; vector outputNames; MothurOut* m; Utils util; splitGroups3Struct(string count, string fasta, string outd, vector g, int st, int en) : fastafile(fasta), countfile(count), outputDir(outd), start(st), end(en) { m = MothurOut::getInstance(); for (int i = st; i < en; i++) { Groups.push_back(g[i]); } splitNum = 10; string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir = util.hasPath(fastafile); } string fastaFileRoot = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); string fastaExt = util.getExtension(fastafile); if (outputDir == "") { thisOutputDir = 
util.hasPath(countfile); } string countFileRoot = thisOutputDir + util.getRootName(util.getSimpleName(countfile)); string countExt = util.getExtension(countfile); fileRootExts.push_back(fastaFileRoot); fileRootExts.push_back(fastaExt); fileRootExts.push_back(countFileRoot); fileRootExts.push_back(countExt); } }; /***************************************************************************************/ class SplitGroupCommand : public Command { public: SplitGroupCommand(string); SplitGroupCommand(vector, string fasta, string count, string o); //used by splitbySample algos ~SplitGroupCommand() = default; vector setParameters(); string getCommandName() { return "split.groups"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Split.group"; } string getDescription() { return "split a name or fasta file by group"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames; vector lines; string namefile, groupfile, countfile, groups, fastafile, flowfile, fastqfile, format, listfile; vector Groups; bool abort; int processors; void splitCountOrGroup(bool); void splitFastaCount(); void splitFastqOrFlow(string, string); }; /***************************************************************************************/ #endif mothur-1.48.0/source/commands/sracommand.cpp000066400000000000000000003476731424121717000210760ustar00rootroot00000000000000// // sracommand.cpp // Mothur // // Created by SarahsWork on 10/28/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "sracommand.h" #include "sffinfocommand.h" #include "fastaqinfocommand.h" //********************************************************************************************************************** vector SRACommand::setParameters(){ try { CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff); CommandParameter poligos("oligos", "InputTypes", "", "", "oligos", "none", "none","",false,false,true); parameters.push_back(poligos); CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile); CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq); CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact); CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pincludescrap("includescrap", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pincludescrap); CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark); //choose only one multiple options CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform); CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); 
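//platform, instrument and the library descriptors registered below are controlled-vocabulary values that make.sra copies into the generated submission XML.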
parameters.push_back(pinstrument); CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy); CommandParameter pdatatype("datatype", "String", "METAGENOME", "", "", "", "","",false,false); parameters.push_back(pdatatype); CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource); CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection); CommandParameter porientation("orientation", "Multiple", "forward-reverse", "forward", "", "", "","",false,false); parameters.push_back(porientation); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs); CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim); //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files. CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["xml"] = tempOutNames; abort = false; calledHelp = false; fileOption = 0; libLayout = "single"; //controlled vocab vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SRACommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SRACommand::getHelpString(){ try { string helpString = ""; helpString += "The make.sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n"; helpString += "The make.sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, checkorient, platform, orientation, libstrategy, datatype, libsource, libselection, instrument, includescrap and trim.\n"; helpString += "The sff parameter is used to provide the original sff file.\n"; helpString += "The fastq parameter is used to provide the original fastq file.\n"; helpString += "The project parameter is used to provide your project file.\n"; helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by. It is required and must contain barcodes and primers, or you must provide a file option. \n"; helpString += "The mimark parameter is used to provide your mimarks file. 
You can create the template for this file using the get.mimarkspackage command.\n"; helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2, 3 or 4 columns. The 2 column files are sff file then oligos or fastqfile then oligos or ffastq and rfastq. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. Four column files are for inputting file pairs with index files. Example: My.forward.fastq My.reverse.fastq NONE My.rindex.fastq. The keyword NONE can be used when there is not a index file for either the forward or reverse file. \n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n"; helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. sff files only Default=True. \n"; helpString += "The includescrap parameter is used to indicate whether or not to include the scrapped sequences in your submission. The default is true.\n"; helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n"; helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n"; helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n"; helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n"; helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n"; helpString += "The libselection parameter is used to specify library selection. Default=PCR. 
Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n"; helpString += "The datatype parameter is used to specify datatype. Default=METAGENOME. Choices are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n"; helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"; helpString += "make.sra(sff=sff=GHL4YHV01.sff, GHL4YHV01.oligos, project=test.project, mimark=MIMarksData.txt)\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SRACommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SRACommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "xml") { pattern = "[filename],xml"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SRACommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SRACommand::SRACommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastqfile = validParameter.validFile(parameters, "fastq"); if (fastqfile == "not open") { fastqfile = ""; abort = true; } else if (fastqfile == "not found") { fastqfile = ""; } sfffile = validParameter.validFile(parameters, "sff"); if (sfffile == "not open") { sfffile = ""; abort = true; } else if (sfffile == "not found") { sfffile = ""; } setOligosParameter = false; oligosfile = validParameter.validFile(parameters, "oligos"); if (oligosfile == "not found") { oligosfile = ""; } else if(oligosfile == "not open") { abort = true; } else { current->setOligosFile(oligosfile); setOligosParameter = true; } contactfile = validParameter.validFile(parameters, "project"); if (contactfile == "not found") { contactfile = ""; m->mothurOut("[ERROR]: You must provide a project file before you can use the sra command.\n"); abort = true; } else if(contactfile == "not open") { abort = true; } mimarksfile = validParameter.validFile(parameters, "mimark"); if (mimarksfile == "not found") { mimarksfile = ""; m->mothurOut("[ERROR]: You must provide a mimark file before you can use the sra command. 
You can create a template for this file using the get.mimarkspackage command.\n"); abort = true; } else if(mimarksfile == "not open") { abort = true; } file = validParameter.validFile(parameters, "file"); if (file == "not open") { file = ""; abort = true; } else if (file == "not found") { file = ""; } else { fileOption = findFileOption(); } if ((file == "") && (oligosfile == "")) { m->mothurOut("[ERROR]: You must provide an oligos file or file with oligos files in them before you can use the sra command.\n"); abort = true; } if ((fastqfile == "") && (file == "") && (sfffile == "")) { m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command.\n"); abort = true; } //use only one Mutliple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT platform = validParameter.valid(parameters, "platform"); if (platform == "not found") { platform = "_LS454"; } if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function if (!abort) { //don't check instrument model is platform is bad //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified instrumentModel = validParameter.valid(parameters, "instrument"); if (instrumentModel == "not found") { instrumentModel = "454_GS"; } if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function } //turn _ to spaces mothur's work around for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } } libStrategy = validParameter.valid(parameters, "libstrategy"); if (libStrategy == "not found") { libStrategy = "AMPLICON"; } if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function //turn _ to spaces mothur's work around for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; } } libSource = validParameter.valid(parameters, "libsource"); if (libSource == "not found") { libSource = "METAGENOMIC"; } if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function //turn _ to spaces mothur's work around for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; } } libSelection = validParameter.valid(parameters, "libselection"); if (libSelection == "not found") { libSelection = "PCR"; } if (!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function //turn _ to spaces mothur's work around for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; } } dataType = validParameter.valid(parameters, "datatype"); if (dataType == "not found") { dataType = "metagenome"; } if (!checkCasesDataType(dataType)) { abort = true; } //error message in function //turn _ to spaces mothur's work around for (int i = 0; i < dataType.length(); i++) { if (dataType[i] == '_') { dataType[i] = ' '; } } orientation = validParameter.valid(parameters, "orientation"); if (orientation == "not found") { orientation = "forward"; } if ((orientation == "forward") || (orientation == "reverse")) { } else { m->mothurOut("[ERROR]: " + orientation + " is not a valid orientation option. 
Choices are: forward and reverse.\n\n"); abort = true; } string temp = validParameter.valid(parameters, "bdiffs"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, bdiffs); temp = validParameter.valid(parameters, "pdiffs"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, pdiffs); temp = validParameter.valid(parameters, "ldiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, ldiffs); temp = validParameter.valid(parameters, "sdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, sdiffs); temp = validParameter.valid(parameters, "tdiffs"); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); } util.mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } checkorient = validParameter.valid(parameters, "checkorient"); if (temp == "not found") { temp = "F"; } trim = validParameter.valid(parameters, "trim"); if (trim == "not found"){ temp = "T"; } temp = validParameter.valid(parameters, "includescrap"); if (temp == "not found") { temp = "T"; } includeScrap = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "SRACommand", "SRACommand"); exit(1); } } //********************************************************************************************************************** int SRACommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } readContactFile(); if (m->getDebug()) { m->mothurOut("[DEBUG]: read contact file.\n"); } readMIMarksFile(); if (m->getDebug()) { m->mothurOut("[DEBUG]: read mimarks file.\n"); } if (oligosfile != "") { readOligos(); } if (m->getDebug()) { m->mothurOut("[DEBUG]: read oligos file.\n"); } if (m->getControl_pressed()) { return 0; } //parse files map > filesBySample; isSFF = false; if (file != "") { readFile(filesBySample); } else if (sfffile != "") { parseSffFile(filesBySample); } else if (fastqfile != "") { parseFastqFile(filesBySample); } //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files. checkGroups(filesBySample); sanityCheckMiMarksGroups(); if (m->getDebug()) { m->mothurOut("[DEBUG]: finished sanity check.\n"); } //create xml file string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(inputfile); } map variables; variables["[filename]"] = thisOutputDir + "submission."; string outputFileName = getOutputFileName("xml", variables); outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); string blankFile = thisOutputDir + "submit.ready"; ofstream outT; util.openOutputFile(blankFile, outT); outT.close(); //contacts portion //////////////////////////////////////////////////////// out << "\n"; out << "\t\n"; out << "\t\t New Submission. 
Generated by mothur version " + current->getVersion() + " \n"; out << "\t\t\n"; out << "\t\t\n"; out << "\t\t" + centerName + "\n"; out << "\t\t\n"; out << "\t\t\t\n"; out << "\t\t\t\t" + firstName + "\n"; out << "\t\t\t\t" + lastName + "\n"; out << "\t\t\t\n"; out << "\t\t\n"; out << "\t\t\n"; out << "\t\n"; //////////////////////////////////////////////////////// //bioproject //////////////////////////////////////////////////////// out << "\t\n"; out << "\t\t\n"; out << "\t\t\t\n"; out << "\t\t\t\t\n"; out << "\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t" + projectName + " \n"; out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\t" + projectTitle + " \n"; out << "\t\t\t\t\t\t\t

" + description + "

\n"; if (website != "") { out << "\t\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\t\t" + website + "\n"; out << "\t\t\t\t\t\t\t\n"; } if (Grants.size() != 0) { for (int i = 0; i < Grants.size(); i++) { out << "\t\t\t\t\t\t\t\n"; if (Grants[i].grantTitle != "") { out << "\t\t\t\t\t\t\t\t" + Grants[i].grantTitle + "\n"; } out << "\t\t\t\t\t\t\t\t" + Grants[i].grantAgency + "\n"; out << "\t\t\t\t\t\t\t\n"; } } out << "\t\t\t\t\t\t
\n"; out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\t\t\t" + dataType + " \n"; out << "\t\t\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t
\n"; out << "\t\t\t\t
\n"; out << "\t\t\t
\n"; out << "\t\t\t\n"; out << "\t\t\t\t\t\t" + projectName + " \n"; out << "\t\t\t\n"; out << "\t\t
\n"; out << "\t
\n"; //////////////////////////////////////////////////////// //bioSample //////////////////////////////////////////////////////// for (int i = 0; i < Groups.size(); i++) { string thisGroup = Groups[i]; thisGroup = util.splitWhiteSpace(thisGroup).front(); //removes leading and trailing spaces if any if ((!includeScrap) && (Groups[i] == "scrap")) {} //ignore scrap else { if (m->getControl_pressed()) { break; } out << "\t\n"; out << "\t\t\n"; out << "\t\t\t\n"; out << "\t\t\t\t\n"; out << "\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t" + thisGroup + " \n"; out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t\t" + mimarks[Groups[i]]["sample_title"] + " \n"; out << "\t\t\t\t\t\t\t

" + mimarks[Groups[i]]["description"] + "

\n"; out << "\t\t\t\t\t\t
\n"; out << "\t\t\t\t\t\t\n"; string organismName = "metagenome"; map::iterator itOrganism = Group2Organism.find(Groups[i]); if (itOrganism != Group2Organism.end()) { organismName = itOrganism->second; } //user supplied acceptable organism, so use it. out << "\t\t\t\t\t\t\t" + organismName + " \n"; out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t\t" + packageType + "\n"; out << "\t\t\t\t\t\t\n"; //add biosample required attributes map >:: iterator it = mimarks.find(Groups[i]); if (it != mimarks.end()) { map categories = it->second; for (map:: iterator it2 = categories.begin(); it2 != categories.end(); it2++) { if (m->getControl_pressed()) { break; } out << "\t\t\t\t\t\t\tfirst + "\">" + it2->second + "\n"; } } out << "\t\t\t\t\t\t\n"; out << "\t\t\t\t\t
\n"; out << "\t\t\t\t
\n"; out << "\t\t\t
\n"; out << "\t\t\t\n"; out << "\t\t\t\t" + thisGroup + "\n"; out << "\t\t\t\n"; out << "\t\t
\n"; out << "\t
\n"; } } map::iterator itGroup; //File objects //////////////////////////////////////////////////////// for (int i = 0; i < Groups.size(); i++) { string thisGroup = Groups[i]; thisGroup = util.splitWhiteSpace(thisGroup).front(); //removes leading and trailing spaces if any if ((!includeScrap) && (Groups[i] == "scrap")) {} //ignore scrap else { vector thisGroupsFiles = filesBySample[Groups[i]]; string thisGroupsBarcode, thisGroupsPrimer; if (libLayout == "paired") { thisGroupsBarcode = "."; thisGroupsPrimer = "."; } else { thisGroupsBarcode = ""; thisGroupsPrimer = ""; } itGroup = Group2Barcode.find(Groups[i]); if (itGroup != Group2Barcode.end()) { if (fileOption != 5) { thisGroupsBarcode = itGroup->second; } //don't include barcodes if using index files. } itGroup = Group2Primer.find(Groups[i]); if (itGroup != Group2Primer.end()) { thisGroupsPrimer = itGroup->second; } for (int j = 0; j < thisGroupsFiles.size(); j++) { string libId = util.getSimpleName(thisGroupsFiles[j]) + "." + thisGroup; if (m->getControl_pressed()) { break; } out << "\t\n"; out << "\t\t\n"; if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames vector pieces = util.splitWhiteSpace(thisGroupsFiles[j]); libId = util.getSimpleName(pieces[0]) + "." + thisGroup; out << "\t\t\t\n"; out << "\t\t\t\tgeneric-data \n"; out << "\t\t\t\n"; out << "\t\t\t\n"; out << "\t\t\t\tgeneric-data \n"; out << "\t\t\t\n"; //attributes if (linkers.size() != 0) { string linkerString = ""; //linker size forced to 1 for (int k = 0; k < linkers.size(); k++) { linkerString += linkers[k] + ";"; } linkerString = linkerString.substr(0, linkerString.length()-1); out << "\t\t\t" + linkerString + "\n"; out << "\t\t\t" + toString(ldiffs) + "\n"; } if (thisGroupsBarcode != ".") { string barcodeString = ""; vector thisBarcodes; util.splitAtChar(thisGroupsBarcode, thisBarcodes, '.'); if (thisBarcodes[0] != "NONE") { barcodeString += thisBarcodes[0] + ";"; } if (thisBarcodes[1] != "NONE") { barcodeString += thisBarcodes[1] + ";"; }//forward barcode + reverse barcode barcodeString = barcodeString.substr(0, barcodeString.length()-1); out << "\t\t\t" + barcodeString + "\n"; out << "\t\t\t" + toString(bdiffs) + "\n"; } if (spacers.size() != 0) { string spacerString = ""; //spacer size forced to 1 for (int k = 0; k < spacers.size(); k++) { spacerString += spacers[k] + ";"; } spacerString = spacerString.substr(0, spacerString.length()-1); out << "\t\t\t" + spacerString + "\n"; out << "\t\t\t" + toString(sdiffs) + "\n"; } if (thisGroupsPrimer != ".") { string primerString = ""; vector thisPrimers; util.splitAtChar(thisGroupsPrimer, thisPrimers, '.'); if (thisPrimers[0] != "") { primerString += thisPrimers[0] + ";"; } if (thisPrimers[1] != "") { primerString += thisPrimers[1] + ";"; } if (primerString != "") { primerString = primerString.substr(0, primerString.length()-1); out << "\t\t\t" + primerString + "\n"; out << "\t\t\t" + toString(pdiffs) + "\n"; } } out << "\t\t\t" + libId + "\n"; out << "\t\t\t" + libStrategy + "\n"; out << "\t\t\t" + libSource + "\n"; out << "\t\t\t" + libSelection + "\n"; out << "\t\t\t" + libLayout + "\n"; out << "\t\t\t" + instrumentModel + "\n"; out << "\t\t\t" + mimarks[Groups[i]]["seq_methods"] + "\n"; }else { //single out << "\t\t\t\n"; out << "\t\t\t\tgeneric-data \n"; out << "\t\t\t\n"; //attributes //linkers -> barcodes -> spacers -> primers if (linkers.size() != 0) { string linkerString = ""; for (int k = 0; k < linkers.size(); k++) { linkerString += linkers[k] + ";"; } linkerString 
= linkerString.substr(0, linkerString.length()-1); out << "\t\t\t" + linkerString + "\n"; out << "\t\t\t" + toString(ldiffs) + "\n"; } if (thisGroupsBarcode != "") { out << "\t\t\t" + thisGroupsBarcode + "\n"; out << "\t\t\t" + toString(bdiffs) + "\n"; } if (spacers.size() != 0) { string spacerString = ""; for (int k = 0; k < spacers.size(); k++) { spacerString += spacers[k] + ";"; } spacerString = spacerString.substr(0, spacerString.length()-1); out << "\t\t\t" + spacerString + "\n"; out << "\t\t\t" + toString(sdiffs) + "\n"; } if (thisGroupsPrimer != "") { out << "\t\t\t" + thisGroupsPrimer + "\n"; out << "\t\t\t" + toString(pdiffs) + "\n"; } //out << "\t\t\t" + orientation + "\n"; out << "\t\t\t" + libId + "\n"; out << "\t\t\t" + libStrategy + "\n"; out << "\t\t\t" + libSource + "\n"; out << "\t\t\t" + libSelection + "\n"; out << "\t\t\t" + libLayout + "\n"; out << "\t\t\t" + instrumentModel + "\n"; out << "\t\t\t" + mimarks[Groups[i]]["seq_methods"] + "\n"; } ///////////////////bioProject info out << "\t\t\t\n"; out << "\t\t\t\t\n"; out << "\t\t\t\t\t" + projectName + " \n"; out << "\t\t\t\t\n"; out << "\t\t\t\n"; //////////////////bioSample info out << "\t\t\t\n"; out << "\t\t\t\t\n"; out << "\t\t\t\t\t" + thisGroup + "\n"; out << "\t\t\t\t\n"; out << "\t\t\t\n"; //libID out << "\t\t\t\n"; if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames vector pieces = util.splitWhiteSpace(thisGroupsFiles[j]); libId = util.getSimpleName(pieces[0]) + "." + thisGroup; } out << "\t\t\t\t" + libId + "\n"; out << "\t\t\t\n"; out << "\t\t\n"; out << "\t\n"; } } } out << "
\n"; out.close(); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //output files created by command m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SRACommand", "execute"); exit(1); } } //********************************************************************************************************************** int SRACommand::readContactFile(){ try { lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = ""; website = ""; projectName = ""; projectTitle = ""; ownership = "owner"; ifstream in; util.openInputFile(contactfile, in); while(!in.eof()) { if (m->getControl_pressed()) { break; } string key, value; in >> key; gobble(in); value = util.getline(in); gobble(in); if (!util.isASCII(value)) { m->mothurOut("[ERROR]: " + value + " contains non ASCII characters. Submission entries cannot contains non ASCII characters, please correct.\n"); m->setControl_pressed(true); } for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); } if (key == "USERNAME") { submissionName = value; } else if (key == "LAST") { lastName = value; } else if (key == "FIRST") { firstName = value; } else if (key == "EMAIL") { email = value; } else if (key == "CENTER") { centerName = value; } else if (key == "TYPE") { centerType = value; for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); } if ((centerType == "consortium") || (centerType == "center") || (centerType == "institute") || (centerType == "lab")) {} else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option. Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated.\n"); m->setControl_pressed(true); } }else if (key == "OWNERSHIP") { ownership = value; for (int i = 0; i < ownership.length(); i++) { ownership[i] = tolower(ownership[i]); } if ((ownership == "owner") || (ownership == "participant")) {} else { m->mothurOut("[ERROR]: " + ownership + " is not a ownership option. Valid ownership options are owner or participant. 
This is a controlled vocabulary section in the XML file that will be generated.\n"); m->setControl_pressed(true); } }else if (key == "DESCRIPTION") { description = value; } else if (key == "WEBSITE") { website = value; } else if (key == "PROJECTNAME") { projectName = value; } else if (key == "PROJECTTITLE") { projectTitle = value; } else if (key == "GRANT") { string temp = value; vector values; util.splitAtComma(temp, values); Grant thisGrant; for (int i = 0; i < values.size(); i++) { vector items; util.splitAtChar(values[i], items, '='); if (items.size() != 2) { m->mothurOut("[ERROR]: error parsing grant info for line \"" + value + "\", skipping it.\n"); break; } else { //remove any leading spaces in tag int i; for (i = 0; i < items[0].length(); i++) { if (isspace(items[0][i])) {}else {break;} } items[0] = items[0].substr(i); if (items[0] == "id") { thisGrant.grantId = items[1]; } else if (items[0] == "title") { thisGrant.grantTitle = items[1]; } else if (items[0] == "agency") { thisGrant.grantAgency = items[1]; } else { m->mothurOut("[ERROR]: unknown identifier '" + items[0] + "', skipping it.\n"); } } } if ((thisGrant.grantId == "") || (thisGrant.grantAgency == "")) { m->mothurOut("[ERROR]: Missing info for line \"" + value + "\", skipping it. Note: the id and agency fields are required. Example: Grant id=yourID, agency=yourAgency.\n"); } else { Grants.push_back(thisGrant); } } } in.close(); if (lastName == "") { m->mothurOut("[ERROR]: missing last name from project file, quitting.\n"); m->setControl_pressed(true); } if (firstName == "") { m->mothurOut("[ERROR]: missing first name from project file, quitting.\n"); m->setControl_pressed(true); } if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from project file, quitting.\n"); m->setControl_pressed(true); } if (email == "") { m->mothurOut("[ERROR]: missing email from project file, quitting.\n"); m->setControl_pressed(true); } if (centerName == "") { m->mothurOut("[ERROR]: missing center name from project file, quitting.\n"); m->setControl_pressed(true); } if (centerType == "") { m->mothurOut("[ERROR]: missing center type from project file, quitting.\n"); m->setControl_pressed(true); } if (description == "") { m->mothurOut("[ERROR]: missing description from project file, quitting.\n"); m->setControl_pressed(true); } if (projectTitle == "") { m->mothurOut("[ERROR]: missing project title from project file, quitting.\n"); m->setControl_pressed(true); } if (projectName == "") { m->mothurOut("[ERROR]: missing project name from project file, quitting.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "SRACommand", "readContactFile"); exit(1); } } //********************************************************************************************************************** //air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water //all packages require: *sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon //air: *altitude //host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, plant_associated: *host //microbial, sediment, soil: *depth *elev //water: *depth int SRACommand::readMIMarksFile(){ try { //acceptable organisms vector acceptableOrganisms; bool organismError = false; //ecological acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); 
acceptableOrganisms.push_back("air metagenome"); acceptableOrganisms.push_back("anaerobic digester metagenome"); acceptableOrganisms.push_back("ant fungus garden metagenome"); acceptableOrganisms.push_back("aquatic metagenome"); acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("beach sand metagenome"); acceptableOrganisms.push_back("biofilm metagenome"); acceptableOrganisms.push_back("biofilter metagenome"); acceptableOrganisms.push_back("biogas fermenter metagenome"); acceptableOrganisms.push_back("bioreactor metagenome"); acceptableOrganisms.push_back("bioreactor sludge metagenome"); acceptableOrganisms.push_back("clinical metagenome"); acceptableOrganisms.push_back("coal metagenome"); acceptableOrganisms.push_back("compost metagenome"); acceptableOrganisms.push_back("dust metagenome"); acceptableOrganisms.push_back("fermentation metagenome"); acceptableOrganisms.push_back("food fermentation metagenome"); acceptableOrganisms.push_back("food metagenome"); acceptableOrganisms.push_back("freshwater metagenome"); acceptableOrganisms.push_back("freshwater sediment metagenome"); acceptableOrganisms.push_back("groundwater metagenome"); acceptableOrganisms.push_back("halite metagenome"); acceptableOrganisms.push_back("hot springs metagenome"); acceptableOrganisms.push_back("hydrocarbon metagenome"); acceptableOrganisms.push_back("hydrothermal vent metagenome"); acceptableOrganisms.push_back("hypersaline lake metagenome"); acceptableOrganisms.push_back("ice metagenome"); acceptableOrganisms.push_back("indoor metagenome"); acceptableOrganisms.push_back("industrial waste metagenome"); acceptableOrganisms.push_back("mangrove metagenome"); acceptableOrganisms.push_back("marine metagenome"); acceptableOrganisms.push_back("marine sediment metagenome"); acceptableOrganisms.push_back("microbial mat metagenome"); acceptableOrganisms.push_back("mine drainage metagenome"); acceptableOrganisms.push_back("mixed culture metagenome"); acceptableOrganisms.push_back("oil production facility metagenome"); acceptableOrganisms.push_back("paper pulp metagenome"); acceptableOrganisms.push_back("permafrost metagenome"); acceptableOrganisms.push_back("plastisphere metagenome"); acceptableOrganisms.push_back("power plant metagenome"); acceptableOrganisms.push_back("retting rhizosphere metagenome"); acceptableOrganisms.push_back("rock metagenome"); acceptableOrganisms.push_back("salt lake metagenome"); acceptableOrganisms.push_back("saltern metagenome"); acceptableOrganisms.push_back("sediment metagenome"); acceptableOrganisms.push_back("snow metagenome"); acceptableOrganisms.push_back("soil metagenome"); acceptableOrganisms.push_back("stromatolite metagenome"); acceptableOrganisms.push_back("terrestrial metagenome"); acceptableOrganisms.push_back("tomb wall metagenome"); acceptableOrganisms.push_back("wastewater metagenome"); acceptableOrganisms.push_back("wetland metagenome"); acceptableOrganisms.push_back("whale fall metagenome"); //oganismal acceptableOrganisms.push_back("algae metagenome"); acceptableOrganisms.push_back("ant metagenome"); acceptableOrganisms.push_back("bat metagenome"); acceptableOrganisms.push_back("beetle metagenome"); acceptableOrganisms.push_back("bovine gut metagenome"); acceptableOrganisms.push_back("bovine metagenome"); acceptableOrganisms.push_back("chicken gut metagenome"); acceptableOrganisms.push_back("coral metagenome"); acceptableOrganisms.push_back("echinoderm metagenome"); 
acceptableOrganisms.push_back("endophyte metagenome"); acceptableOrganisms.push_back("epibiont metagenome"); acceptableOrganisms.push_back("fish metagenome"); acceptableOrganisms.push_back("fossil metagenome"); acceptableOrganisms.push_back("gill metagenome"); acceptableOrganisms.push_back("gut metagenome"); acceptableOrganisms.push_back("honeybee metagenome"); acceptableOrganisms.push_back("human gut metagenome"); acceptableOrganisms.push_back("human lung metagenome"); acceptableOrganisms.push_back("human metagenome"); acceptableOrganisms.push_back("human nasal/pharyngeal metagenome"); acceptableOrganisms.push_back("human oral metagenome"); acceptableOrganisms.push_back("human skin metagenome"); acceptableOrganisms.push_back("insect gut metagenome"); acceptableOrganisms.push_back("insect metagenome"); acceptableOrganisms.push_back("mollusc metagenome"); acceptableOrganisms.push_back("mosquito metagenome"); acceptableOrganisms.push_back("mouse gut metagenome"); acceptableOrganisms.push_back("mouse metagenome"); acceptableOrganisms.push_back("mouse skin metagenome"); acceptableOrganisms.push_back("nematode metagenome"); acceptableOrganisms.push_back("oral metagenome"); acceptableOrganisms.push_back("phyllosphere metagenome"); acceptableOrganisms.push_back("pig metagenome"); acceptableOrganisms.push_back("plant metagenome"); acceptableOrganisms.push_back("primate metagenome"); acceptableOrganisms.push_back("rat metagenome"); acceptableOrganisms.push_back("root metagenome"); acceptableOrganisms.push_back("sea squirt metagenome"); acceptableOrganisms.push_back("seed metagenome"); acceptableOrganisms.push_back("shoot metagenome"); acceptableOrganisms.push_back("skin metagenome"); acceptableOrganisms.push_back("snake metagenome"); acceptableOrganisms.push_back("sponge metagenome"); acceptableOrganisms.push_back("stomach metagenome"); acceptableOrganisms.push_back("symbiont metagenome"); acceptableOrganisms.push_back("termite gut metagenome"); acceptableOrganisms.push_back("termite metagenome"); acceptableOrganisms.push_back("upper respiratory tract metagenome"); acceptableOrganisms.push_back("urine metagenome"); acceptableOrganisms.push_back("viral metagenome"); acceptableOrganisms.push_back("wallaby gut metagenome"); acceptableOrganisms.push_back("wasp metagenome"); acceptableOrganisms.push_back("synthetic metagenome"); acceptableOrganisms.push_back("metagenome"); vector requiredFieldsForPackage; requiredFieldsForPackage.push_back("sample_name"); requiredFieldsForPackage.push_back("description"); requiredFieldsForPackage.push_back("sample_title"); requiredFieldsForPackage.push_back("collection_date"); requiredFieldsForPackage.push_back("env_biome"); requiredFieldsForPackage.push_back("env_feature"); requiredFieldsForPackage.push_back("env_material"); requiredFieldsForPackage.push_back("geo_loc_name"); requiredFieldsForPackage.push_back("lat_lon"); requiredFieldsForPackage.push_back("seq_methods"); requiredFieldsForPackage.push_back("organism"); ifstream in; util.openInputFile(mimarksfile, in); //read comments string temp; packageType = ""; while(!in.eof()) { if (m->getControl_pressed()) { break; } temp = util.getline(in); gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + temp + "\n"); } if (temp[0] == '#') { int pos = temp.find("MIMARKS.survey"); if (pos != string::npos) { packageType = temp.substr(1); } } else{ break; } //hit headers line } //in future may want to add parsing of format header.... 
vector headers; util.splitAtChar(temp, headers, '\t'); util.removeBlanks(headers); //remove * from required's for (int i = 0; i < headers.size(); i++) { if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); } if (m->getDebug()) { m->mothurOut("[DEBUG]: " + headers[i] + "\n"); } } if (m->getDebug()) { m->mothurOut("[DEBUG]: packageType = '" + packageType + "'\n"); } if (packageType == "MIMARKS.survey.air.4.0") { requiredFieldsForPackage.push_back("altitude"); } if (packageType == "MIMARKS.survey.host-associated.4.0") { requiredFieldsForPackage.push_back("host"); } if (packageType == "MIMARKS.survey.human-associated.4.0") { requiredFieldsForPackage.push_back("host"); } if (packageType == "MIMARKS.survey.human-gut.4.0") { requiredFieldsForPackage.push_back("host"); } if (packageType == "MIMARKS.survey.human-oral.4.0") { requiredFieldsForPackage.push_back("host"); } if (packageType == "MIMARKS.survey.human-skin.4.0") { requiredFieldsForPackage.push_back("host"); } if (packageType == "MIMARKS.survey.human-vaginal.4.0") { requiredFieldsForPackage.push_back("host"); } if (packageType == "MIMARKS.survey.microbial.4.0") { requiredFieldsForPackage.push_back("depth"); requiredFieldsForPackage.push_back("elev"); } if (packageType == "MIMARKS.survey.miscellaneous.4.0") {} if (packageType == "MIMARKS.survey.plant-associated.4.0") { requiredFieldsForPackage.push_back("host"); } if (packageType == "MIMARKS.survey.sediment.4.0") { requiredFieldsForPackage.push_back("depth"); requiredFieldsForPackage.push_back("elev"); } if (packageType == "MIMARKS.survey.soil.4.0") { requiredFieldsForPackage.push_back("depth"); requiredFieldsForPackage.push_back("elev"); } if (packageType == "MIMARKS.survey.wastewater.4.0") {} if (packageType == "MIMARKS.survey.water.4.0") { requiredFieldsForPackage.push_back("depth"); } if (!util.isSubset(headers, requiredFieldsForPackage)){ string requiredFields = ""; set sanity; for (int i = 0; i < headers.size(); i++) { sanity.insert(headers[i]); } string missing = ""; for (int i = 0; i < requiredFieldsForPackage.size()-1; i++) { requiredFields += requiredFieldsForPackage[i] + ", "; if (sanity.count(requiredFieldsForPackage[i]) == 0) { missing += requiredFieldsForPackage[i] + ", "; } } requiredFields += requiredFieldsForPackage[requiredFieldsForPackage.size()-1]; if (sanity.count(requiredFieldsForPackage[requiredFieldsForPackage.size()-1]) == 0) { missing += requiredFieldsForPackage[requiredFieldsForPackage.size()-1]; } m->mothurOut("[ERROR]: missing required fields for package, please correct. Required fields are " + requiredFields + ". Missing " + missing + "\n"); m->setControl_pressed(true); in.close(); return 0; } map allNA; for (int i = 1; i < headers.size(); i++) { allNA[headers[i]] = true; } while(!in.eof()) { if (m->getControl_pressed()) { break; } temp = util.getline(in); gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + temp + "\n"); } string original = temp; vector linePieces; util.splitAtChar(temp, linePieces, '\t'); util.removeBlanks(linePieces); for (int i = 0; i < linePieces.size(); i++) { if (!util.isASCII(linePieces[i] )) { m->mothurOut("[ERROR]: " + linePieces[i] + " contains non ASCII characters. 
Submission entries cannot contains non ASCII characters, please correct.\n"); m->setControl_pressed(true); break; } } if (m->getControl_pressed()) { break; } if (linePieces.size() != headers.size()) { m->mothurOut("[ERROR]: line: " + original + " contains " + toString(linePieces.size()) + " columns, but you have " + toString(headers.size()) + " column headers, please correct.\n"); m->setControl_pressed(true); } else { map >:: iterator it = mimarks.find(linePieces[0]); if (it == mimarks.end()) { map categories; //start after *sample_name for (int i = 1; i < headers.size(); i++) { //check the users inputs for appropriate organisms if (headers[i] == "organism") { if (!util.inUsersGroups(linePieces[i], acceptableOrganisms)) { //not an acceptable organism organismError = true; m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to acceptable 'metagenome'. NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i]; }else { if (linePieces[i] == "metagenome") { m->mothurOut("[WARNING]: metagenome is an acceptable organism, but NCBI would prefer a more specific choice if possible. Here is a link to the organism choices and descriptions, http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=408169&lvl=3&keep=1&srchmode=1&unlock. To request the addition of a taxonomy to the list, please contact Anjanette Johnston at johnston@ncbi.nlm.nih.gov.\n"); } } Group2Organism[linePieces[0]] = linePieces[i]; } //check date format // BioSample has several accepted date formats like "DD-Mmm-YYYY" (eg., 30-Oct-2010) or standard "YYYY-mm-dd" or "YYYY-mm" (eg 2010-10-30, 2010-10). if (headers[i] == "collection_date") { //will autocorrect if possible bool okay = checkDateFormat(linePieces[i]); if (!okay) { m->setControl_pressed(true); } } if (headers[i] == "lat_lon") { //check format string lat_lon = linePieces[i]; vector latLon = util.splitWhiteSpace(lat_lon); if (latLon.size() > 4) { m->mothurOut("[ERROR]: " + linePieces[i] + " is not in correct format. Specify as degrees latitude and longitude in format 'd[d.dddd] N|S d[dd.dddd] W|E', eg, 38.98 N 77.11 W., quitting.\n"); m->setControl_pressed(true); }else if (latLon.size() == 4) { // 38.98 N 77.11 W bool isOkay = true; if (!util.isNumeric1(latLon[0])) { isOkay = false; } if (!util.isNumeric1(latLon[2])) { isOkay = false; } latLon[1] = toupper(latLon[1][0]); if ((latLon[1] != "N") && (latLon[1] != "S")) { isOkay = false; } latLon[3] = toupper(latLon[3][0]); if ((latLon[3] != "E") && (latLon[3] != "W")) { isOkay = false; } if (!isOkay) { m->mothurOut("[ERROR]: " + linePieces[i] + " is not in correct format. 
Specify as degrees latitude and longitude in format 'd[d.dddd] N|S d[dd.dddd] W|E', eg, 38.98 N 77.11 W., quitting.\n"); m->setControl_pressed(true); } }else if (latLon.size() == 2) { // 38.98N 77.11W bool isOkay = true; int firstLength = latLon[0].length(); int secondLength = latLon[1].length(); string NSDir = latLon[0].substr(firstLength-1, 1); string EWDir = latLon[1].substr(secondLength-1, 1); string degreeFirst = latLon[0].substr(0, firstLength-1); string degreeSecond = latLon[1].substr(0, secondLength-1); if (!util.isNumeric1(degreeFirst)) { isOkay = false; } if (!util.isNumeric1(degreeSecond)) { isOkay = false; } NSDir = toupper(NSDir[0]); if ((NSDir != "N") && (NSDir != "S")) { isOkay = false; } EWDir = toupper(EWDir[0]); if ((EWDir != "E") && (EWDir != "W")) { isOkay = false; } if (!isOkay) { m->mothurOut("[ERROR]: " + linePieces[i] + " is not in correct format. Specify as degrees latitude and longitude in format 'd[d.dddd] N|S d[dd.dddd] W|E', eg, 38.98 N 77.11 W., quitting.\n"); m->setControl_pressed(true); }else { linePieces[i] = degreeFirst + " " + NSDir + " " + degreeSecond + " " + EWDir; } }else { m->mothurOut("[ERROR]: " + linePieces[i] + " is not in correct format. Specify as degrees latitude and longitude in format 'd[d.dddd] N|S d[dd.dddd] W|E', eg, 38.98 N 77.11 W., quitting.\n"); m->setControl_pressed(true); } } if (linePieces[i] == "#N/B") { m->mothurOut("[WARNING]: #N/B is not acceptable. Unknown or inapplicable fields MUST be assigned 'missing' value, correcting.\n"); linePieces[i] = "missing"; } if (linePieces[i] != "missing") { allNA[headers[i]] = false; } categories[headers[i]] = linePieces[i]; } //does this sample already match an existing sample? bool isOkaySample = true; for (map >:: iterator it2 = mimarks.begin(); it2 != mimarks.end(); it2++) { if (m->getControl_pressed()) { break; } bool allSame = true; for (int i = 1; i < headers.size(); i++) { if ((it2->second)[headers[i]] != categories[headers[i]]) { allSame = false; } } if (allSame) { m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sample to " + it2->first + ". It has all the same attributes in the MIMarks file. Samples must have distinguishing features to be uploaded to the NCBI library, please correct.\n"); m->setControl_pressed(true); isOkaySample = false; } } if (isOkaySample) { mimarks[linePieces[0]] = categories; } } else { m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sampleName. 
Sample names must be unique, please correct.\n"); m->setControl_pressed(true); } } } in.close(); //add in values for "scrap" group map categories; //start after *sample_name for (int i = 1; i < headers.size(); i++) { categories[headers[i]] = "missing"; if (headers[i] == "organism") { categories[headers[i]] = "metagenome"; } if (headers[i] == "description") { categories[headers[i]] = "these sequences were scrapped"; } if (headers[i] == "sample_title") { categories[headers[i]] = "these sequences were scrapped"; } } mimarks["scrap"] = categories; Group2Organism["scrap"] = "metagenome"; if (organismError) { string organismTypes = ""; for (int i = 0; i < acceptableOrganisms.size()-1; i++) { organismTypes += acceptableOrganisms[i] + ", "; } organismTypes += acceptableOrganisms[acceptableOrganisms.size()-1]; m->mothurOut("\n[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n\n\n"); } return 0; } catch(exception& e) { m->errorOut(e, "SRACommand", "readMIMarksFile"); exit(1); } } //********************************************************************************************************************** /* file option 1 sfffile1 oligosfile1 sfffile2 oligosfile2 ... file option 2 fastqfile1 oligosfile1 fastqfile2 oligosfile2 ... file option 3 ffastqfile1 rfastqfile1 ffastqfile2 rfastqfile2 ... file option 4 group fastqfile fastqfile group fastqfile fastqfile group fastqfile fastqfile ... file option 5 My.forward.fastq My.reverse.fastq none My.rindex.fastq //none is an option is no forward or reverse index file ... */ int SRACommand::readFile(map >& files){ try { bool runParseFastqFile = false; bool using3NONE = false; inputfile = file; files.clear(); fileOption = 0; FileFile dataFile(inputfile, "sra"); vector< vector > dataFiles = dataFile.getFiles(); int dataFileFormat = dataFile.getFileFormat(); map file2Group = dataFile.getFile2Group(); if (dataFile.containsIndexFiles() && (!setOligosParameter)) { m->mothurOut("[ERROR]: You must have an oligosfile with the index file option. Aborting. \n"); m->setControl_pressed(true); } if (dataFileFormat == 2) { //3 column file if (setOligosParameter) { m->mothurOut("[ERROR]: You cannot have an oligosfile and 3 column file option at the same time. Aborting. 
\n"); m->setControl_pressed(true); } } for (int i = 0; i < dataFiles.size(); i++) { string group = file2Group[i]; string thisFileName1, thisFileName2, findex, rindex; thisFileName1 = dataFiles[i][0]; thisFileName2 = dataFiles[i][1]; findex = dataFiles[i][2]; rindex = dataFiles[i][3]; if (dataFileFormat == 1) { //2 column libLayout = "single"; if (!setOligosParameter) { //process pair int pos = thisFileName1.find(".sff"); if (pos != string::npos) {//these files are sff files fileOption = 1; isSFF = true; sfffile = thisFileName1; oligosfile = thisFileName2; if (m->getDebug()) { m->mothurOut("[DEBUG]: about to read oligos\n"); } readOligos(); if (m->getDebug()) { m->mothurOut("[DEBUG]: about to parse\n"); } parseSffFile(files); if (m->getDebug()) { m->mothurOut("[DEBUG]: done parsing " + sfffile + "\n"); } }else{ fileOption = 2; isSFF = false; fastqfile = thisFileName1; oligosfile = thisFileName2; if (m->getDebug()) { m->mothurOut("[DEBUG]: about to read oligos\n"); } readOligos(); if (m->getDebug()) { m->mothurOut("[DEBUG]: about to parse\n"); } parseFastqFile(files); if (m->getDebug()) { m->mothurOut("[DEBUG]: done parsing " + fastqfile + "\n"); } } }else { runParseFastqFile = true; libLayout = "paired"; fileOption = 3; } }else if (dataFileFormat == 2) { //3 column if ((thisFileName2 != "none") && (thisFileName2 != "NONE" )) { if (!using3NONE) { libLayout = "paired"; } else { m->mothurOut("[ERROR]: You cannot have a 3 column file with paired and unpaired files at the same time. Aborting. \n"); m->setControl_pressed(true); } } else { thisFileName2 = ""; libLayout = "single"; using3NONE = true; } string group = file2Group[i]; string thisname = thisFileName1 + " " + thisFileName2; if (using3NONE) { thisname = thisFileName1; } map >::iterator it = files.find(group); if (it == files.end()) { Groups.push_back(group); vector temp; temp.push_back(thisname); files[group] = temp; }else { files[group].push_back(thisname); } fileOption = 4; }else if (dataFileFormat == 3) { //4 column if ((findex == "none") || (findex == "NONE")){ findex = ""; } if ((rindex == "none") || (rindex == "NONE")){ rindex = ""; } libLayout = "paired"; runParseFastqFile = true; fileOption = 5; } } if (runParseFastqFile) { vector theseFiles; string commandString = "fasta=f, qfile=f, file=" + file; commandString += ", oligos=" + oligosfile; //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); } if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); } if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); } if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); } if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); } if (util.isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; } m->mothurOut("\n/******************************************/\n"); m->mothurOut("Running command: fastq.info(" + commandString + ")\n"); current->setMothurCalling(true); Command* fastqinfoCommand = new ParseFastaQCommand(commandString); fastqinfoCommand->execute(); map > filenames = fastqinfoCommand->getOutputFiles(); map >::iterator it = filenames.find("fastq"); if (it != filenames.end()) { theseFiles = it->second; } else { m->setControl_pressed(true); } // error in sffinfo delete fastqinfoCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); for (int i = 0; i < theseFiles.size(); i++) { outputNames.push_back(theseFiles[i]); } mapGroupToFile(files, theseFiles); 
fixMap(files); } if (files.size() == 0) { m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "SRACommand", "readFile"); exit(1); } } //********************************************************************************************************************** int SRACommand::parseSffFile(map >& files){ try { vector theseFiles; inputfile = sfffile; libLayout = "single"; //controlled vocab isSFF = true; //run sffinfo to parse sff file into individual sampled sff files string commandString = "sff=" + sfffile; commandString += ", oligos=" + oligosfile; //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); } if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); } if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); } if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); } if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); } if (util.isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; } if (util.isTrue(trim)) { commandString += ", trim=" + trim; } m->mothurOutEndLine(); m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: sffinfo(" + commandString + ")\n"); current->setMothurCalling(true); Command* sffinfoCommand = new SffInfoCommand(commandString); sffinfoCommand->execute(); map > filenames = sffinfoCommand->getOutputFiles(); map >::iterator it = filenames.find("sff"); if (it != filenames.end()) { theseFiles = it->second; } else { m->setControl_pressed(true); } // error in sffinfo delete sffinfoCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); for (int i = 0; i < theseFiles.size(); i++) { outputNames.push_back(theseFiles[i]); } mapGroupToFile(files, theseFiles); return 0; } catch(exception& e) { m->errorOut(e, "SRACommand", "readFile"); exit(1); } } //********************************************************************************************************************** int SRACommand::parseFastqFile(map >& files){ try { vector theseFiles; inputfile = fastqfile; //run sffinfo to parse sff file into individual sampled sff files string commandString = "fasta=f, qfile=f, fastq=" + fastqfile; commandString += ", oligos=" + oligosfile; //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); } if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); } if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); } if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); } if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); } if (util.isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; } m->mothurOut("\n/******************************************/\n"); m->mothurOut("Running command: fastq.info(" + commandString + ")\n"); current->setMothurCalling(true); Command* fastqinfoCommand = new ParseFastaQCommand(commandString); fastqinfoCommand->execute(); map > filenames = fastqinfoCommand->getOutputFiles(); map >::iterator it = filenames.find("fastq"); if (it != filenames.end()) { theseFiles = it->second; } else { m->setControl_pressed(true); } // error in sffinfo delete fastqinfoCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); for (int i = 0; i < theseFiles.size(); i++) { outputNames.push_back(theseFiles[i]); } mapGroupToFile(files, theseFiles); return 0; } 
catch(exception& e) { m->errorOut(e, "SRACommand", "readFile"); exit(1); } } //*************************************************************************************************************** //maps group to file int SRACommand::mapGroupToFile(map >& files, vector theseFiles){ try { for (int i = 0; i < Groups.size(); i++) { //correct filename issues if group name contains '-' characters string thisGroup = Groups[i]; for (int k = 0; k < thisGroup.length(); k++) { if (thisGroup[k] == '-') { thisGroup[k] = '_'; } } for (int j = 0; j < theseFiles.size(); j++) { string tempName = util.getSimpleName(theseFiles[j]); int pos = theseFiles[j].find(thisGroup); if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name if (theseFiles[j][pos+thisGroup.length()] == '.') { //final.soil.sff vs final.soil2.sff both would match soil. map >::iterator it = files.find(thisGroup); if (it == files.end()) { vector temp; temp.push_back(theseFiles[j]); files[Groups[i]] = temp; }else { files[Groups[i]].push_back(theseFiles[j]); } } } } } return 0; } catch(exception& e) { m->errorOut(e, "SRACommand", "mapGroupToFile"); exit(1); } } //*************************************************************************************************************** //fixes map to files for index files parse int SRACommand::fixMap(map >& files){ try { for (map >::iterator it = files.begin(); it != files.end(); it++) { vector theseFiles = it->second; if (theseFiles.size() != 2) { m->mothurOut("[ERROR]: unexpected number of files, quitting. \n."); m->setControl_pressed(true); } if (m->getControl_pressed()) { return 0; } vector temp; temp.resize(1, ""); for (int j = 0; j < theseFiles.size(); j++) { string tempName = util.getSimpleName(theseFiles[j]); int pos = theseFiles[j].find("forward.fastq"); if (pos != string::npos) { //you have a potential match for the forward file if (temp[0] == "") { temp[0] = theseFiles[j]; }else { string reverse = temp[0]; temp[0] = theseFiles[j] + " " + reverse; } }else { pos = theseFiles[j].find("reverse.fastq"); if (pos != string::npos) { //you have a potential match for the reverse file if (temp[0] == "") { temp[0] = theseFiles[j]; }else { temp[0] += " " + theseFiles[j]; } }else { m->mothurOut("[ERROR]: unexpected parsing results, quitting. \n."); m->setControl_pressed(true); //shouldn't get here unless the fastq.info changes the format of the output filenames??? } } } it->second = temp; } return 0; } catch(exception& e) { m->errorOut(e, "SRACommand", "fixMap"); exit(1); } } //*************************************************************************************************************** //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files. 
int SRACommand::checkGroups(map >& files){ try { vector newGroups; for (int i = 0; i < Groups.size(); i++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: group " + toString(i) + " = " + Groups[i] + "\n"); } map >::iterator it = files.find(Groups[i]); //no files for this group, remove it if (it == files.end()) { } else { newGroups.push_back(Groups[i]); } } Groups = newGroups; return 0; } catch(exception& e) { m->errorOut(e, "SRACommand", "checkGroups"); exit(1); } } //*************************************************************************************************************** bool SRACommand::readOligos(){ try { set uniqueNames; uniqueNames.insert("scrap"); Oligos oligos; if ((fileOption == 3) || (fileOption == 5)) { oligos.read(oligosfile, false); } //like make.contigs else { oligos.read(oligosfile); } if (m->getControl_pressed()) { return false; } //error in reading oligos if (oligos.hasPairedPrimers() || oligos.hasPairedBarcodes()) { pairedOligos = true; libLayout = "paired"; } else { pairedOligos = false; libLayout = "single"; } vector thisFilesLinkers = oligos.getLinkers(); for (int i = 0; i < thisFilesLinkers.size(); i++) { linkers.push_back(thisFilesLinkers[i]); break; } if (thisFilesLinkers.size() > 1) { m->mothurOut("[WARNING]: the make.sra command only allows for the use of one linker at a time, disregarding all but first one.\n"); } vector thisFilesSpacers = oligos.getSpacers(); for (int i = 0; i < thisFilesSpacers.size(); i++) { spacers.push_back(thisFilesSpacers[i]); break; } if (thisFilesSpacers.size() > 1) { m->mothurOut("[WARNING]: the make.sra command only allows for the use of one spacer at a time, disregarding all but first one.\n"); } Groups = oligos.getSRAGroupNames(); return true; } catch(exception& e) { m->errorOut(e, "SRACommand", "readOligos"); exit(1); } } //********************************************************************/ //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT bool SRACommand::checkCasesPlatforms(string& platform){ try { string original = platform; bool isOkay = true; //remove users possible case errors for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); } //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT if ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT") || (platform == "454")) { } else { isOkay = false; } if (isOkay) { if (platform == "454") { platform = "_LS454"; } }else { m->mothurOut("[ERROR]: " + original + " is not a valid platform option. 
Valid platform options are _LS454, ILLUMINA-ION, TORRENT or PACBIO_SMRT.\n"); abort = true; } return isOkay; } catch(exception& e) { m->errorOut(e, "SRACommand", "checkCasesPlatforms"); exit(1); } } //********************************************************************/ //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){ try { string original = instrumentModel; bool isOkay = true; //remove users possible case errors for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); } //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT if (platform == "_LS454") { //instrument model options are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-unspecified if ((instrumentModel == "454_GS") || (instrumentModel == "454_GS_20") || (instrumentModel == "454_GS_FLX") || (instrumentModel == "454_GS_FLX_TITANIUM") || (instrumentModel == "454_GS_JUNIOR") || (instrumentModel == "UNSPECIFIED")) { } else { isOkay = false; } if (isOkay) { if (instrumentModel == "454_GS_FLX_TITANIUM") { instrumentModel = "454_GS_FLX_Titanium"; } if (instrumentModel == "454_GS_JUNIOR") { instrumentModel = "454_GS_Junior"; } if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; } }else { m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified.\n"); abort = true; } }else if (platform == "ILLUMINA") { //instrument model options are Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-unspecified if ((instrumentModel == "ILLUMINA_GENOME_ANALYZER") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") || (instrumentModel == "ILLUMINA_HISEQ_2000") || (instrumentModel == "ILLUMINA_HISEQ_1000") || (instrumentModel == "ILLUMINA_MISEQ") || (instrumentModel == "UNSPECIFIED")) { } else { isOkay = false; } if (isOkay) { if (instrumentModel == "ILLUMINA_GENOME_ANALYZER") { instrumentModel = "Illumina_Genome_Analyzer"; } if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") { instrumentModel = "Illumina_Genome_Analyzer_II"; } if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") { instrumentModel = "Illumina_Genome_Analyzer_IIx"; } if (instrumentModel == "ILLUMINA_HISEQ_2000") { instrumentModel = "Illumina_HiSeq_2000"; } if (instrumentModel == "ILLUMINA_HISEQ_1000") { instrumentModel = "Illumina_HiSeq_1000"; } if (instrumentModel == "ILLUMINA_MISEQ") { instrumentModel = "Illumina_MiSeq"; } if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; } }else { m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. 
Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified.\n"); abort = true; } }else if (platform == "ION_TORRENT") { //instrument model options are Ion_Torrent_PGM-unspecified if ((instrumentModel == "ION_TORRENT_PGM") || (instrumentModel == "UNSPECIFIED")) { } else { isOkay = false; } if (isOkay) { if (instrumentModel == "ION_TORRENT_PGM") { instrumentModel = "Ion_Torrent_PGM"; } if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; } }else { m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Ion_Torrent_PGM or unspecified.\n"); abort = true; } }else if (platform == "PACBIO_SMRT") { //instrument model options are PacBio_RS-unspecified if ((instrumentModel == "PACBIO_RS") || (instrumentModel == "UNSPECIFIED")) { } else { isOkay = false; } if (isOkay) { if (instrumentModel == "PACBIO_RS") { instrumentModel = "PacBio_RS"; } if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; } }else { m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are PacBio_RS or unspecified.\n"); abort = true; } } return isOkay; } catch(exception& e) { m->errorOut(e, "SRACommand", "checkCasesInstrumentModels"); exit(1); } } //********************************************************************************************************************** //AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER bool SRACommand::checkCasesLibStrategy(string& libStrategy){ try { string original = libStrategy; bool isOkay = true; //remove users possible case errors for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); } if ((libStrategy == "AMPLICON") || (libStrategy == "WGA") || (libStrategy == "WGS") || (libStrategy == "WGX") || (libStrategy == "RNA-SEQ") || (libStrategy == "MIRNA-SEQ") || (libStrategy == "WCS") || (libStrategy == "CLONE") || (libStrategy == "POOLCLONE") || (libStrategy == "CLONEEND") || (libStrategy == "FINISHING") || (libStrategy == "CHIP-SEQ") || (libStrategy == "MNASE-SEQ") || (libStrategy == "DNASE-HYPERSENSITIVITY") || (libStrategy == "BISULFITE-SEQ") || (libStrategy == "TN-SEQ") || (libStrategy == "EST") || (libStrategy == "FL-CDNA") || (libStrategy == "CTS") || (libStrategy == "MRE-SEQ")|| (libStrategy == "MEDIP-SEQ") || (libStrategy == "MBD-SEQ") || (libStrategy == "OTHER")) { } else { isOkay = false; } if (isOkay) { if (libStrategy == "RNA-SEQ") { libStrategy = "RNA-Seq"; } if (libStrategy == "MIRNA-SEQ") { libStrategy = "miRNA-Seq"; } if (libStrategy == "CHIP-SEQ") { libStrategy = "ChIP-Seq"; } if (libStrategy == "MNASE-SEQ") { libStrategy = "MNase-Seq"; } if (libStrategy == "DNASE-HYPERSENSITIVITY") { libStrategy = "DNase-Hypersensitivity"; } if (libStrategy == "BISULFITE-SEQ") { libStrategy = "Bisulfite-Seq"; } if (libStrategy == "TN-SEQ") { libStrategy = "Tn-Seq"; } if (libStrategy == "FL-CDNA") { libStrategy = "FL-cDNA"; } if (libStrategy == "MRE-SEQ") { libStrategy = "MRE-Seq"; } if (libStrategy == "MEDIP-SEQ") { libStrategy = "MeDIP-Seq"; } }else { m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option. 
Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER.\n"); abort = true; } return isOkay; } catch(exception& e) { m->errorOut(e, "SRACommand", "checkCasesLibStrategy"); exit(1); } } //********************************************************************************************************************** //METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER bool SRACommand::checkCasesLibSource(string& libSource){ try { string original = libSource; bool isOkay = true; //remove users possible case errors for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); } if ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER")) { } else { isOkay = false; } if (isOkay) { }else { m->mothurOut("[ERROR]: " + original + " is not a valid libsource option. Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER.\n"); abort = true; } return isOkay; } catch(exception& e) { m->errorOut(e, "SRACommand", "checkCasesLibStrategy"); exit(1); } } //********************************************************************************************************************** //PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified bool SRACommand::checkCasesLibSelection(string& libSelection){ try { string original = libSelection; bool isOkay = true; //remove users possible case errors for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); } if ((libSelection == "PCR") || (libSelection == "RANDOM") || (libSelection == "RANDOM_PCR") || (libSelection == "RT-PCR") || (libSelection == "HMPR") || (libSelection == "MF") || (libSelection == "CF-S") || (libSelection == "CF-H") || (libSelection == "CF-T") || (libSelection == "CF-M") || (libSelection == "MDA") || (libSelection == "MSLL") || (libSelection == "CDNA") || (libSelection == "CHIP") || (libSelection == "MNASE") || (libSelection == "DNASE") || (libSelection == "HYBRID_SELECTION") || (libSelection == "REDUCED_REPRESENTATION") || (libSelection == "RESTRICTION_DIGEST") || (libSelection == "5-METHYLCYTIDINE_ANTIBODY") || (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") || (libSelection == "CAGE") || (libSelection == "RACE") || (libSelection == "SIZE_FRACTIONATION") || (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") || (libSelection == "OTHER") || (libSelection == "UNSPECIFIED")) { } else { isOkay = false; } if (isOkay) { if (libSelection == "CDNA") { libSelection = "cDNA"; } if (libSelection == "CHIP") { libSelection = "ChIP"; } if (libSelection == "MNASE") { libSelection = "MNase"; } if (libSelection == "DNASE") { libSelection = "DNAse"; } if (libSelection == "HYBRID_SELECTION") { libSelection = "Hybrid_Selection"; } if (libSelection == "REDUCED_REPRESENTATION") { libSelection = "Reduced_Representation"; } if (libSelection == "RESTRICTION_DIGEST") { libSelection = "Restriction_Digest"; } if (libSelection == "5-METHYLCYTIDINE_ANTIBODY") { libSelection = 
"5-methylcytidine_antibody"; } if (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") { libSelection = "MBD2_protein_methyl-CpG_binding_domain"; } if (libSelection == "SIZE_FRACTIONATION") { libSelection = "size_fractionation"; } if (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") { libSelection = "Padlock_probes_capture_method"; } if (libSelection == "OTHER") { libSelection = "other"; } if (libSelection == "UNSPECIFIED") { libSelection = "unspecified"; } }else { m->mothurOut("[ERROR]: " + original + " is not a valid libselection option. Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified.\n"); abort = true; } return isOkay; } catch(exception& e) { m->errorOut(e, "SRACommand", "checkCasesLibSelection"); exit(1); } } //********************************************************************************************************************** //METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER bool SRACommand::checkCasesDataType(string& dataType){ try { string original = dataType; bool isOkay = true; //remove users possible case errors for (int i = 0; i < dataType.size(); i++) { dataType[i] = toupper(dataType[i]); } if ((dataType == "METAGENOME") || (dataType == "GENOME_SEQUENCING") || (dataType == "METAGENOMIC_ASSEMBLY") || (dataType == "ASSEMBLY") || (dataType == "TRANSCRIPTOME") || (dataType == "PROTEOMIC") || (dataType == "MAP") || (dataType == "CLONE_ENDS") || (dataType == "TARGETED_LOCI") || (dataType == "RANDOM_SURVEY") || (dataType == "EXOME") || (dataType == "VARIATION") || (dataType == "EPIGENOMICS") || (dataType == "PHENOTYPE") || (dataType == "GENOTYPE") || (dataType == "OTHER")) { dataType = original; } else { isOkay = false; } if (isOkay) { }else { m->mothurOut("[ERROR]: " + original + " is not a valid datatype option. Valid datatype options are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER.\n"); abort = true; } return isOkay; } catch(exception& e) { m->errorOut(e, "SRACommand", "checkCasesDataType"); exit(1); } } //********************************************************************************************************************** bool SRACommand::sanityCheckMiMarksGroups(){ try { bool isOkay = true; for (int i = 0; i < Groups.size(); i++) { if (m->getControl_pressed()) { break; } map >::iterator it = mimarks.find(Groups[i]); if (it == mimarks.end()) { isOkay = false; m->mothurOut("[ERROR]: MIMarks file is missing group " + Groups[i] + ", please correct.\n"); } } if (!isOkay) { m->setControl_pressed(true); } return isOkay; } catch(exception& e) { m->errorOut(e, "SRACommand", "sanityCheckMiMarksGroups"); exit(1); } } //********************************************************************************************************************** //BioSample has several accepted date formats like "DD-Mmm-YYYY" (eg., 30-Oct-2010) or standard "YYYY-mm-dd" or "YYYY-mm" (eg 2010-10-30, 2010-10). 
bool SRACommand::checkDateFormat(string& date){ try { string thisYear, thisMonth, thisDay; util.getCurrentDate(thisYear, thisMonth, thisDay); //used to make sure future dates are not entered for collection dates. int thisYearNumber; util.mothurConvert(thisYear, thisYearNumber); for (int i = 0; i < date.length(); i++) { if (date[i] == '/') { date[i] = '-'; } } if (m->getDebug()) { m->mothurOut("[DEBUG]: date = " + date + "\n"); } map months; months["Jan"] = 31; months["Feb"] = 29; months["Mar"] = 31; months["Apr"] = 30; months["Jun"] = 30; months["May"] = 31; months["Jul"] = 31; months["Aug"] = 31; months["Sep"] = 30;months["Oct"] = 31; months["Nov"] = 30; months["Dec"] = 31; map monthsN; monthsN["01"] = 31; monthsN["02"] = 29; monthsN["03"] = 31; monthsN["04"] = 30; monthsN["06"] = 30; monthsN["05"] = 31; monthsN["07"] = 31; monthsN["08"] = 31; monthsN["09"] = 30;monthsN["10"] = 31; monthsN["11"] = 30; monthsN["12"] = 31; map convertMonths; convertMonths["Jan"] = 1; convertMonths["Feb"] = 2; convertMonths["Mar"] = 3; convertMonths["Apr"] = 4; convertMonths["Jun"] = 6; convertMonths["May"] = 5; convertMonths["Jul"] = 7; convertMonths["Aug"] = 8; convertMonths["Sep"] = 9;convertMonths["Oct"] = 10; convertMonths["Nov"] = 11; convertMonths["Dec"] = 12; bool isOkay = true; if (util.containsAlphas(date)) { // then format == "DD-Mmm-YYYY", "Mmm-YYYY" vector pieces; if (date.find_first_of('-') != string::npos) { util.splitAtDash(date, pieces); } else { pieces = util.splitWhiteSpace(date); } if (m->getDebug()) { m->mothurOut("[DEBUG]: in alpha\n"); } //check "Mmm-YYYY" bool checkMonth = false; if (pieces.size() == 2) { //"Mmm-YYYY" if (m->getDebug()) { m->mothurOut("[DEBUG]: pieces = 2 -> " + pieces[0] + '\t' + pieces[1] + "\n"); } map::iterator it; it = months.find(pieces[0]); //is this a valid month if (it != months.end()) { int yearNumber; util.mothurConvert(pieces[1], yearNumber); if (yearNumber > thisYearNumber) { m->mothurOut("[ERROR]: year " + pieces[1] + " is in the future, please correct. \n"); isOkay = false; } else if (yearNumber == thisYearNumber) { checkMonth = true; } if (pieces[1].size() != 4) { m->mothurOut("[ERROR]: " + pieces[1] + " is not a valid format for the year. Must be YYYY. \n"); isOkay = false; } }else { //see if we can correct if pieces[0][0] = toupper(pieces[0][0]); for (int i = 1; i < pieces[0].size(); i++) { pieces[0][i] = tolower(pieces[0][i]); } //look again it = months.find(pieces[0]); //is this a valid month if (it == months.end()) { m->mothurOut("[ERROR] " + pieces[0] + " is not a valid month. Looking for ""Mmm-YYYY\" format.\n"); isOkay = false; } else { int yearNumber; util.mothurConvert(pieces[1], yearNumber); if (yearNumber > thisYearNumber) { m->mothurOut("[ERROR]: year " + pieces[1] + " is in the future, please correct. \n"); isOkay = false; } else if (yearNumber == thisYearNumber) { checkMonth = true; } if (pieces[1].size() != 4) { m->mothurOut("[ERROR]: " + pieces[1] + " is not a valid format for the year. Must be YYYY. \n"); isOkay = false; } } } if (isOkay) { if (checkMonth) { int monthNumber; util.mothurConvert(thisMonth, monthNumber); int monthInt = convertMonths[pieces[0]]; if (monthInt > monthNumber) { m->mothurOut("[ERROR]: month " + pieces[0] + " is in the future, please correct. 
\n"); isOkay = false; } else { date = pieces[0] + "-" + pieces[1]; } }else { date = pieces[0] + "-" + pieces[1]; } } }else if (pieces.size() == 3) { //DD-Mmm-YYYY" bool checkMonth = false; if (m->getDebug()) { m->mothurOut("[DEBUG]: pieces = 3 -> " + pieces[0] + '\t' + pieces[1] + '\t' + pieces[2] + "\n"); } map::iterator it; it = months.find(pieces[1]); //is this a valid month if (it != months.end()) { int yearNumber; util.mothurConvert(pieces[2], yearNumber); if (yearNumber > thisYearNumber) { m->mothurOut("[ERROR]: year " + pieces[2] + " is in the future, please correct. \n"); isOkay = false; } else if (yearNumber == thisYearNumber) { checkMonth = true; } if (pieces[2].size() != 4) { m->mothurOut("[ERROR]: " + pieces[2] + " is not a valid format for the year. Must be YYYY. \n"); isOkay = false; } }else { //see if we can correct if pieces[1][0] = toupper(pieces[1][0]); for (int i = 1; i < pieces[1].size(); i++) { pieces[1][i] = tolower(pieces[1][i]); } //look again it = months.find(pieces[1]); //is this a valid month if (it == months.end()) { m->mothurOut("[ERROR] " + pieces[1] + " is not a valid month. Looking for ""Mmm-YYYY\" format.\n"); isOkay = false; } else { int yearNumber; util.mothurConvert(pieces[2], yearNumber); if (yearNumber > thisYearNumber) { m->mothurOut("[ERROR]: year " + pieces[2] + " is in the future, please correct. \n"); isOkay = false; } else if (yearNumber == thisYearNumber) { checkMonth = true; } if (pieces[2].size() != 4) { m->mothurOut("[ERROR]: " + pieces[2] + " is not a valid format for the year. Must be YYYY. \n"); isOkay = false; } } } if (isOkay) { //check to make sure day is correct for month chosen int dayNumber; util.mothurConvert(pieces[0], dayNumber); if (dayNumber <= it->second) { if (dayNumber < 10) { //add leading 0. if (pieces[0].length() == 1) { pieces[0] = '0'+ pieces[0]; } } } if (checkMonth) { int monthNumber; util.mothurConvert(thisMonth, monthNumber); int monthInt = convertMonths[pieces[1]]; if (monthInt > monthNumber) { m->mothurOut("[ERROR]: month " + pieces[1] + " is in the future, please correct. \n"); isOkay = false; } else { date = pieces[0] + "-" + pieces[1] + "-" + pieces[2]; } }else { date = pieces[0] + "-" + pieces[1] + "-" + pieces[2]; } } } }else { // no alpha months "YYYY" or "YYYY-mm-dd" or "YYYY-mm" if (m->getDebug()) { m->mothurOut("[DEBUG]: in nonAlpha\n"); } vector pieces; if (date.find_first_of('-') != string::npos) { util.splitAtDash(date, pieces); } else { pieces = util.splitWhiteSpace(date); } string format = "yearFirst"; if (pieces[0].length() == 4) { format = "yearFirst"; } else if (pieces[pieces.size()-1].length() == 4) { format = "yearLast"; } if (format == "yearFirst" ) { //just year if (pieces.size() == 1) { if (m->getDebug()) { m->mothurOut("[DEBUG]: yearFirst pieces = 1 -> " + pieces[0] + "\n"); } if (pieces[0].size() != 4) { m->mothurOut("[ERROR]: " + pieces[0] + " is not a valid format for the year. Must be YYYY. \n"); isOkay = false; } else { int yearNumber; util.mothurConvert(pieces[0], yearNumber); if (yearNumber > thisYearNumber) { m->mothurOut("[ERROR]: year " + pieces[0] + " is in the future, please correct. \n"); isOkay = false; } if (isOkay) { date=pieces[0]; } } }else if (pieces.size() == 2) { //"YYYY-mm" if (m->getDebug()) { m->mothurOut("[DEBUG]: yearFirst pieces = 2 -> " + pieces[0] + '\t' + pieces[1] + "\n"); } bool checkMonth = false; if (pieces[0].size() != 4) { m->mothurOut("[ERROR]: " + pieces[0] + " is not a valid format for the year. Must be YYYY. 
\n"); isOkay = false; } int yearNumber; util.mothurConvert(pieces[0], yearNumber); if (yearNumber > thisYearNumber) { m->mothurOut("[ERROR]: year " + pieces[0] + " is in the future, please correct. \n"); isOkay = false; } else if (yearNumber == thisYearNumber) { checkMonth = true; } //perhaps needs leading 0 if (pieces[1].length() < 2) { pieces[1] = "0" + pieces[1]; } map::iterator it = monthsN.find(pieces[1]); if (it == monthsN.end()) { m->mothurOut("[ERROR]: " + pieces[1] + " is not a valid format for the month. Must be mm. \n"); isOkay = false; }else if (checkMonth) { int monthNumber; util.mothurConvert(thisMonth, monthNumber); int monthInt = convertMonths[pieces[1]]; if (monthInt > monthNumber) { m->mothurOut("[ERROR]: month " + pieces[1] + " is in the future, please correct. \n"); isOkay = false; } } if (isOkay) { date = pieces[0] + "-" + pieces[1]; } }else if (pieces.size() == 3) { //"YYYY-mm-dd" if (m->getDebug()) { m->mothurOut("[DEBUG]: yearFirst pieces = 3 -> " + pieces[0] + '\t' + pieces[1] + '\t' + pieces[2] + "\n"); } bool checkMonth = false; if (pieces[0].size() != 4) { m->mothurOut("[ERROR]: " + pieces[0] + " is not a valid format for the year. Must be YYYY. \n"); isOkay = false; } int yearNumber; util.mothurConvert(pieces[0], yearNumber); if (yearNumber > thisYearNumber) { m->mothurOut("[ERROR]: year " + pieces[0] + " is in the future, please correct. \n"); isOkay = false; } else if (yearNumber == thisYearNumber) { checkMonth = true; } //perhaps needs leading 0 if (pieces[1].length() < 2) { pieces[1] = "0" + pieces[1]; } map::iterator it = monthsN.find(pieces[1]); if (it == monthsN.end()) { m->mothurOut("[ERROR]: " + pieces[1] + " is not a valid format for the month. Must be mm. \n"); isOkay = false; }else { if (checkMonth) { int monthNumber; util.mothurConvert(thisMonth, monthNumber); int monthInt = convertMonths[pieces[1]]; if (monthInt > monthNumber) { m->mothurOut("[ERROR]: month " + pieces[1] + " is in the future, please correct. \n"); isOkay = false; } } //is the day in range int maxDays = it->second; //perhaps needs leading 0 if (pieces[2].length() < 2) { pieces[2] = "0" + pieces[2]; } int day; util.mothurConvert(pieces[2], day); if (day <= maxDays) {} else { m->mothurOut("[ERROR]: " + pieces[2] + " is not a valid day for the month " + pieces[1]+ ". \n"); isOkay = false; } } if (isOkay) { date = pieces[0] + "-" + pieces[1] + "-" + pieces[2]; } } }else { // year last, try to fix format //if year last, then it could be dd-mm-yyyy or mm-dd-yyyy -> yyyy-mm-dd if (m->getDebug()) { m->mothurOut("[DEBUG]: yearLast pieces = 3 -> " + pieces[0] + '\t' + pieces[1] + '\t' + pieces[2] + "\n"); } if (pieces[2].size() != 4) { m->mothurOut("[ERROR]: " + pieces[2] + " is not a valid format for the year. Must be YYYY. \n"); isOkay = false; } bool checkMonth = false; int yearNumber; util.mothurConvert(pieces[0], yearNumber); if (yearNumber > thisYearNumber) { m->mothurOut("[ERROR]: year " + pieces[0] + " is in the future, please correct. \n"); isOkay = false; } else if (yearNumber == thisYearNumber) { checkMonth = true; } int first, second; util.mothurConvert(pieces[0], first); util.mothurConvert(pieces[1], second); if ((first <= 12) && (second <= 12)) { //we can't figure out which is the day and which is the month m->mothurOut("[ERROR]: " + pieces[0] + " and " + pieces[1] + " are both <= 12. Cannot determine which is the day and which is the month. 
\n"); isOkay = false; } else if ((first <= 12) && (second >= 12)) { //first=month and second = day, check valid date //perhaps needs leading 0 if (pieces[0].length() < 2) { pieces[0] = "0" + pieces[0]; } map::iterator it = monthsN.find(pieces[0]); if (it == monthsN.end()) { m->mothurOut("[ERROR]: " + pieces[0] + " is not a valid format for the month. Must be mm. \n"); isOkay = false; }else { if (checkMonth) { int monthNumber; util.mothurConvert(thisMonth, monthNumber); int monthInt = convertMonths[pieces[0]]; if (monthInt > monthNumber) { m->mothurOut("[ERROR]: month " + pieces[0] + " is in the future, please correct. \n"); isOkay = false; } } //is the day in range int maxDays = it->second; if (second <= maxDays) { //reformat to acceptable format //perhaps needs leading 0 if (pieces[1].length() < 2) { pieces[1] = "0" + pieces[1]; } date = pieces[2] + "-" + pieces[0] + "-" + pieces[1]; } else { m->mothurOut("[ERROR]: " + pieces[1] + " is not a valid day for the month " + pieces[0]+ ". \n"); isOkay = false; } } }else if ((second <= 12) && (first >= 12)) { //second=month and first = day, check valid date if (pieces[1].length() < 2) { pieces[1] = "0" + pieces[1]; } map::iterator it = monthsN.find(pieces[1]); if (it == monthsN.end()) { m->mothurOut("[ERROR]: " + pieces[1] + " is not a valid format for the month. Must be mm. \n"); isOkay = false; }else { if (checkMonth) { int monthNumber; util.mothurConvert(thisMonth, monthNumber); int monthInt = convertMonths[pieces[1]]; if (monthInt > monthNumber) { m->mothurOut("[ERROR]: month " + pieces[1] + " is in the future, please correct. \n"); isOkay = false; } } //is the day in range int maxDays = it->second; if (first <= maxDays) { //reformat to acceptable format //perhaps needs leading 0 if (pieces[0].length() < 2) { pieces[0] = "0" + pieces[0]; } date = pieces[2] + "-" + pieces[1] + "-" + pieces[0]; } else { m->mothurOut("[ERROR]: " + pieces[0] + " is not a valid day for the month " + pieces[1]+ ". \n"); isOkay = false; } } }else { m->mothurOut("[ERROR]: " + pieces[0] + " and " + pieces[1] + " are both > 12. No valid date. \n"); isOkay = false; } } } if (!isOkay) { m->mothurOut("[ERROR]: The date must be in one of the following formats: Date of sampling, in ""DD-Mmm-YYYY/"", ""Mmm-YYYY/"" or ""YYYY/"" format (eg., 30-Oct-1990, Oct-1990 or 1990) or ISO 8601 standard ""YYYY-mm-dd/"", ""YYYY-mm/"" (eg., 1990-10-30, 1990-10/"")"); } if (m->getDebug()) { m->mothurOut("[DEBUG]: date = " + date + "\n"); } return isOkay; } catch(exception& e) { m->errorOut(e, "SRACommand", "checkDateFormat"); exit(1); } } //********************************************************************************************************************** /* file option 1 sfffile1 oligosfile1 sfffile2 oligosfile2 ... file option 2 fastqfile1 oligosfile1 fastqfile2 oligosfile2 ... file option 3 ffastqfile1 rfastqfile1 ffastqfile2 rfastqfile2 ... file option 4 group fastqfile fastqfile group fastqfile fastqfile group fastqfile fastqfile ... file option 5 My.forward.fastq My.reverse.fastq none My.rindex.fastq //none is an option is no forward or reverse index file ... 
*/ int SRACommand::findFileOption(){ try { ifstream in; util.openInputFile(file, in); fileOption = 0; while(!in.eof()) { if (m->getControl_pressed()) { return 0; } string line = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpace(line); if (pieces.size() == 2) { //good pair and sff or fastq and oligos if (!setOligosParameter) { fileOption = 12; //1 or 2 }else { fileOption = 3; } }else if(pieces.size() == 3) { //good pair and paired read fileOption = 4; }else if (pieces.size() == 4) { fileOption = 5; } break; } in.close(); return fileOption; } catch(exception& e) { m->errorOut(e, "SRACommand", "findFileOption"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/sracommand.h000077500000000000000000000057561424121717000205370ustar00rootroot00000000000000// // sracommand.h // Mothur // // Created by SarahsWork on 10/28/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_sracommand_h #define Mothur_sracommand_h #include "command.hpp" #include "trimoligos.h" #include "oligos.h" #include "filefile.hpp" /**************************************************************************************************/ class SRACommand : public Command { public: SRACommand(string); ~SRACommand(){} vector setParameters(); string getCommandName() { return "make.sra"; } string getCommandCategory() { return "Sequence Processing"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "http://www.mothur.org/wiki/Make.sra"; } string getDescription() { return "create a Sequence Read Archive / SRA"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: struct Grant { string grantId, grantTitle, grantAgency; Grant(string i, string a) : grantId(i), grantAgency(a), grantTitle("") {} Grant(string i, string a, string t) : grantId(i), grantAgency(a), grantTitle(t) {} Grant() : grantId(""), grantAgency(""), grantTitle("") {} }; bool abort, isSFF, pairedOligos, setOligosParameter, includeScrap; int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, fileOption; string sfffile, fastqfile, file, oligosfile, contactfile, inputfile, mimarksfile, trim; string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType, dataType, checkorient; string submissionName, lastName, firstName, email, centerName, centerType, ownership, description, website, orientation, packageType; string projectName, projectTitle, inputDir; vector outputNames, Groups; vector Grants; map Group2Barcode; map Group2Primer; vector linkers; vector spacers; map Group2Organism; map > mimarks; //group -> valueForGroup> ex. 
F003D001 -> 42.282026 -83.733850> bool checkCasesInstrumentModels(string&); bool checkCasesPlatforms(string&); bool checkCasesLibStrategy(string&); bool checkCasesLibSource(string&); bool checkCasesLibSelection(string&); bool checkCasesDataType(string&); bool sanityCheckMiMarksGroups(); bool checkDateFormat(string& date); int readFile(map >&); int readContactFile(); int readMIMarksFile(); bool readOligos(); int parseSffFile(map >&); int parseFastqFile(map >&); int checkGroups(map >&); int mapGroupToFile(map >&, vector); int fixMap(map >&); int findFileOption(); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/srainfocommand.cpp000077500000000000000000000605551424121717000217440ustar00rootroot00000000000000// // srainfocommand.cpp // Mothur // // Created by Sarah Westcott on 10/29/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "srainfocommand.hpp" #include "systemcommand.h" //********************************************************************************************************************** vector SRAInfoCommand::setParameters(){ try { CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos); CommandParameter pPreFetchlocation("prefetch", "String", "", "", "", "", "","",false,false); parameters.push_back(pPreFetchlocation); CommandParameter pFasterQlocation("fasterq", "String", "", "", "", "", "","",false,false); parameters.push_back(pFasterQlocation); CommandParameter pcompress("gz", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcompress); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pmaxsize("maxsize", "Number", "", "20", "", "", "","",false,false,true); parameters.push_back(pmaxsize); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fastq"] = tempOutNames; outputTypes["file"] = tempOutNames; outputTypes["sra"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SRAInfoCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SRAInfoCommand::getHelpString(){ try { string helpString = ""; helpString += "The sra.info command reads an accnos file containing sample names. It uses prefetch and fasterq_dump to download and extract the fastq files. The prefetch and fasterq_dump tools developed by NCBI, https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. Mothur is compatible with version 2.9.6 or greater. \n"; helpString += "The sra.info command parameters are accnos, fasterq, prefetch, maxsize, gz and processors.\n"; helpString += "The accnos parameter is used to give the list of samples for download. 
This file can be generated by clicking on the Accession List button in the Select table on the SRA Run Selector page.\n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is all available. \n"; helpString += "The maxsize parameter allows you to limit the size of the files downloaded by prefetch. The default is 20 (20GB). \n"; helpString += "The gz parameter allows you to compress the fastq files. The default is false. \n"; helpString += "The fasterq parameter allows you to specify location of the fasterq_dump executable. By default mothur will look in its location and the location of MOTHUR_TOOLS if specified at compile time or set through the set.dir(tools=locationOfExternalTools) command. Ex. sra.info(accnos=SRR_Acc_List.txt.csv, fasterq=/usr/bin/fasterq-dump.2.10.1) or sra.info(accnos=SRR_Acc_List.txt.csv, fasterq=/usr/local/fasterq_dump). Location and name of exe can be set.\n"; helpString += "The prefetch parameter allows you to specify location of the prefetch executable. By default mothur will look in its location and the location of MOTHUR_TOOLS if specified at compile time or set through the set.dir(tools=locationOfExternalTools) command. Ex. sra.info(accnos=SRR_Acc_List.txt.csv, prefetch=/usr/bin/prefetch.2.10.1) or sra.info(accnos=SRR_Acc_List.txt.csv, prefetch=/usr/local/prefetch). Location and name of exe can be set.\n"; helpString += "The sra.info command should be in the following format: sra.info(accnos=yourAccnosFile)\n"; helpString += "sra.info(sra=SRR_Acc_List.txt.csv) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "SRAInfoCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SRAInfoCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fastq") { pattern = "[filename],fastq"; } else if (type == "file") { pattern = "[filename],[tag],files"; } else if (type == "sra") { pattern = "[filename],sra"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SRAInfoCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** SRAInfoCommand::SRAInfoCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; accnosfile = validParameter.validFile(parameters, "accnos"); if (accnosfile == "not open") { accnosfile = ""; abort = true; } else if (accnosfile == "not found") { m->mothurOut("[ERROR]: The accnos parameter is required.\n"); abort = true; } if (outputdir == ""){ outputdir += util.hasPath(accnosfile); } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "gz"); if (temp == "not found") { temp = "F"; } compressGZ = util.isTrue(temp); temp = validParameter.valid(parameters, "maxsize"); if (temp == "not found"){ temp = "20"; } util.mothurConvert(temp, maxSize); maxSize *= 1000000; 
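            //note (added comment): maxsize is given in GB and scaled here because prefetch's -X/--max-size option expects KB
            //(default 20 -> 20,000,000 KB, i.e. 20G); runPreFetch() only passes -X when the value differs from that default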
vector versionOutputs; bool foundTool = false; string programName = "fasterq-dump"; programName += EXECUTABLE_EXT; string programVersion = "2.9.6"; #ifdef WINDOWS programName = "fastq-dump"; programName += EXECUTABLE_EXT; #endif fasterQLocation = validParameter.validFile(parameters, "fasterq"); if (fasterQLocation == "not found") { fasterQLocation = ""; foundTool = util.findTool(programName, fasterQLocation, versionOutputs, current->getLocations()); }else { //test to make sure fasterq exists ifstream in; fasterQLocation = util.getFullPathName(fasterQLocation); bool ableToOpen = util.openInputFile(fasterQLocation, in, "no error"); in.close(); if(!ableToOpen) { m->mothurOut(fasterQLocation + " file does not exist or cannot be opened, ignoring.\n"); fasterQLocation = ""; programName = util.getSimpleName(fasterQLocation); fasterQLocation = ""; foundTool = util.findTool(programName, fasterQLocation, versionOutputs, current->getLocations()); } } if (foundTool && !abort) { //check fasterq_dump version if (versionOutputs.size() >= 3) { string version = versionOutputs[2]; if (!checkVersion(programVersion, version)) { m->mothurOut("[ERROR]: " + programName + " version found = " + version + ". Mothur requires version " + programVersion + " which is distributed with mothur's executable or available for download here, https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software\n"); abort = true; }else { m->mothurOut("Using " + programName + " version " + version + ".\n"); } } } foundTool = false; programName = "prefetch"; programName += EXECUTABLE_EXT; versionOutputs.clear(); programVersion = "2.9.3"; #ifdef WINDOWS programName = "prefetch"; programName += EXECUTABLE_EXT; #endif prefetchLocation = validParameter.validFile(parameters, "prefetch"); if (prefetchLocation == "not found") { prefetchLocation = ""; foundTool = util.findTool(programName, prefetchLocation, versionOutputs, current->getLocations()); }else { //test to make sure prefetch exists ifstream in; prefetchLocation = util.getFullPathName(prefetchLocation); bool ableToOpen = util.openInputFile(prefetchLocation, in, "no error"); in.close(); if(!ableToOpen) { m->mothurOut(prefetchLocation + " file does not exist or cannot be opened, ignoring.\n"); prefetchLocation = ""; programName = util.getSimpleName(prefetchLocation); prefetchLocation = ""; foundTool = util.findTool(programName, prefetchLocation, versionOutputs, current->getLocations()); } } if (foundTool && !abort) { //check fasterq_dump version if (versionOutputs.size() >= 3) { string version = versionOutputs[2]; if (!checkVersion(programVersion, version)) { m->mothurOut("[ERROR]: " + programName + " version found = " + version + ". 
Mothur requires version " + programVersion + " which is distributed with mothur's executable or available for download here, https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software\n"); abort = true; }else { m->mothurOut("Using " + programName + " version " + version + ".\n"); } } } if (!foundTool) { abort = true; } if (m->getDebug()) { m->mothurOut("[DEBUG]: fasterq-dump location using " + fasterQLocation + "\n"); m->mothurOut("[DEBUG]: prefetch location using " + prefetchLocation + "\n"); } } } catch(exception& e) { m->errorOut(e, "SRAInfoCommand", "SRAInfoCommand"); exit(1); } } //*************************************************************************************************************** int SRAInfoCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } unordered_set samples = util.readAccnos(accnosfile); map variables; string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir = util.hasPath(accnosfile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(accnosfile)); variables["[tag]"] = ""; string fileFileName = getOutputFileName("file",variables); ofstream out; util.openOutputFile(fileFileName, out); variables["[tag]"] = "single"; string singleFileFileName = getOutputFileName("file",variables); ofstream outSingle; util.openOutputFile(singleFileFileName, outSingle); int count = 0; for (auto it = samples.begin(); it != samples.end(); it++) { m->mothurOut("\n>>>>>\tProcessing sample " + *it + " (" + toString(count+1) + " of " + toString(samples.size()) + ")\t<<<<<\n"); count++; string downloadedFile = runPreFetch(*it); if (downloadedFile != "fail") { vector filenames; bool hasBoth = runFastqDump(downloadedFile, filenames); if (hasBoth) { outputNames.push_back(filenames[0]); outputTypes["fastq"].push_back(filenames[0]); outputNames.push_back(filenames[1]); outputTypes["fastq"].push_back(filenames[1]); out << *it << '\t' << util.getSimpleName(filenames[0]) << '\t' << util.getSimpleName(filenames[1]) << endl; }else { if (filenames.size() != 0) { outputNames.push_back(filenames[0]); outputTypes["fastq"].push_back(filenames[0]); outSingle << util.getSimpleName(filenames[0]) << endl; } } } } out.close(); outSingle.close(); //remove if not filled if (util.isBlank(fileFileName)) { util.mothurRemove(fileFileName); }else { outputNames.push_back(fileFileName); outputTypes["file"].push_back(fileFileName); } if (util.isBlank(singleFileFileName)) { util.mothurRemove(singleFileFileName); }else { outputNames.push_back(singleFileFileName); outputTypes["file"].push_back(singleFileFileName); } string currentName = ""; itTypes = outputTypes.find("file"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFileFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SRAInfoCommand", "execute"); exit(1); } } //*************************************************************************************************************** string SRAInfoCommand::runPreFetch(string sampleName){ try{ double_t start = time(nullptr); vector cPara; string prefetchCommand = prefetchLocation; prefetchCommand = "\"" + prefetchCommand + "\" " + sampleName + " "; cPara.push_back(util.mothurConvert(prefetchCommand)); if (maxSize != 20000000) { //-X|--max-size maximum file size to download in KB (exclusive). 
Default: 20G string msize = toString(maxSize); cPara.push_back(util.mothurConvert("-X")); cPara.push_back(util.mothurConvert(msize)); } //-o|--outfile output-file map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sampleName))+"."; string outputFileName = getOutputFileName("sra",variables); cPara.push_back(util.mothurConvert("-o")); cPara.push_back(util.mothurConvert(outputFileName)); char** preFetchParameters; preFetchParameters = new char*[cPara.size()]; string commandString = ""; for (int i = 0; i < cPara.size(); i++) { preFetchParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; } #if defined NON_WINDOWS #else commandString = "\"" + commandString + "\""; #endif //free memory for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; } delete[] preFetchParameters; if (m->getDebug()) { m->mothurOut("[DEBUG]: prefetch command = " + commandString + ".\n"); } //m->mothurOut("prefetch command = " + commandString + ".\n"); ifstream inTest; if (util.openInputFile(outputFileName, inTest, "no error")) { m->mothurOut("\n" + outputFileName + " is found locally, skipping prefetch.\n\n"); return outputFileName; } //run system command runSystemCommand(commandString); //check for output files ifstream in; if (!util.openInputFile(outputFileName, in, "no error")) { m->mothurOut("\n\n[ERROR]: prefetch was unable to download sample " + sampleName + ", skipping.\n\n"); return "fail"; }else { outputNames.push_back(outputFileName); outputTypes["sra"].push_back(outputFileName); } m->mothurOut("It took " + toString(time(nullptr)-start)+ " seconds to download sample " + sampleName + ".\n"); return outputFileName; } catch(exception& e) { m->errorOut(e, "SRAInfoCommand", "runPreFetch"); exit(1); } } //*************************************************************************************************************** bool SRAInfoCommand::runFastqDump(string sampleFile, vector& filenames){ try{ vector cPara; string fasterQCommand = fasterQLocation; fasterQCommand = "\"" + fasterQCommand + "\" " + sampleFile + " "; cPara.push_back(util.mothurConvert(fasterQCommand)); //-S|--split-files write reads into different files string splitFiles; #if defined NON_WINDOWS splitFiles = "-S"; #else splitFiles = "--split-files"; #endif cPara.push_back(util.mothurConvert(splitFiles)); string splitSingleFiles; #if defined NON_WINDOWS //-3|--split-3 writes single reads in special file splitSingleFiles = "-3"; #else splitSingleFiles = "--split-3"; #endif cPara.push_back(util.mothurConvert(splitSingleFiles)); #if defined NON_WINDOWS //-e|--threads=processors string numProcessors = toString(processors); cPara.push_back(util.mothurConvert("-e")); cPara.push_back(util.mothurConvert(numProcessors)); #endif #if defined NON_WINDOWS #else if (compressGZ) { cPara.push_back(util.mothurConvert("-gzip")); } #endif //-o|--outfile output-file #if defined NON_WINDOWS map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sampleFile)); string outputFileName = getOutputFileName("fastq", variables); cPara.push_back(util.mothurConvert("-o")); cPara.push_back(util.mothurConvert(outputFileName)); #else if (outputdir != "") { string outputFileName = outputdir; cPara.push_back(util.mothurConvert("-outdir")); cPara.push_back(util.mothurConvert(outputFileName)); } #endif char** fasterQParameters; fasterQParameters = new char*[cPara.size()]; string commandString = ""; for (int i = 0; i < cPara.size(); i++) { fasterQParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " 
"; } #if defined NON_WINDOWS #else commandString = "\"" + commandString + "\""; #endif //free memory for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; } delete[] fasterQParameters; if (m->getDebug()) { m->mothurOut("[DEBUG]: fasterq_dump command = " + commandString + ".\n"); } //m->mothurOut("fasterq_dump command = " + commandString + ".\n"); ifstream testfin, testrin; string tag = "fastq"; if (compressGZ) { tag += ".gz"; } string ffastq = outputdir + util.trimStringEnd(util.getRootName(util.getSimpleName(sampleFile)), 1) +"_1." + tag; string rfastq = outputdir + util.trimStringEnd(util.getRootName(util.getSimpleName(sampleFile)), 1) +"_2." + tag; bool found = false; //already exist?? if (util.openInputFile(ffastq, testfin, "no error")) { testfin.close(); if (util.openInputFile(rfastq, testrin, "no error")) { m->mothurOut("\n" + ffastq + " and " + rfastq + " found locally, skipping fasterq_dump.\n\n"); found = true; testrin.close(); filenames.push_back(ffastq); filenames.push_back(rfastq); return found; } } if (!found) { runSystemCommand(commandString); } //fasterq does not have --gzip option, fastq does #if defined NON_WINDOWS if (compressGZ) { //run system command to compress files string tag = "fastq"; string ffastq = outputdir + util.trimStringEnd(util.getRootName(util.getSimpleName(sampleFile)), 1) +"_1." + tag; string rfastq = outputdir + util.trimStringEnd(util.getRootName(util.getSimpleName(sampleFile)), 1) +"_2." + tag; string inputString = "gzip -f " + ffastq; runSystemCommand(inputString); util.mothurRemove(ffastq); inputString = "gzip -f " + rfastq; runSystemCommand(inputString); util.mothurRemove(rfastq); } #else #endif //check for output files. trimstring removes last character ifstream fin, rin; bool hasBoth = true; if (util.openInputFile(ffastq, fin, "no error")) { filenames.push_back(ffastq); fin.close(); }else { hasBoth = false; } if (util.openInputFile(rfastq, rin, "no error")) { filenames.push_back(rfastq); rin.close(); }else { hasBoth = false; } return hasBoth; } catch(exception& e) { m->errorOut(e, "SRAInfoCommand", "runFastqDump"); exit(1); } } /**************************************************************************************************/ //versionNeeded = 2.9.3 versionProvided = 2.10.1 bool SRAInfoCommand::checkVersion(string versionNeeded, string versionProvided){ try{ vector versionRequired; vector versionGiven; vector temps; util.splitAtChar(versionProvided, temps, '.'); for (int i = 0; i < temps.size(); i++) { int thisTemp; util.mothurConvert(temps[i], thisTemp); versionGiven.push_back(thisTemp); } temps.clear(); util.splitAtChar(versionNeeded, temps, '.'); for (int i = 0; i < temps.size(); i++) { int thisTemp; util.mothurConvert(temps[i], thisTemp); versionRequired.push_back(thisTemp); } //main version tag is too old. 2.9.3, 1.9.1 ie versionNeeded[0] = 2, versionProvided[0] = 1 if (versionRequired[0] < versionGiven[0]) { return true; } else { if (versionRequired[0] == versionGiven[0]) { //same major version, check minors //minor version tag is too old. 2.9.3, 2.8.1 ie versionNeeded[0] = 9, versionProvided[0] = 8 if (versionRequired[1] < versionGiven[1]) { return true; } else { if (versionRequired[1] == versionGiven[1]) { //same minor version, check next minor //patch version tag is too old. 
2.9.3, 2.9.1 ie versionNeeded[0] = 3, versionProvided[0] = 1 if (versionRequired[2] <= versionGiven[2]) { return true; } }//else required is greater than given return false } }//else required is greater than given return false } return false; } catch(exception& e) { m->errorOut(e, "SRAInfoCommand", "checkVersion"); exit(1); } } /**************************************************************************************************/ void SRAInfoCommand::runSystemCommand(string inputString){ try{ m->mothurOut("/******************************************/\n"); m->mothurOut("\nRunning command: system(" + inputString + ")\n"); system(inputString.c_str()); m->mothurOut("/******************************************/\n"); } catch(exception& e) { m->errorOut(e, "SRAInfoCommand", "runSystemCommand"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/srainfocommand.hpp000066400000000000000000000031121424121717000217300ustar00rootroot00000000000000// // srainfocommand.hpp // Mothur // // Created by Sarah Westcott on 10/29/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef srainfocommand_hpp #define srainfocommand_hpp #include "command.hpp" /**************************************************************************************************/ class SRAInfoCommand : public Command { public: SRAInfoCommand(string); ~SRAInfoCommand(){} vector setParameters(); string getCommandName() { return "sra.info"; } string getCommandCategory() { return "Sequence Processing"; } string getOutputPattern(string); string getHelpString(); string getCitation() { return "Wrapper for prefetch and fasterq_dump programs written by NCBI https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software http://www.mothur.org/wiki/sra.info"; } string getDescription() { return "extracts fastq files from samples using prefetch and fasterq_dump program written by NCBI"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, compressGZ; vector outputNames; string accnosfile, outputType, fasterQLocation, prefetchLocation; int processors, maxSize; string runPreFetch(string); bool runFastqDump(string, vector&); void runSystemCommand(string); bool checkVersion(string versionNeeded, string versionProvided); }; /**************************************************************************************************/ #endif /* srainfocommand_hpp */ mothur-1.48.0/source/commands/subsamplecommand.cpp000066400000000000000000001730271424121717000222720ustar00rootroot00000000000000/* * subsamplecommand.cpp * Mothur * * Created by westcott on 10/27/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "subsamplecommand.h" #include "uniqueseqscommand.h" #include "getseqscommand.h" #include "subsample.h" //********************************************************************************************************************** vector SubSampleCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FLSSR", "none","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup); CommandParameter plist("list", "InputTypes", "", "", "none", "FLSSR", "none","list",false,false,true); parameters.push_back(plist); CommandParameter pshared("shared", "InputTypes", "", "", "none", "FLSSR", "none","shared",false,false,true); parameters.push_back(pshared); CommandParameter prabund("rabund", "InputTypes", "", "", "none", "FLSSR", "none","rabund",false,false); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "none", "FLSSR", "none","sabund",false,false); parameters.push_back(psabund); CommandParameter ptree("tree", "InputTypes", "", "", "none", "FLSSR", "none","tree",false,false); parameters.push_back(ptree); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(psize); CommandParameter ppersample("persample", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(ppersample); CommandParameter pwithreplacement("withreplacement", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pwithreplacement); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["list"] = tempOutNames; outputTypes["rabund"] = tempOutNames; outputTypes["sabund"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["count"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["constaxonomy"] = tempOutNames; outputTypes["tree"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "setParameters"); exit(1); } } 
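//**********************************************************************************************************************
//note (added, illustrative only): the "FLSSR" chooser group above appears to tie the fasta, list, shared, sabund,
//rabund and tree parameters together, so at least one of them must be supplied. A minimal run of the shared-file
//path (hypothetical file name) would look like:
//    mothur > sub.sample(shared=yourfile.shared, size=1000)
//which, per getOutputPattern("shared"), writes a [filename].[distance].subsample.shared file for each label processed.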
//********************************************************************************************************************** string SubSampleCommand::getHelpString(){ try { string helpString = ""; helpString += "The sub.sample command is designed to be used as a way to normalize your data, or create a smaller set from your original set.\n"; helpString += "The sub.sample command parameters are " + getCommandParameters() + ". You must provide a fasta, list, sabund, rabund or shared file as an input file.\n"; helpString += "The namefile is only used with the fasta file, not with the listfile, because the list file should contain all sequences.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n"; helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"; helpString += "The size parameter allows you indicate the size of your subsample.\n"; helpString += "The persample parameter allows you indicate you want to select subsample of the same size from each of your groups, default=false. It is only used with the list and fasta files if a groupfile is given.\n"; helpString += "persample=false will select a random set of sequences of the size you select, but the number of seqs from each group may differ.\n"; helpString += "The size parameter is not set: with shared file size=number of seqs in smallest sample, with all other files if a groupfile is given and persample=true, then size=number of seqs in smallest sample, otherwise size=10% of number of seqs.\n"; helpString += "The withreplacement parameter allows you to indicate you want to subsample your data allowing for the same read to be included multiple times. Default=f. 
\n"; helpString += "The sub.sample command should be in the following format: sub.sample(list=yourListFile, group=yourGroupFile, groups=yourGroups, label=yourLabels).\n"; helpString += "Example sub.sample(list=abrecovery.fn.list, group=abrecovery.groups, groups=B-C, size=20).\n"; helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"; helpString += "The sub.sample command outputs a .subsample file.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SubSampleCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],subsample,[extension]"; } else if (type == "sabund") { pattern = "[filename],subsample,[extension]"; } else if (type == "name") { pattern = "[filename],subsample,[extension]"; } else if (type == "group") { pattern = "[filename],subsample,[extension]"; } else if (type == "count") { pattern = "[filename],subsample,[extension]"; } else if (type == "tree") { pattern = "[filename],subsample,[extension]"; } else if (type == "list") { pattern = "[filename],[distance],subsample,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],subsample,[extension]"; } else if (type == "constaxonomy"){ pattern = "[filename],subsample,[extension]"; } else if (type == "shared") { pattern = "[filename],[distance],subsample,[extension]"; } else if (type == "rabund") { pattern = "[filename],subsample,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SubSampleCommand::SubSampleCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; //check for required parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { sabundfile = ""; abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } else { current->setSabundFile(sabundfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { rabundfile = ""; abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } else { current->setRabundFile(rabundfile); } fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { current->setFastaFile(fastafile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { 
current->setSharedFile(sharedfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } taxonomyfile = validParameter.validFile(parameters, "taxonomy"); if (taxonomyfile == "not open") { taxonomyfile = ""; abort = true; } else if (taxonomyfile == "not found") { taxonomyfile = ""; } else { current->setTaxonomyFile(taxonomyfile); } constaxonomyfile = validParameter.validFile(parameters, "constaxonomy"); if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; } else if (constaxonomyfile == "not found") { constaxonomyfile = ""; } else { current->setConsTaxonomyFile(constaxonomyfile); } treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { treefile = ""; abort = true; } else if (treefile == "not found") { treefile = ""; } else { current->setTreeFile(treefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); ct.readTable(countfile, true, false); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
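            //note (added comment): label and groups are dash-separated lists (e.g. label=0.03-0.05, groups=A-B-C) split with
            //util.splitAtDash(); label="all" (or no label) leaves allLines=true so every distance in the input file is processed,
            //and groups="all" clears Groups so all groups are kept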
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; pickedGroups = false; } else { pickedGroups = true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string temp = validParameter.valid(parameters, "size"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, size); temp = validParameter.valid(parameters, "persample"); if (temp == "not found"){ temp = "f"; } persample = util.isTrue(temp); temp = validParameter.valid(parameters, "withreplacement"); if (temp == "not found"){ temp = "f"; } withReplacement = util.isTrue(temp); if ((groupfile == "") && (countfile == "")) { persample = false; } if (countfile != "") { if (!ct.hasGroupInfo()) { persample = false; if (pickedGroups) { m->mothurOut("You cannot pick groups without group info in your count file.\n"); abort = true; } } } if ((namefile != "") && ((fastafile == "") && (taxonomyfile == "") && (treefile == ""))) { m->mothurOut("You may only use a name file with a fasta file, tree file or taxonomy file.\n"); abort = true; } if ((taxonomyfile != "") && ((fastafile == "") && (listfile == ""))) { m->mothurOut("You may only use a taxonomy file with a fasta file or list file.\n"); abort = true; } if ((constaxonomyfile != "") && ((sharedfile == "") && (listfile == ""))) { m->mothurOut("You may only use a constaxonomy file with a shared file or list file.\n"); abort = true; } if ((fastafile == "") && (listfile == "") && (sabundfile == "") && (rabundfile == "") && (sharedfile == "") && (treefile == "")) { m->mothurOut("You must provide a fasta, list, sabund, rabund, shared or tree file as an input file.\n"); abort = true; } if (pickedGroups && ((groupfile == "") && (sharedfile == "") && (countfile == ""))) { m->mothurOut("You cannot pick groups without a valid group, count or shared file.\n"); abort = true; } if (((groupfile != "") || (countfile != "")) && ((fastafile == "") && (listfile == "") && (treefile == ""))) { m->mothurOut("Group or count files are only valid with list file, fasta file or tree file.\n"); abort = true; } if (((groupfile != "") || (countfile != "")) && ((fastafile != "") && (listfile != ""))) { m->mothurOut("A new group or count file can only be made from the subsample of a list file or fasta file, not both. 
Please correct.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "SubSampleCommand"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (sharedfile != "") { getSubSampleShared(); } else if (listfile != "") { getSubSampleList(); } //can only use with replacement if a count file is provided else if (rabundfile != "") { getSubSampleRabund(); } else if (sabundfile != "") { getSubSampleSabund(); } else if (fastafile != "") { getSubSampleFasta(); } //can only use with replacement if a count file is provided else if (treefile != "") { getSubSampleTree(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setListFile(currentName); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSharedFile(currentName); } } itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setRabundFile(currentName); } } itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSabundFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } itTypes = outputTypes.find("constaxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setConsTaxonomyFile(currentName); } } itTypes = outputTypes.find("tree"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "execute"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::getSubSampleTree() { try { TreeReader* reader = nullptr; string cinputfile = countfile; if (countfile == "") { reader = new 
TreeReader(treefile, groupfile, namefile); cinputfile = namefile; }else { reader = new TreeReader(treefile, countfile); } vector T; T = reader->getTrees(); //user trees CountTable* ct; ct = T[0]->getCountTable(); vector Treenames = T[0]->getTreeNames(); delete reader; if (size == 0) { //user has not set size, set size = smallest samples size size = ct->getNumSeqsSmallestGroup(); }else { ct->removeGroup(size); } if (!pickedGroups) { Groups = ct->getNamesOfGroups(); } if (Groups.size() == 0) { m->mothurOut("The size you selected is too large, skipping tree file.\n"); return 0; } m->mothurOut("Sampling " + toString(size) + " from each group.\n"); //copy to preserve old one - would do this in subsample but memory cleanup becomes messy. CountTable* newCt = new CountTable(); SubSample sample; Tree* subSampleTree; //sets unwanted seqs to doNotIncludeMe, prune below if (withReplacement) { subSampleTree = sample.getSampleWithReplacement(T[0], ct, newCt, size, Groups); } else { subSampleTree = sample.getSample(T[0], ct, newCt, size, Groups); } if (m->getControl_pressed()) { delete newCt; delete subSampleTree; return 0; } vector newTreeNames = newCt->getNamesOfSeqs(Groups); //without "doNotIncludeMe" Tree* outputTree = new Tree(newTreeNames.size(), ct, Treenames); //create ouptut tree - respecting pickedGroups outputTree->getSubTree(subSampleTree, newTreeNames); //builds tree with only seqs present in newCt outputTree->assembleTree(); newCt->removeGroup("doNotIncludeMe"); string treeOutputDir = outputdir; if (outputdir == "") { treeOutputDir += util.hasPath(treefile); } map variables; variables["[filename]"] = treeOutputDir + util.getRootName(util.getSimpleName(treefile)); variables["[extension]"] = util.getExtension(treefile); string treeOutputFileName = getOutputFileName("tree", variables); outputTypes["tree"].push_back(treeOutputFileName); outputNames.push_back(treeOutputFileName); ofstream outTree; util.openOutputFile(treeOutputFileName, outTree); outputTree->print(outTree, "both"); outTree.close(); for (int i = 0; i < T.size(); i++) { delete T[i]; } delete subSampleTree; delete outputTree; string countOutputDir = outputdir; if (outputdir == "") { countOutputDir += util.hasPath(cinputfile); } variables["[filename]"] = countOutputDir + util.getRootName(util.getSimpleName(cinputfile)); variables["[extension]"] = ".count_table"; string countOutputFile = getOutputFileName("count",variables); outputNames.push_back(countOutputFile); outputTypes["count"].push_back(countOutputFile); newCt->printTable(countOutputFile); delete newCt; delete ct; return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSubSampleTree"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::getSubSampleFasta() { try { vector names; if (namefile != "") { names = readNames(); } //fills names with all names in namefile. 
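        //note (added comment): with a name file, readNames() expands every redundant name from the name file's second
        //column so the pool reflects true abundances; without one, getNames() collects the unique fasta names, each
        //mapped to itself in nameMap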
else { names = getNames(); }//no name file, so get list of names to pick from GroupMap groupMap; if (groupfile != "") { groupMap.readMap(groupfile); if (Groups.size() == 0) { Groups = groupMap.getNamesOfGroups(); } //file mismatch quit if (names.size() != groupMap.getNumSeqs()) { m->mothurOut("[ERROR]: your fasta file contains " + toString(names.size()) + " sequences, and your groupfile contains " + toString(groupMap.getNumSeqs()) + ", please correct.\n"); return 0; } }else if (countfile != "") { if (ct.hasGroupInfo()) { if (Groups.size() == 0) { Groups = ct.getNamesOfGroups(); } } //file mismatch quit if (names.size() != ct.getNumUniqueSeqs()) { m->mothurOut("[ERROR]: your fasta file contains " + toString(names.size()) + " sequences, and your count file contains " + toString(ct.getNumUniqueSeqs()) + " unique sequences, please correct.\n"); return 0; } } if (m->getControl_pressed()) { return 0; } //make sure that if your picked groups size is not too big int thisSize = 0; if (countfile == "") { thisSize = names.size(); if (withReplacement) { m->mothurOut("[WARNING]: To use the withreplacement option with a fasta file, you need to provide a count file, running without replacement.\n"); withReplacement = false; } } else { thisSize = ct.getNumSeqs(); } //all seqs not just unique if (persample) { if (size == 0) { //user has not set size, set size = smallest samples size if (countfile == "") { size = groupMap.getNumSeqsSmallestGroup(); } else { size = ct.getNumSeqsSmallestGroup(); } }else { //make sure size is not too large vector newGroups; for (int i = 0; i < Groups.size(); i++) { int thisSize = 0; if (countfile == "") { thisSize = groupMap.getNumSeqs(Groups[i]); } else { thisSize = ct.getGroupCount(Groups[i]); } if (thisSize >= size) { newGroups.push_back(Groups[i]); } else { m->mothurOut("You have selected a size that is larger than " + Groups[i] + " number of sequences, removing " + Groups[i] + ".\n"); } } Groups = newGroups; if (newGroups.size() == 0) { m->mothurOut("[ERROR]: all groups removed.\n"); m->setControl_pressed(true); } } m->mothurOut("Sampling " + toString(size) + " from each group.\n"); }else { if (pickedGroups) { int total = 0; for(int i = 0; i < Groups.size(); i++) { if (countfile == "") { total += groupMap.getNumSeqs(Groups[i]); } else { total += ct.getGroupCount(Groups[i]); } } if (size == 0) { size = int (total * 0.10); } //user has not set size, set size = 10% samples size if (total < size) { if (size != 0) { m->mothurOut("Your size is too large for the number of groups you selected. Adjusting to " + toString(int (total * 0.10)) + ".\n"); } size = int (total * 0.10); } m->mothurOut("Sampling " + toString(size) + " from " + toString(total) + ".\n"); } if (size == 0) { //user has not set size, set size = 10% samples size if (countfile == "") { size = int (names.size() * 0.10); } else { size = int (ct.getNumSeqs() * 0.10); } } if (size > thisSize) { m->mothurOut("Your fasta file only contains " + toString(thisSize) + " sequences. Setting size to " + toString(thisSize) + ".\n"); size = thisSize; } if (!pickedGroups) { m->mothurOut("Sampling " + toString(size) + " from " + toString(thisSize) + ".\n"); } } util.mothurRandomShuffle(names); set subset; //subset may contain names from column 2 of namefile if namefile is used. 
Will need to be sure to match these with unique name in fasta file if (countfile == "") { //fill subset with the names we want to sample SubSample sample; if (groupfile != "") { GroupMap sampledGm = sample.getSample(groupMap, size, Groups, persample); vector sampledSeqs = sampledGm.getNamesSeqs(); for (int i = 0; i < sampledSeqs.size(); i++) { subset.insert(sampledSeqs[i]); } string groupOutputDir = outputdir; if (outputdir == "") { groupOutputDir += util.hasPath(groupfile); } map variables; variables["[filename]"] = groupOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[extension]"] = util.getExtension(groupfile); string groupOutputFileName = getOutputFileName("group", variables); outputTypes["group"].push_back(groupOutputFileName); outputNames.push_back(groupOutputFileName); sampledGm.print(groupOutputFileName); }else { for (int i = 0; i < size; i++) { subset.insert(names[i]); } } }else { SubSample sample; CountTable sampledCt; if (withReplacement) { sampledCt = sample.getSampleWithReplacement(ct, size, Groups, persample); } else { sampledCt = sample.getSample(ct, size, Groups, persample); } vector sampledSeqs = sampledCt.getNamesOfSeqs(); for (int i = 0; i < sampledSeqs.size(); i++) { subset.insert(sampledSeqs[i]); } string countOutputDir = outputdir; if (outputdir == "") { countOutputDir += util.hasPath(countfile); } map variables; variables["[filename]"] = countOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string countOutputFileName = getOutputFileName("count", variables); outputTypes["count"].push_back(countOutputFileName); outputNames.push_back(countOutputFileName); sampledCt.printTable(countOutputFileName); } if (subset.size() == 0) { m->mothurOut("The size you selected is too large, skipping fasta file.\n"); return 0; } string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[extension]"] = util.getExtension(fastafile); string outputFileName = getOutputFileName("fasta", variables); outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName); ofstream out; util.openOutputFile(outputFileName, out); ifstream in; util.openInputFile(fastafile, in); string thisname; int count = 0; map >::iterator itNameMap; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); out.close(); return 0; } Sequence currSeq(in); thisname = currSeq.getName(); if (thisname != "") { //does the subset contain a sequence that this sequence represents itNameMap = nameMap.find(thisname); if (itNameMap != nameMap.end()) { vector nameRepresents = itNameMap->second; for (int i = 0; i < nameRepresents.size(); i++){ if (subset.count(nameRepresents[i]) != 0) { out << ">" << nameRepresents[i] << endl << currSeq.getAligned() << endl; count++; } } }else{ m->mothurOut("[ERROR]: " + thisname + " is not in your namefile, please correct.\n"); } } gobble(in); } in.close(); out.close(); if (count != subset.size()) { m->mothurOut("[ERROR]: The subset selected contained " + toString(subset.size()) + " sequences, but I only found " + toString(count) + " of those in the fastafile.\n"); } if (namefile != "") { m->mothurOut("Deconvoluting subsampled fasta file... 
\n"); map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[extension]"] = util.getExtension(namefile); string outputNameFileName = getOutputFileName("name", variables); //use unique.seqs to create new name and fastafile string inputString = "fasta=" + outputFileName; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: unique.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* uniqueCommand = new UniqueSeqsCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; current->setMothurCalling(false); util.renameFile(filenames["name"][0], outputNameFileName); util.renameFile(filenames["fasta"][0], outputFileName); outputTypes["name"].push_back(outputNameFileName); outputNames.push_back(outputNameFileName); m->mothurOut("/******************************************/\n"); m->mothurOut("Done.\n"); if (taxonomyfile != "") { set tempSubset; //get new unique names from fasta file //read through fasta file outputting only the names on the subsample list after deconvolute ifstream in2; util.openInputFile(outputFileName, in2); while (!in2.eof()) { Sequence seq(in2); gobble(in2); if (seq.getName() != "") { tempSubset.insert(seq.getName()); } } in2.close(); //send that list to getTax int tcount = getTax(tempSubset); if (tcount != tempSubset.size()) { m->mothurOut("[ERROR]: subsampled fasta file contains " + toString(tempSubset.size()) + " sequences, but I only found " + toString(tcount) + " in your taxonomy file, please correct.\n"); } } }else { if (taxonomyfile != "") { int tcount = getTax(subset); if (tcount != subset.size()) { m->mothurOut("[ERROR]: subsampled fasta file contains " + toString(subset.size()) + " sequences, but I only found " + toString(tcount) + " in your taxonomy file, please correct.\n"); } } //should only contain uniques. 
} return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSubSampleFasta"); exit(1); } } //********************************************************************************************************************** vector SubSampleCommand::getNames() { try { vector names; ifstream in; util.openInputFile(fastafile, in); string thisname; while(!in.eof()){ if (m->getControl_pressed()) { in.close(); return names; } Sequence currSeq(in); thisname = currSeq.getName(); if (thisname != "") { vector temp; temp.push_back(thisname); nameMap[thisname] = temp; names.push_back(thisname); } gobble(in); } in.close(); return names; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getNames"); exit(1); } } //********************************************************************************************************************** vector SubSampleCommand::readNames() { try { vector names; nameMap.clear(); util.readNames(namefile, nameMap); //save names of all sequences map >::iterator it; for (it = nameMap.begin(); it != nameMap.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { names.push_back((it->second)[i]); } } return names; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "readNames"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::getSubSampleShared() { try { InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); if (constaxonomyfile != "") { //you have a constaxonomy file and have not set the labels parameter. Either your sharedfile has one label or we only want to use one since the constaxonomy file is related to one label if (labels.size() == 0) { labels.insert(lastLabel); allLines = false; m->mothurOut("\n[WARNING]: The constaxonomy file represents a single label in your shared file. You did not set the label parameter, so mothur is assuming you want to use label " + lastLabel + ". 
If this is not correct file mismatches can occur.\n\n"); } } if (size == 0) { //user has not set size, set size = smallest samples size size = lookup->getNumSeqsSmallestGroup(); }else { lookup->removeGroups(size); Groups = lookup->getNamesGroups(); } if (lookup->size() == 0) { m->mothurOut("The size you selected is too large, skipping shared file.\n"); return 0; } m->mothurOut("Sampling " + toString(size) + " from each group.\n"); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } processShared(lookup); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSubSampleShared"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::processShared(SharedRAbundVectors*& thislookup) { try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sharedfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sharedfile)); variables["[extension]"] = util.getExtension(sharedfile); variables["[distance]"] = thislookup->getLabel(); string outputFileName = getOutputFileName("shared", variables); SubSample sample; if (withReplacement) { sample.getSampleWithReplacement(thislookup, size); } else { sample.getSample(thislookup, size); } if (m->getControl_pressed()) { return 0; } ofstream out; util.openOutputFile(outputFileName, out); outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); bool printHeaders = true; thislookup->print(out, printHeaders); out.close(); if (constaxonomyfile != "") { //select otus from constaxonomy that are in new shared file. 
Also adjust the size column in the constaxonomy file string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(constaxonomyfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxonomyfile)); variables["[extension]"] = util.getExtension(constaxonomyfile); variables["[distance]"] = thislookup->getLabel(); string consOutputFileName = getOutputFileName("constaxonomy", variables); ifstream in; util.openInputFile(constaxonomyfile, in); //read headers string headers = util.getline(in); gobble(in); ofstream outCons; util.openOutputFile(consOutputFileName, outCons); outputTypes["constaxonomy"].push_back(consOutputFileName); outputNames.push_back(consOutputFileName); outCons << headers << endl; while (!in.eof()) { if (m->getControl_pressed()) { break; } string otu = ""; string tax = "unknown"; int size = 0; in >> otu; gobble(in); in >> size; gobble(in); tax = util.getline(in); gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + otu + toString(size) + tax + "\n"); } size = thislookup->getOTUTotal(otu); if (size != 0) { outCons << otu << '\t' << size << '\t' << tax << endl; } } in.close(); outCons.close(); } return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "processShared"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::getSubSampleList() { try { if (namefile != "") { util.readNames(namefile, nameMap); } if ((countfile == "") && withReplacement) { m->mothurOut("[WARNING]: To use the withreplacement option with a fasta file, you need to provide a count file, running without replacement.\n"); withReplacement = false; } InputData input(listfile, "list", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; ListVector* list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); if (constaxonomyfile != "") { //you have a constaxonomy file and have not set the labels parameter. Either your list file has one label or we only want to use one since the constaxonomy file is related to one label if (labels.size() == 0) { labels.insert(lastLabel); allLines = false; m->mothurOut("\n[WARNING]: The constaxonomy file represents a single label in your list file. You did not set the label parameter, so mothur is assuming you want to use label " + lastLabel + ". 
If this is not correct file mismatches can occur.\n\n"); } } GroupMap groupMap; if (groupfile != "") { groupMap.readMap(groupfile); //takes care of user setting groupNames that are invalid or setting groups=all if (Groups.size() == 0) { Groups = groupMap.getNamesOfGroups(); } //file mismatch quit if (list->getNumSeqs() != groupMap.getNumSeqs()) { m->mothurOut("[ERROR]: your list file contains " + toString(list->getNumSeqs()) + " sequences, and your groupfile contains " + toString(groupMap.getNumSeqs()) + ", please correct.\n"); delete list; return 0; } }else if (countfile != "") { if (ct.hasGroupInfo()) { if (Groups.size() == 0) { Groups = ct.getNamesOfGroups(); } } //file mismatch quit if (list->getNumSeqs() != ct.getNumUniqueSeqs()) { m->mothurOut("[ERROR]: your list file contains " + toString(list->getNumSeqs()) + " sequences, and your count file contains " + toString(ct.getNumUniqueSeqs()) + " unique sequences, please correct.\n"); return 0; } } //make sure that if your picked groups size is not too big if (persample) { if (size == 0) { //user has not set size, set size = smallest samples size if (countfile == "") { size = groupMap.getNumSeqsSmallestGroup(); } else { size = ct.getNumSeqsSmallestGroup(); } }else { //make sure size is not too large vector newGroups; for (int i = 0; i < Groups.size(); i++) { int thisSize = 0; if (countfile == "") { thisSize = groupMap.getNumSeqs(Groups[i]); } else { thisSize = ct.getGroupCount(Groups[i]); } if (thisSize >= size) { newGroups.push_back(Groups[i]); } else { m->mothurOut("You have selected a size that is larger than " + Groups[i] + " number of sequences, removing " + Groups[i] + ".\n"); } } Groups = newGroups; if (newGroups.size() == 0) { m->mothurOut("[ERROR]: all groups removed.\n"); m->setControl_pressed(true); } } m->mothurOut("Sampling " + toString(size) + " from each group.\n"); }else{ if (pickedGroups) { int total = 0; for(int i = 0; i < Groups.size(); i++) { if (countfile == "") { total += groupMap.getNumSeqs(Groups[i]); } else { total += ct.getGroupCount(Groups[i]); } } if (size == 0) { //user has not set size, set size = 10% samples size size = int (total * 0.10); } if (total < size) { if (size != 0) { m->mothurOut("Your size is too large for the number of groups you selected. Adjusting to " + toString(int (total * 0.10)) + ".\n"); } size = int (total * 0.10); } m->mothurOut("Sampling " + toString(size) + " from " + toString(total) + ".\n"); }else { if (size == 0) { //user has not set size, set size = 10% samples size if (countfile == "") { size = int (list->getNumSeqs() * 0.10); } else { size = int (ct.getNumSeqs() * 0.10); } } int thisSize = 0; if (countfile == "") { thisSize = list->getNumSeqs(); } else { thisSize = ct.getNumSeqs(); } if (size > thisSize) { m->mothurOut("Your list file only contains " + toString(thisSize) + " sequences. 
Setting size to " + toString(thisSize) + ".\n"); size = thisSize; } m->mothurOut("Sampling " + toString(size) + " from " + toString(thisSize) + ".\n"); } } set subset; //dont want repeat sequence names added if (countfile == "") { SubSample sample; if (groupfile != "") { //use group file names GroupMap sampledGm = sample.getSample(groupMap, size, Groups, persample); vector sampledSeqs = sampledGm.getNamesSeqs(); for (int i = 0; i < sampledSeqs.size(); i++) { subset.insert(sampledSeqs[i]); } string groupOutputDir = outputdir; if (outputdir == "") { groupOutputDir += util.hasPath(groupfile); } map variables; variables["[filename]"] = groupOutputDir + util.getRootName(util.getSimpleName(groupfile)); variables["[extension]"] = util.getExtension(groupfile); string groupOutputFileName = getOutputFileName("group", variables); outputTypes["group"].push_back(groupOutputFileName); outputNames.push_back(groupOutputFileName); sampledGm.print(groupOutputFileName); }else { vector names; //use list file names //fill names for (int i = 0; i < list->getNumBins(); i++) { string binnames = list->get(i); vector thisBin; util.splitAtComma(binnames, thisBin); for(int j=0;jmothurOut("[ERROR]: " + thisBin[j] + " is not in your groupfile. please correct.\n"); group = "NOTFOUND"; } //if hte user picked groups, we only want to keep the names of sequences from those groups if (pickedGroups) { if (util.inUsersGroups(group, Groups)) { names.push_back(thisBin[j]); } } else{ names.push_back(thisBin[j]); } }//save everyone, group else{ names.push_back(thisBin[j]); } } } util.mothurRandomShuffle(names); for (int i = 0; i < size; i++) { subset.insert(names[i]); } } }else { SubSample sample; CountTable sampledCt; if (withReplacement) { sampledCt = sample.getSampleWithReplacement(ct, size, Groups, persample); } else { sampledCt = sample.getSample(ct, size, Groups, persample); } vector sampledSeqs = sampledCt.getNamesOfSeqs(); for (int i = 0; i < sampledSeqs.size(); i++) { subset.insert(sampledSeqs[i]); } string countOutputDir = outputdir; if (outputdir == "") { countOutputDir += util.hasPath(countfile); } map variables; variables["[filename]"] = countOutputDir + util.getRootName(util.getSimpleName(countfile)); variables["[extension]"] = util.getExtension(countfile); string countOutputFileName = getOutputFileName("count", variables); outputTypes["count"].push_back(countOutputFileName); outputNames.push_back(countOutputFileName); sampledCt.printTable(countOutputFileName); } while (list != nullptr) { if (m->getControl_pressed()) { delete list; break; } processList(list, subset); delete list; list = util.getNextList(input, allLines, userLabels, processedLabels, lastLabel); } if (list != nullptr) { delete list; } if (taxonomyfile != "") { if (namefile == "") { InputData input(listfile, "list", Groups); ListVector* list = input.getListVector(); string lastLabel = list->getLabel(); for (int i = 0; i < list->getNumBins(); i++) { vector temp; string bin = list->get(i); util.splitAtComma(bin, temp); for (int j = 0; j < temp.size(); j++) { vector tempFakeOut; tempFakeOut.push_back(temp[j]); nameMap[temp[j]] = tempFakeOut; } } delete list; int tcount = getTax(subset); if (tcount != subset.size()) { m->mothurOut("[ERROR]: subsampled list file contains " + toString(subset.size()) + " sequences, but I only found " + toString(tcount) + " in your taxonomy file, did you forget a name file? 
Please correct.\n"); } }else { string tempAccnos = "temp.accnos"; ofstream outAccnos; util.openOutputFile(tempAccnos, outAccnos); for (set::iterator it = subset.begin(); it != subset.end(); it++) { outAccnos << *it << endl; } outAccnos.close(); m->mothurOut("Sampling taxonomy and name file... \n"); string thisNameOutputDir = outputdir; if (outputdir == "") { thisNameOutputDir += util.hasPath(namefile); } map variables; variables["[filename]"] = thisNameOutputDir + util.getRootName(util.getSimpleName(namefile)); variables["[extension]"] = util.getExtension(namefile); string outputNameFileName = getOutputFileName("name", variables); string thisTaxOutputDir = outputdir; if (outputdir == "") { thisTaxOutputDir += util.hasPath(taxonomyfile); } variables["[filename]"] = thisTaxOutputDir + util.getRootName(util.getSimpleName(taxonomyfile)); variables["[extension]"] = util.getExtension(taxonomyfile); string outputTaxFileName = getOutputFileName("taxonomy", variables); //use unique.seqs to create new name and fastafile string inputString = "dups=f, name=" + namefile + ", taxonomy=" + taxonomyfile + ", accnos=" + tempAccnos; m->mothurOut("/******************************************/\n"); m->mothurOut("Running command: get.seqs(" + inputString + ")\n"); current->setMothurCalling(true); Command* getCommand = new GetSeqsCommand(inputString); getCommand->execute(); map > filenames = getCommand->getOutputFiles(); delete getCommand; current->setMothurCalling(false); util.renameFile(filenames["name"][0], outputNameFileName); util.renameFile(filenames["taxonomy"][0], outputTaxFileName); outputTypes["name"].push_back(outputNameFileName); outputNames.push_back(outputNameFileName); outputNames.push_back(outputTaxFileName); outputTypes["taxonomy"].push_back(outputTaxFileName); m->mothurOut("/******************************************/\nDone.\n"); } } return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSubSampleList"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::processList(ListVector*& list, set& subset) { try { string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(listfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(listfile)); variables["[extension]"] = util.getExtension(listfile); variables["[distance]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; util.openOutputFile(outputFileName, out); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); int numBins = list->getNumBins(); ListVector* temp = new ListVector(); temp->setLabel(list->getLabel()); vector binLabels = list->getLabels(); vector newLabels; for (int i = 0; i < numBins; i++) { if (m->getControl_pressed()) { break; } string bin = list->get(i); vector binnames; util.splitAtComma(bin, binnames); string newNames = ""; for(int j=0;jpush_back(newNames); newLabels.push_back(binLabels[i]); } } temp->setLabels(newLabels); delete list; list = temp; if (m->getControl_pressed()) { out.close(); return 0; } list->print(out, false); out.close(); if (constaxonomyfile != "") { //select otus from constaxonomy that are in new list file. 
Also adjust the size column in the constaxonomy file string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(constaxonomyfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(constaxonomyfile)); variables["[extension]"] = util.getExtension(constaxonomyfile); variables["[distance]"] = list->getLabel(); string consOutputFileName = getOutputFileName("constaxonomy", variables); ifstream in; util.openInputFile(constaxonomyfile, in); //read headers string headers = util.getline(in); gobble(in); ofstream outCons; util.openOutputFile(consOutputFileName, outCons); outputTypes["constaxonomy"].push_back(consOutputFileName); outputNames.push_back(consOutputFileName); outCons << headers << endl; while (!in.eof()) { if (m->getControl_pressed()) { break; } string otu = ""; string tax = "unknown"; int size = 0; in >> otu; gobble(in); in >> size; gobble(in); tax = util.getline(in); gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + otu + toString(size) + tax + "\n"); } size = list->getOTUTotal(otu); if (size != 0) { outCons << otu << '\t' << size << '\t' << tax << endl; } } in.close(); outCons.close(); } return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "processList"); exit(1); } } //********************************************************************************************************************** void SubSampleCommand::getSubSampleRabund() { try { InputData input(rabundfile, "rabund", nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; RAbundVector* rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); if (size == 0) { //user has not set size, set size = 10% size = int((rabund->getNumSeqs()) * 0.10); }else if (size > rabund->getNumSeqs()) { m->mothurOut("The size you selected is too large, skipping rabund file.\n"); delete rabund; return; } m->mothurOut("Sampling " + toString(size) + " from " + toString(rabund->getNumSeqs()) + ".\n"); string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(rabundfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(rabundfile)); variables["[extension]"] = util.getExtension(rabundfile); string outputFileName = getOutputFileName("rabund", variables); ofstream out; util.openOutputFile(outputFileName, out); outputTypes["rabund"].push_back(outputFileName); outputNames.push_back(outputFileName); while (rabund != nullptr) { if (m->getControl_pressed()) { delete rabund; break; } processRabund(rabund, out); delete rabund; rabund = util.getNextRAbund(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSubSampleRabund"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::processRabund(RAbundVector*& rabund, ofstream& out) { try { RAbundVector* copy = new RAbundVector(*rabund); SubSample sample; if (withReplacement) { sample.getSampleWithReplacement(copy, size); }else { sample.getSample(copy, size); } copy->print(out); delete copy; return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "processRabund"); exit(1); } } //********************************************************************************************************************** void SubSampleCommand::getSubSampleSabund() { try { InputData input(sabundfile, "sabund", nullVector); 
set processedLabels; set userLabels = labels; string lastLabel = ""; SAbundVector* sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); if (size == 0) { //user has not set size, set size = 10% size = int((sabund->getNumSeqs()) * 0.10); }else if (size > sabund->getNumSeqs()) { m->mothurOut("The size you selected is too large, skipping sabund file.\n"); delete sabund; return; } m->mothurOut("Sampling " + toString(size) + " from " + toString(sabund->getNumSeqs()) + ".\n"); string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(sabundfile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(sabundfile)); variables["[extension]"] = util.getExtension(sabundfile); string outputFileName = getOutputFileName("sabund", variables); ofstream out; util.openOutputFile(outputFileName, out); outputTypes["sabund"].push_back(outputFileName); outputNames.push_back(outputFileName); while (sabund != nullptr) { if (m->getControl_pressed()) { delete sabund; break; } processSabund(sabund, out); delete sabund; sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); } out.close(); } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSubSampleSabund"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::processSabund(SAbundVector*& sabund, ofstream& out) { try { SAbundVector* copy = new SAbundVector(*sabund); SubSample sample; if (withReplacement) { sample.getSampleWithReplacement(copy, size); }else { sample.getSample(copy, size); } copy->print(out); delete copy; return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "processSabund"); exit(1); } } //********************************************************************************************************************** int SubSampleCommand::getTax(set& subset) { try { string thisTaxOutputDir = outputdir; if (outputdir == "") { thisTaxOutputDir += util.hasPath(taxonomyfile); } map variables; variables["[filename]"] = thisTaxOutputDir + util.getRootName(util.getSimpleName(taxonomyfile)); variables["[extension]"] = util.getExtension(taxonomyfile); string outputTaxFileName = getOutputFileName("taxonomy", variables); ofstream outTax; util.openOutputFile(outputTaxFileName, outTax); outputNames.push_back(outputTaxFileName); outputTypes["taxonomy"].push_back(outputTaxFileName); //read through fasta file outputting only the names on the subsample list ifstream inTax; util.openInputFile(taxonomyfile, inTax); string tname, tax; int tcount = 0; map >::iterator itNameMap; while(!inTax.eof()){ if (m->getControl_pressed()) { inTax.close(); outTax.close(); return 0; } inTax >> tname; gobble(inTax); tax = util.getline(inTax); gobble(inTax); //does the subset contain a sequence that this sequence represents itNameMap = nameMap.find(tname); if (itNameMap != nameMap.end()) { vector nameRepresents = itNameMap->second; for (int i = 0; i < nameRepresents.size(); i++){ if (subset.count(nameRepresents[i]) != 0) { outTax << nameRepresents[i] << '\t' << tax << endl; tcount++; } } }else{ m->mothurOut("[ERROR]: " + tname + " is missing, please correct.\n"); } } inTax.close(); outTax.close(); return tcount; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getTax"); exit(1); } } //********************************************************************************************************************** 
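//NOTE: illustrative sketch only, not part of the command above. For the name/group/list code paths
//the "sample without replacement" step simply shuffles the candidate read names and keeps the first
//'size' of them (the count-table and shared paths delegate to the SubSample class instead). A
//minimal stand-alone version of that idea, reusing this file's util helper, might look like:
//
//    set<string> pickWithoutReplacement(vector<string> names, int size, Utils& util) {
//        util.mothurRandomShuffle(names);                          //uniform shuffle of candidate names
//        if (size > (int)names.size()) { size = names.size(); }    //guard against oversampling
//        return set<string>(names.begin(), names.begin() + size);  //keep the first 'size' names
//    }
//
//Here pickWithoutReplacement is a hypothetical helper shown for illustration; it is not defined or
//called anywhere in mothur.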
mothur-1.48.0/source/commands/subsamplecommand.h000077500000000000000000000034351424121717000217350ustar00rootroot00000000000000#ifndef SUBSAMPLECOMMAND_H
#define SUBSAMPLECOMMAND_H

/*
 *  subsamplecommand.h
 *  Mothur
 *
 *  Created by westcott on 10/27/10.
 *  Copyright 2010 Schloss Lab. All rights reserved.
 *
 */

#include "command.hpp"
#include "listvector.hpp"
#include "rabundvector.hpp"
#include "inputdata.h"
#include "sequence.hpp"
#include "counttable.h"
#include "treereader.h"

class SubSampleCommand : public Command {

public:
    SubSampleCommand(string);
    ~SubSampleCommand() = default;
    
    vector<string> setParameters();
    string getCommandName()         { return "sub.sample";              }
    string getCommandCategory()     { return "Sequence Processing";     }
    string getHelpString();
    string getOutputPattern(string);
    string getCitation()            { return "http://www.mothur.org/wiki/Sub.sample"; }
    string getDescription()         { return "get a sampling of sequences from a list, shared, rabund, sabund or fasta file"; }
    
    int execute();
    void help() { m->mothurOut(getHelpString()); }
    
private:
    bool abort, pickedGroups, allLines, persample, withReplacement;
    string listfile, groupfile, countfile, sharedfile, rabundfile, sabundfile, fastafile, namefile, taxonomyfile, treefile, constaxonomyfile;
    set<string> labels; //holds labels to be used
    string groups, label;
    vector<string> Groups, outputNames;
    int size;
    //vector<string> names;
    map<string, vector<string> > nameMap;
    CountTable ct;
    
    int getSubSampleShared();
    int getSubSampleList();
    void getSubSampleRabund();
    void getSubSampleSabund();
    int getSubSampleFasta();
    int getSubSampleTree();
    int processShared(SharedRAbundVectors*&);
    int processRabund(RAbundVector*&, ofstream&);
    int processSabund(SAbundVector*&, ofstream&);
    int processList(ListVector*&, set<string>&);
    vector<string> getNames();
    vector<string> readNames();
    int getTax(set<string>&);
};

#endif
mothur-1.48.0/source/commands/summarycommand.cpp000077500000000000000000001001211424121717000217610ustar00rootroot00000000000000/*
 *  summarycommand.cpp
 *  Dotur
 *
 *  Created by Sarah Westcott on 1/2/09.
 *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
* */ #include "summarycommand.h" #include "ace.h" #include "sobs.h" #include "nseqs.h" #include "chao1.h" #include "bootstrap.h" #include "simpson.h" #include "simpsoneven.h" #include "invsimpson.h" #include "npshannon.h" #include "shannon.h" #include "heip.h" #include "smithwilson.h" #include "shannoneven.h" #include "jackknife.h" #include "geom.h" #include "logsd.h" #include "qstat.h" #include "bergerparker.h" #include "bstick.h" #include "goodscoverage.h" #include "coverage.h" #include "efron.h" #include "boneh.h" #include "solow.h" #include "shen.h" #include "subsample.h" #include "shannonrange.h" //********************************************************************************************************************** vector SummaryCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false,true); parameters.push_back(plist); CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false); parameters.push_back(prabund); CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false); parameters.push_back(psabund); CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","summary",false,false,true); parameters.push_back(pshared); CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter pwithreplacement("withreplacement", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pwithreplacement); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson-shannonrange", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter palpha("alpha", "Multiple", "0-1-2", "1", "", "", "","",false,false,true); parameters.push_back(palpha); CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); CommandParameter psize("size", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psize); CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["summary"] = tempOutNames; abort = false; calledHelp = false; allLines = true; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SummaryCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SummaryCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; helpString += "The 
summary.single command parameters are list, sabund, rabund, shared, subsample, iters, label, calc, abund and groupmode. list, sabund, rabund or shared is required unless you have a valid current file.\n"; helpString += "The summary.single command should be in the following format: \n"; helpString += "summary.single(label=yourLabel, calc=yourEstimators).\n"; helpString += "Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n"; helpString += validCalculator.printCalc("summary"); helpString += "The subsample parameter allows you to enter the size of the sample or you can set subsample=T and mothur will use the size of your smallest group in the case of a shared file. With a list, sabund or rabund file you must provide a subsample size.\n"; helpString += "The withreplacement parameter allows you to indicate you want to subsample your data allowing for the same read to be included multiple times. Default=f. \n"; helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample. Default=1000.\n"; helpString += "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n"; helpString += "If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n"; helpString += "The alpha parameter is used to set the alpha value for the shannonrange calculator.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SummaryCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SummaryCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SummaryCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SummaryCommand::SummaryCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; inputfile = listfile; current->setListFile(listfile); } sabundfile = validParameter.validFile(parameters, "sabund"); if (sabundfile == "not open") { sabundfile = ""; abort = true; } else if (sabundfile == "not found") { sabundfile = ""; } else { format = "sabund"; inputfile = sabundfile; current->setSabundFile(sabundfile); } rabundfile = validParameter.validFile(parameters, "rabund"); if (rabundfile == "not open") { rabundfile = ""; abort = true; } else if (rabundfile == "not found") { rabundfile = ""; } else { format = "rabund"; inputfile = rabundfile; current->setRabundFile(rabundfile); } sharedfile = validParameter.validFile(parameters, "shared"); if 
(sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { format = "sharedfile"; inputfile = sharedfile; current->setSharedFile(sharedfile); } if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { //is there are current file available for any of these? //give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { rabundfile = current->getRabundFile(); if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter.\n"); } else { sabundfile = current->getSabundFile(); if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command.\n"); abort = true; } } } } } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } else { if (calc == "default") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } string temp; temp = validParameter.valid(parameters, "abund"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, abund); temp = validParameter.valid(parameters, "size"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, size); temp = validParameter.valid(parameters, "groupmode"); if (temp == "not found") { temp = "T"; } groupMode = util.isTrue(temp); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "subsample"); if (temp == "not found") { temp = "F"; } if (util.isNumeric1(temp)) { util.mothurConvert(temp, subsampleSize); subsample = true; } else { if (util.isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later else { subsample = false; subsampleSize = -1; } } temp = validParameter.valid(parameters, "withreplacement"); if (temp == "not found"){ temp = "f"; } withReplacement = util.isTrue(temp); temp = validParameter.valid(parameters, "alpha"); if (temp == "not found") { temp = "1"; } util.mothurConvert(temp, alpha); if ((alpha != 0) && (alpha != 1) && (alpha != 2)) { m->mothurOut("[ERROR]: Not a valid 
alpha value. Valid values are 0, 1 and 2.\n"); abort=true; } if (!subsample) { iters = 1; } else { //if you did not set a samplesize and are not using a sharedfile if ((subsampleSize == -1) && (format != "sharedfile")) { m->mothurOut("[ERROR]: If you want to subsample with a list, rabund or sabund file, you must provide the sample size. You can do this by setting subsample=yourSampleSize.\n"); abort=true; } } } } catch(exception& e) { m->errorOut(e, "SummaryCommand", "SummaryCommand"); exit(1); } } //********************************************************************************************************************** int SummaryCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if ((format != "sharedfile")) { inputFileNames.push_back(inputfile); } else { inputFileNames = parseSharedFile(sharedfile); format = "rabund"; } if (m->getControl_pressed()) { return 0; } int numLines = 0; int numCols = 0; map groupIndex; for (int p = 0; p < inputFileNames.size(); p++) { numLines = 0; numCols = 0; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputFileNames[p])); string fileNameRoot = getOutputFileName("summary",variables); variables["[tag]"] = "ave-std"; string fileNameAve = getOutputFileName("summary",variables); if (inputFileNames.size() > 1) { m->mothurOut("\nProcessing group " + groups[p] + "\n\n"); groupIndex[fileNameRoot] = groups[p]; } fillEstimators(); //if the users entered no valid calculators don't execute command if (sumCalculators.size() == 0) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } ofstream outputFileHandle; ofstream outAve; if (subsample) { util.openOutputFile(fileNameAve, outAve); outputNames.push_back(fileNameAve); outputTypes["summary"].push_back(fileNameAve); outAve << "label\tmethod"; outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint); if (inputFileNames.size() > 1) { groupIndex[fileNameAve] = groups[p]; } }else { util.openOutputFile(fileNameRoot, outputFileHandle); outputFileHandle << "label"; outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot); } for(int i=0;igetCols() == 1){ if (subsample) { outAve << '\t' << sumCalculators[i]->getName(); } else { outputFileHandle << '\t' << sumCalculators[i]->getName(); } numCols++; } else{ if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; } else { outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; } numCols += 3; } } if (subsample) { outAve << endl; } else { outputFileHandle << endl; } InputData input(inputFileNames[p], format, nullVector); set processedLabels; set userLabels = labels; string lastLabel = ""; SAbundVector* sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); if (m->getControl_pressed()) { if (!subsample) { outputFileHandle.close(); } else { outAve.close(); } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } for(int i=0;igetControl_pressed()) { delete sabund; break; } process(sabund, outputFileHandle, outAve); delete sabund; sabund = util.getNextSAbund(input, allLines, userLabels, processedLabels, lastLabel); numLines++; } if (subsample) { outAve.close(); } else { outputFileHandle.close(); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { 
util.mothurRemove(outputNames[i]); } for(int i=0;igetControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //create summary file containing all the groups data for each label - this function just combines the info from the files already created. if ((sharedfile != "") && (groupMode)) { vector comboNames = createGroupSummaryFile(numLines, numCols, outputNames, groupIndex); for (int i = 0; i < comboNames.size(); i++) { outputNames.push_back(comboNames[i]); } } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SummaryCommand", "execute"); exit(1); } } //********************************************************************************************************************** void SummaryCommand::fillEstimators() { try { sumCalculators.clear(); ValidCalculators validCalculator; for (int i=0; ierrorOut(e, "SummaryCommand", "fillEstimators"); exit(1); } } //********************************************************************************************************************** int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, ofstream& outAve) { try { //calculator -> data -> values vector< vector< vector > > results; results.resize(sumCalculators.size()); if (!subsample) { outputFileHandle << sabund->getLabel(); } SubSample sample; for (int thisIter = 0; thisIter < iters; thisIter++) { SAbundVector* thisIterSabund = sabund; //we want the summary results for the whole dataset, then the subsampling if (subsample) { //subsample sabund and run it //copy sabund since getSample destroys it RAbundVector rabund = sabund->getRAbundVector(); SAbundVector* newSabund = new SAbundVector(); *newSabund = rabund.getSAbundVector(); if (withReplacement) { sample.getSampleWithReplacement(newSabund, subsampleSize); } else { sample.getSample(newSabund, subsampleSize); } thisIterSabund = newSabund; } for(int i=0;i data = sumCalculators[i]->getValues(thisIterSabund); if (m->getControl_pressed()) { return 0; } if (!subsample) { outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); }else { //some of the calc have hci and lci need to make room for that if (results[i].size() == 0) { results[i].resize(data.size()); } //save results for ave and std. for (int j = 0; j < data.size(); j++) { if (m->getControl_pressed()) { return 0; } results[i][j].push_back(data[j]); } } } //cleanup memory if (subsample) { delete thisIterSabund; } } if (!subsample) { outputFileHandle << endl; } if (subsample) { outAve << sabund->getLabel() << '\t' << "ave"; //find ave and std for this label and output //will need to modify the createGroupSummary to combine results and not mess with the .summary file. //calcs -> values vector< vector > calcAverages; calcAverages.resize(sumCalculators.size()); for (int i = 0; i < calcAverages.size(); i++) { calcAverages[i].resize(results[i].size(), 0); } for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. for (int j = 0; j < calcAverages[i].size(); j++) { calcAverages[i][j] += results[i][j][thisIter]; } } } for (int i = 0; i < calcAverages.size(); i++) { //finds average. 
for (int j = 0; j < calcAverages[i].size(); j++) { calcAverages[i][j] /= (float) iters; outAve << '\t' << calcAverages[i][j]; } } //find standard deviation vector< vector > stdDev; stdDev.resize(sumCalculators.size()); for (int i = 0; i < stdDev.size(); i++) { stdDev[i].resize(results[i].size(), 0); } for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each for (int i = 0; i < stdDev.size(); i++) { for (int j = 0; j < stdDev[i].size(); j++) { stdDev[i][j] += ((results[i][j][thisIter] - calcAverages[i][j]) * (results[i][j][thisIter] - calcAverages[i][j])); } } } outAve << endl << sabund->getLabel() << '\t' << "std"; for (int i = 0; i < stdDev.size(); i++) { //finds average. for (int j = 0; j < stdDev[i].size(); j++) { stdDev[i][j] /= (float) iters; stdDev[i][j] = sqrt(stdDev[i][j]); outAve << '\t' << stdDev[i][j]; } } outAve << endl; } return 0; } catch(exception& e) { m->errorOut(e, "SummaryCommand", "process"); exit(1); } } //********************************************************************************************************************** vector SummaryCommand::parseSharedFile(string filename) { try { vector filenames; map files; map::iterator it3; InputData input(filename, "sharedfile", groups); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); /******************************************************/ //user has not set size, set size = smallest samples size if (subsample) { if (subsampleSize == -1) { subsampleSize = lookup->getNumSeqsSmallestGroup(); m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n"); } else { lookup->removeGroups(subsampleSize); } if (lookup->size() < 1) { m->mothurOut("You have not provided enough valid groups. 
I cannot run the command.\n"); m->setControl_pressed(true); return filenames; } } /******************************************************/ groups = lookup->getNamesGroups(); //clears file before we start to write to it below string sharedFileRoot = util.getRootName(filename); for (int i=0; i data = lookup->getSharedRAbundVectors(); for (int i = 0; i < data.size(); i++) { ofstream temp; string group = data[i]->getGroup(); util.openOutputFileAppend(files[group], temp); data[i]->getRAbundVector().print(temp); temp.close(); delete data[i]; } delete lookup; lookup = input.getSharedRAbundVectors(); } return filenames; } catch(exception& e) { m->errorOut(e, "SummaryCommand", "parseSharedFile"); exit(1); } } //********************************************************************************************************************** vector SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector& outputNames, map groupIndex) { try { //open each groups summary file vector newComboNames; map > > files; map filesTypesLabels; map filesTypesNumLines; for (int i=0; i thisFilesLines; ifstream temp; util.openInputFile(outputNames[i], temp); //read through first line - labels string labelsLine = util.getline(temp); vector theseLabels = util.splitWhiteSpace(labelsLine); string newLabel = ""; for (int j = 0; j < theseLabels.size(); j++) { if (j == 1) { newLabel += "group\t" + theseLabels[j]; } else if (j == 0) { newLabel += theseLabels[j] + "\t"; } else{ newLabel += '\t' + theseLabels[j]; } } gobble(temp); int stop = numLines; if (theseLabels.size() != numCols+1) { stop = numLines*2; } //for each label for (int k = 0; k < stop; k++) { string thisLine = ""; string tempLabel; for (int j = 0; j < theseLabels.size(); j++) { temp >> tempLabel; //save for later if (j == 1) { thisLine += groupIndex[outputNames[i]] + "\t" + tempLabel; } else if (j == 0) { thisLine += tempLabel + "\t"; } else{ thisLine += "\t" + tempLabel; } } thisLine += "\n"; thisFilesLines.push_back(thisLine); gobble(temp); } string extension = util.getExtension(outputNames[i]); if (theseLabels.size() != numCols+1) { extension = ".ave-std" + extension; } string combineFileName = outputdir + util.getRootName(util.getSimpleName(sharedfile)) + "groups" + extension; util.mothurRemove(combineFileName); //remove old file filesTypesLabels[extension] = newLabel; filesTypesNumLines[extension] = stop; map > >::iterator itFiles = files.find(extension); if (itFiles != files.end()) { //add new files info to existing type files[extension][outputNames[i]] = thisFilesLines; }else { map > thisFile; thisFile[outputNames[i]] = thisFilesLines; files[extension] = thisFile; } temp.close(); util.mothurRemove(outputNames[i]); } for (map > >::iterator itFiles = files.begin(); itFiles != files.end(); itFiles++) { if (m->getControl_pressed()) { break; } string extension = itFiles->first; map > thisType = itFiles->second; string combineFileName = outputdir + util.getRootName(util.getSimpleName(sharedfile)) + "groups" + extension; newComboNames.push_back(combineFileName); //open combined file ofstream out; util.openOutputFile(combineFileName, out); //output label line to new file out << filesTypesLabels[extension] << endl; //for each label for (int k = 0; k < filesTypesNumLines[extension]; k++) { //grab summary data for each group for (map >::iterator itType = thisType.begin(); itType != thisType.end(); itType++) { out << (itType->second)[k]; } } outputNames.clear(); out.close(); } //return combine file name return newComboNames; } catch(exception& e) { 
m->errorOut(e, "SummaryCommand", "createGroupSummaryFile"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/summarycommand.h000077500000000000000000000027771424121717000214470ustar00rootroot00000000000000#ifndef SUMMARYCOMMAND_H #define SUMMARYCOMMAND_H /* * summarycommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "sabundvector.hpp" #include "inputdata.h" #include "calculator.h" #include "validcalculator.h" class SummaryCommand : public Command { public: SummaryCommand(string); ~SummaryCommand(){} vector setParameters(); string getCommandName() { return "summary.single"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Summary.single"; } string getDescription() { return "generate summary file that has the calculator value for each line in the OTU data"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector sumCalculators; int abund, size, iters, subsampleSize, alpha; bool abort, allLines, groupMode, subsample, withReplacement; set labels; //holds labels to be used string label, calc, sharedfile, listfile, rabundfile, sabundfile, format, inputfile; vector Estimators; vector inputFileNames, outputNames; vector groups; vector parseSharedFile(string); vector createGroupSummaryFile(int, int, vector&, map); int process(SAbundVector*&, ofstream&, ofstream&); void fillEstimators(); }; #endif mothur-1.48.0/source/commands/summaryqualcommand.cpp000077500000000000000000000355171424121717000226630ustar00rootroot00000000000000/* * summaryqualcommand.cpp * Mothur * * Created by westcott on 11/28/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "summaryqualcommand.h" #include "counttable.h" //********************************************************************************************************************** vector SummaryQualCommand::setParameters(){ try { CommandParameter pqual("qfile", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pqual); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["summary"] = tempOutNames; abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SummaryQualCommand::getHelpString(){ try { string helpString = ""; helpString += "The summary.qual command reads a quality file and an optional name or count file, and summarizes the quality information.\n"; helpString += "The summary.qual command parameters are qfile, name, count and processors. qfile is required, unless you have a valid current quality file.\n"; helpString += "The name parameter allows you to enter a name file associated with your quality file. \n"; helpString += "The count parameter allows you to enter a count file associated with your quality file. 
\n"; helpString += "The summary.qual command should be in the following format: \n"; helpString += "summary.qual(qfile=yourQualityFile) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SummaryQualCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],qual.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** SummaryQualCommand::SummaryQualCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; qualfile = validParameter.validFile(parameters, "qfile"); if (qualfile == "not open") { qualfile = ""; abort = true; } else if (qualfile == "not found") { qualfile = current->getQualFile(); if (qualfile != "") { m->mothurOut("Using " + qualfile + " as input file for the qfile parameter.\n"); } else { m->mothurOut("You have no current quality file and the qfile parameter is required.\n"); abort = true; } }else { current->setQualFile(qualfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name.\n"); abort = true; } if (outputdir == ""){ outputdir += util.hasPath(qualfile); } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); } } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "SummaryQualCommand"); exit(1); } } //*************************************************************************************************************** int SummaryQualCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); long long numSeqs = 0; hasNameMap = false; vector position; vector averageQ; vector< vector > scores; if (m->getControl_pressed()) { return 0; } if (namefile != "") { hasNameMap = true; nameMap = util.readNames(namefile); } else if (countfile != "") { CountTable ct; ct.readTable(countfile, false, false); nameMap = ct.getNameMap(); hasNameMap = true; } numSeqs = createProcessesCreateSummary(position, averageQ, scores, qualfile); if (m->getControl_pressed()) { return 0; } //print summary file map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(qualfile)); string summaryFile = getOutputFileName("summary",variables); printQual(summaryFile, position, 
averageQ, scores); if (m->getControl_pressed()) { util.mothurRemove(summaryFile); return 0; } //output results to screen cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); m->mothurOut("\nPosition\tNumSeqs\tAverageQ\n"); for (int i = 0; i < position.size(); i+=100) { float average = averageQ[i] / (float) position[i]; cout << i << '\t' << position[i] << '\t' << average << '\n'; m->mothurOutJustToLog(toString(i) + "\t" + toString(position[i]) + "\t" + toString(average)+"\n"); } outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile); m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences.\n\n"); m->mothurOut("Output File Names: \n"); m->mothurOut(summaryFile+"\n\n"); return 0; } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "execute"); exit(1); } } /**************************************************************************************************/ //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct seqSumQualData { vector position; vector averageQ; vector< vector > scores; string filename; unsigned long long start; unsigned long long end; int count, numSeqs; MothurOut* m; bool hasNameMap; map nameMap; Utils util; ~seqSumQualData(){} seqSumQualData(string f, unsigned long long st, unsigned long long en, bool n, map nam) { filename = f; m = MothurOut::getInstance(); start = st; end = en; hasNameMap = n; nameMap = nam; count = 0; } }; /**************************************************************************************/ void driverCreateSummary(seqSumQualData* params) { try { ifstream in; params->util.openInputFile(params->filename, in); in.seekg(params->start); //adjust start if null strings if (params->start == 0) { params->util.zapGremlins(in); gobble(in); } bool done = false; params->count = 0; int count = 0; while (!done) { if (params->m->getControl_pressed()) { in.close(); break; } QualityScores current(in); gobble(in); if (current.getName() != "") { int num = 1; if (params->hasNameMap) { //make sure this sequence is in the namefile, else error map::iterator it = params->nameMap.find(current.getName()); if (it == params->nameMap.end()) { params->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct.\n"); params->m->setControl_pressed(true); } else { num = it->second; } } vector thisScores = current.getScores(); //resize to num of positions setting number of seqs with that size to 1 if (params->position.size() < thisScores.size()) { params->position.resize(thisScores.size(), 0); } if (params->averageQ.size() < thisScores.size()) { params->averageQ.resize(thisScores.size(), 0); } if (params->scores.size() < thisScores.size()) { params->scores.resize(thisScores.size()); for (int i = 0; i < params->scores.size(); i++) { params->scores[i].resize(41, 0); } } //increase counts of number of seqs with this position //average is really the total, we will average in execute for (int i = 0; i < thisScores.size(); i++) { params->position[i] += num; params->averageQ[i] += (thisScores[i] * num); //weighting for namesfile if (thisScores[i] > 41) { params->m->mothurOut("[WARNING]: " + current.getName() + " has a quality scores of " + toString(thisScores[i]) + ", expecting values to be less than 40. 
Setting to 40.\n"); thisScores[i] = 40; } else { params->scores[i][thisScores[i]] += num; } } params->count += num; //totalSeqs count++; //uniqueSeqs } #if defined NON_WINDOWS unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if ((count == params->end) || (in.eof())) { break; } #endif } in.close(); } catch(exception& e) { params->m->errorOut(e, "SummaryQualCommand", "driverCreateSummary"); exit(1); } } /**************************************************************************************************/ long long SummaryQualCommand::createProcessesCreateSummary(vector& position, vector& averageQ, vector< vector >& scores, string filename) { try { long long numSeqs = 0; vector positions; vector lines; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else positions = util.setFilePosFasta(qualfile, numSeqs); if (numSeqs < processors) { processors = numSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector workerThreads; vector data; //string f, unsigned long long st, unsigned long long en, bool n, map nam //Lauch worker threads for (int i = 0; i < processors-1; i++) { seqSumQualData* dataBundle = new seqSumQualData(filename, lines[i+1].start, lines[i+1].end, hasNameMap, nameMap); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverCreateSummary, dataBundle)); } seqSumQualData* dataBundle = new seqSumQualData(filename, lines[0].start, lines[0].end, hasNameMap, nameMap); driverCreateSummary(dataBundle); numSeqs = dataBundle->count; position = dataBundle->position; averageQ = dataBundle->averageQ; scores = dataBundle->scores; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); numSeqs += data[i]->count; int tempNum = data[i]->position.size(); if (position.size() < tempNum) { position.resize(tempNum, 0); } if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); } if (scores.size() < tempNum) { scores.resize(tempNum); for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); } } for (int k = 0; k < tempNum; k++) { position[k] += data[i]->position[k]; } for (int k = 0; k < tempNum; k++) { averageQ[k] += data[i]->averageQ[k]; } for (int k = 0; k < tempNum; k++) { for (int j = 0; j < 41; j++) { scores[k][j] += data[i]->scores[k][j]; } } delete data[i]; delete workerThreads[i]; } delete dataBundle; return numSeqs; } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "createProcessesCreateSummary"); exit(1); } } /**************************************************************************************************/ int SummaryQualCommand::printQual(string sumFile, vector& position, vector& averageQ, vector< vector >& scores) { try { ofstream out; util.openOutputFile(sumFile, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); outputNames.push_back(sumFile); outputTypes["summary"].push_back(sumFile); //print headings out << "Position\tnumSeqs\tAverageQ"; for (int i = 0; i < 41; i++) { out << '\t' << "q" << i; } out << endl; for (int i = 0; i < position.size(); i++) { if (m->getControl_pressed()) { out.close(); return 0; 
} double average = averageQ[i] / (float) position[i]; out << i << '\t' << position[i] << '\t' << average; for (int j = 0; j < 41; j++) { out << '\t' << scores[i][j]; } out << endl; } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "SummaryQualCommand", "printQual"); exit(1); } } /**************************************************************************************/ mothur-1.48.0/source/commands/summaryqualcommand.h000077500000000000000000000025071424121717000223210ustar00rootroot00000000000000#ifndef SUMMARYQUALCOMMAND_H #define SUMMARYQUALCOMMAND_H /* * summaryqualcommand.h * Mothur * * Created by westcott on 11/28/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "qualityscores.h" /**************************************************************************************************/ class SummaryQualCommand : public Command { public: SummaryQualCommand(string); ~SummaryQualCommand(){} vector setParameters(); string getCommandName() { return "summary.qual"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Summary.qual"; } string getDescription() { return "summarize the quality of a set of sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, hasNameMap; string qualfile, namefile, countfile; vector outputNames; map nameMap; int processors; long long createProcessesCreateSummary(vector&, vector&, vector< vector >&, string); int printQual(string, vector&, vector&, vector< vector >&); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/summarysharedcommand.cpp000077500000000000000000001221261424121717000231600ustar00rootroot00000000000000/* * summarysharedcommand.cpp * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "summarysharedcommand.h" #include "subsample.h" //********************************************************************************************************************** vector SummarySharedCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample); CommandParameter pwithreplacement("withreplacement", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pwithreplacement); CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance); CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput); CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["summary"] = tempOutNames; outputTypes["phylip"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SummarySharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SummarySharedCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; helpString += "The summary.shared command parameters are " + getCommandParameters() + ".\nThe shared is required if there is no current sharedfile.\n"; helpString += "The summary.shared command should be in the following format: \n"; helpString += "summary.shared(shared=yourSharedFile).\n"; helpString += "Example summary.shared(shared=final.opti_mcc.shared)\n"; helpString += validCalculator.printCalc("sharedsummary"); helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n"; helpString += 
"The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n"; helpString += "The withreplacement parameter allows you to indicate you want to subsample your data allowing for the same read to be included multiple times. Default=f. \n"; helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n"; helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n"; helpString += "The default value for groups is all the groups in your groupfile.\n"; helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n"; helpString += "If you use sharedchao and run into memory issues, set all to false. \n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SummarySharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SummarySharedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],summary-[filename],[tag],summary"; } else if (type == "phylip") { pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SummarySharedCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** SummarySharedCommand::SummarySharedCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { m->mothurOut("You have no current sharedfile and the shared parameter is required.\n"); abort = true; } }else { current->setSharedFile(sharedfile); } if (outputdir == ""){ outputdir = util.hasPath(sharedfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } else { if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } string temp = validParameter.valid(parameters, "all"); if (temp == "not found") { temp = "false"; } all = util.isTrue(temp); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); output = validParameter.valid(parameters, "output"); if(output == "not found"){ output = "lt"; } else { createPhylip = true; } if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt.\n"); output = "lt"; } temp = validParameter.valid(parameters, "subsample"); if (temp == "not found") { temp = "F"; } if (util.isNumeric1(temp)) { util.mothurConvert(temp, subsampleSize); subsample = true; } else { if (util.isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later else { subsample = false; } } if (!subsample) { iters = 1; } temp = validParameter.valid(parameters, "withreplacement"); if (temp == "not found"){ temp = "f"; } withReplacement = util.isTrue(temp); temp = validParameter.valid(parameters, "distance"); if (temp == "not found") { temp = "false"; } createPhylip = util.isTrue(temp); if (subsample) { createPhylip = true; } temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); mult = false; numCalcs = 0; } } catch(exception& e) { m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand"); exit(1); } } //********************************************************************************************************************** int SummarySharedCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } ValidCalculators validCalculator; vector sumCalculators; for (int i=0; i variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); string outputFileName = getOutputFileName("summary",variables); //if the users entered no valid calculators don't execute command if (Estimators.size() == 0) { return 0; } //check if any calcs can do multiples else{ if (all){ for (int i = 0; i < sumCalculators.size(); i++) { if (sumCalculators[i]->getMultiple() ) { mult = true; } } } } InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); 
/******************************************************/ //output headings for files /******************************************************/ //output estimator names as column headers if (!subsample) { util.openOutputFile(outputFileName, outputFileHandle); outputFileHandle << "label" <<'\t' << "comparison" << '\t'; for(int i=0;igetName(); if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; } } outputFileHandle << endl; outputFileHandle.close(); } //create file and put column headers for multiple groups file variables["[tag]"]= "multiple"; string outAllFileName = getOutputFileName("summary",variables); if (mult ) { if (!subsample) { util.openOutputFile(outAllFileName, outAll); outputNames.push_back(outAllFileName); outAll << "label" <<'\t' << "comparison" << '\t'; for(int i=0;igetMultiple() ) { outAll << '\t' << sumCalculators[i]->getName(); } } outAll << endl; outAll.close(); } } if (lookup->size() < 2) { m->mothurOut("I cannot run the command without at least 2 valid groups."); delete lookup; if (!subsample) { //close files and clean up util.mothurRemove(outputFileName); if (mult ) { util.mothurRemove(outAllFileName); } } return 0; //if you only have 2 groups you don't need a .sharedmultiple file }else if ((lookup->size() == 2) && (mult )) { mult = false; util.mothurRemove(outAllFileName); outputNames.pop_back(); } if (m->getControl_pressed()) { if (mult) { util.mothurRemove(outAllFileName); } util.mothurRemove(outputFileName); delete lookup; for(int i=0;igetNumSeqsSmallestGroup(); }else { lookup->removeGroups(subsampleSize); Groups = lookup->getNamesGroups(); } if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command.\n"); m->setControl_pressed(true); return 0; } } /******************************************************/ //comparison breakup to be used by different processes later numGroups = lookup->size(); numCalcs = sumCalculators.size(); for(int i=0;igetName()); } lines.resize(processors); for (int i = 0; i < processors; i++) { lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups); lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups); } /******************************************************/ vector currentLabels = lookup->getOTUNames(); while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } process(lookup, outputFileName, outAllFileName, currentLabels); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } for(int i=0;igetControl_pressed()) { util.mothurRemove(outAllFileName); util.mothurRemove(outputFileName); return 0; } m->mothurOut("\nOutput File Names:\n"); if (!subsample) { m->mothurOut(outputFileName+"\n"); if (mult) { m->mothurOut(outAllFileName+"\n"); outputTypes["summary"].push_back(outAllFileName); } outputTypes["summary"].push_back(outputFileName); }else { for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]+"\n"); } } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SummarySharedCommand", "execute"); exit(1); } } /***********************************************************/ int SummarySharedCommand::printSims(ostream& out, vector< vector >& simMatrix, vector theseGroups) { try { out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); //output num seqs out << simMatrix.size() << endl; if (output == "lt") { for (int b = 0; b < simMatrix.size(); b++) { out << 
theseGroups[b]; for (int n = 0; n < b; n++) { if (m->getControl_pressed()) { return 0; } out << '\t' << simMatrix[b][n]; } out << endl; } }else{ for (int b = 0; b < simMatrix.size(); b++) { out << theseGroups[b]; for (int n = 0; n < simMatrix[b].size(); n++) { if (m->getControl_pressed()) { return 0; } out << '\t' << simMatrix[b][n]; } out << endl; } } return 0; } catch(exception& e) { m->errorOut(e, "SummarySharedCommand", "printSims"); exit(1); } } /**************************************************************************************************/ void driverSummaryShared(summarySharedData* params) { try { vector sumCalculators; ValidCalculators validCalculator; for (int i=0; i < params->Estimators.size(); i++) { if (validCalculator.isValidCalculator("sharedsummary", params->Estimators[i]) ) { if (params->Estimators[i] == "sharedsobs") { sumCalculators.push_back(new SharedSobsCS()); }else if (params->Estimators[i] == "sharedchao") { sumCalculators.push_back(new SharedChao1()); }else if (params->Estimators[i] == "sharedace") { sumCalculators.push_back(new SharedAce()); }else if (params->Estimators[i] == "jabund") { sumCalculators.push_back(new JAbund()); }else if (params->Estimators[i] == "sorabund") { sumCalculators.push_back(new SorAbund()); }else if (params->Estimators[i] == "jclass") { sumCalculators.push_back(new Jclass()); }else if (params->Estimators[i] == "sorclass") { sumCalculators.push_back(new SorClass()); }else if (params->Estimators[i] == "jest") { sumCalculators.push_back(new Jest()); }else if (params->Estimators[i] == "sorest") { sumCalculators.push_back(new SorEst()); }else if (params->Estimators[i] == "thetayc") { sumCalculators.push_back(new ThetaYC()); }else if (params->Estimators[i] == "thetan") { sumCalculators.push_back(new ThetaN()); }else if (params->Estimators[i] == "kstest") { sumCalculators.push_back(new KSTest()); }else if (params->Estimators[i] == "sharednseqs") { sumCalculators.push_back(new SharedNSeqs()); }else if (params->Estimators[i] == "ochiai") { sumCalculators.push_back(new Ochiai()); }else if (params->Estimators[i] == "anderberg") { sumCalculators.push_back(new Anderberg()); }else if (params->Estimators[i] == "kulczynski") { sumCalculators.push_back(new Kulczynski()); }else if (params->Estimators[i] == "kulczynskicody") { sumCalculators.push_back(new KulczynskiCody()); }else if (params->Estimators[i] == "lennon") { sumCalculators.push_back(new Lennon()); }else if (params->Estimators[i] == "morisitahorn") { sumCalculators.push_back(new MorHorn()); }else if (params->Estimators[i] == "braycurtis") { sumCalculators.push_back(new BrayCurtis()); }else if (params->Estimators[i] == "whittaker") { sumCalculators.push_back(new Whittaker()); }else if (params->Estimators[i] == "odum") { sumCalculators.push_back(new Odum()); }else if (params->Estimators[i] == "canberra") { sumCalculators.push_back(new Canberra()); }else if (params->Estimators[i] == "structeuclidean") { sumCalculators.push_back(new StructEuclidean()); }else if (params->Estimators[i] == "structchord") { sumCalculators.push_back(new StructChord()); }else if (params->Estimators[i] == "hellinger") { sumCalculators.push_back(new Hellinger()); }else if (params->Estimators[i] == "manhattan") { sumCalculators.push_back(new Manhattan()); }else if (params->Estimators[i] == "structpearson") { sumCalculators.push_back(new StructPearson()); }else if (params->Estimators[i] == "soergel") { sumCalculators.push_back(new Soergel()); }else if (params->Estimators[i] == "spearman") { sumCalculators.push_back(new Spearman()); 
}else if (params->Estimators[i] == "structkulczynski") { sumCalculators.push_back(new StructKulczynski()); }else if (params->Estimators[i] == "speciesprofile") { sumCalculators.push_back(new SpeciesProfile()); }else if (params->Estimators[i] == "hamming") { sumCalculators.push_back(new Hamming()); }else if (params->Estimators[i] == "structchi2") { sumCalculators.push_back(new StructChi2()); }else if (params->Estimators[i] == "gower") { sumCalculators.push_back(new Gower()); }else if (params->Estimators[i] == "memchi2") { sumCalculators.push_back(new MemChi2()); }else if (params->Estimators[i] == "memchord") { sumCalculators.push_back(new MemChord()); }else if (params->Estimators[i] == "memeuclidean") { sumCalculators.push_back(new MemEuclidean()); }else if (params->Estimators[i] == "mempearson") { sumCalculators.push_back(new MemPearson()); }else if (params->Estimators[i] == "jsd") { sumCalculators.push_back(new JSD()); }else if (params->Estimators[i] == "rjsd") { sumCalculators.push_back(new RJSD()); } } } params->calcDists.resize(sumCalculators.size()); //loop through calculators and add to file all for all calcs that can do mutiple groups if (params->mult && params->main) { ofstream outAll; if (!params->subsample) { //print names params->util.openOutputFile(params->sumAllFile, outAll); outAll << params->thisLookup[0]->getLabel() << '\t'; //output label //output groups names string outNames = ""; for (int j = 0; j < params->thisLookup.size(); j++) { outNames += params->thisLookup[j]->getGroup() + "-"; } outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-'; outAll << outNames << '\t'; } for(int i=0;igetMultiple() ) { sumCalculators[i]->getValues(params->thisLookup); if (params->m->getControl_pressed()) { break; } if (!params->subsample) { outAll << '\t'; sumCalculators[i]->print(outAll); } } } if (!params->subsample) { outAll << endl; outAll.close(); } } ofstream outputFileHandle; if (!params->subsample) { params->util.openOutputFile(params->sumFile, outputFileHandle); } vector subset; for (int k = params->start; k < params->end; k++) { // pass cdd each set of groups to compare if (params->m->getControl_pressed()) { break; } for (int l = 0; l < k; l++) { subset.clear(); //clear out old pair of sharedrabunds //add new pair of sharedrabunds subset.push_back(params->thisLookup[k]); subset.push_back(params->thisLookup[l]); if (!params->subsample) { outputFileHandle << params->thisLookup[0]->getLabel() << '\t'; //sort groups to be alphanumeric if (params->thisLookup[k]->getGroup() > params->thisLookup[l]->getGroup()) { outputFileHandle << (params->thisLookup[l]->getGroup() +'\t' + params->thisLookup[k]->getGroup()) << '\t'; //print out groups }else{ outputFileHandle << (params->thisLookup[k]->getGroup() +'\t' + params->thisLookup[l]->getGroup()) << '\t'; //print out groups } } for(int i=0;igetNeedsAll()) { //load subset with rest of lookup for those calcs that need everyone to calc for a pair for (int w = 0; w < params->thisLookup.size(); w++) { if ((w != k) && (w != l)) { subset.push_back(params->thisLookup[w]); } } } vector tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs if (params->m->getControl_pressed()) { break; } if (!params->subsample) { outputFileHandle << '\t'; sumCalculators[i]->print(outputFileHandle); } seqDist temp(l, k, tempdata[0]); params->calcDists[i].push_back(temp); } if (!params->subsample) { outputFileHandle << endl; } } } if (!params->subsample) { outputFileHandle.close(); } for(int i=0;im->errorOut(e, 
"SummarySharedCommand", "driverSummaryShared"); exit(1); } } /***********************************************************/ int SummarySharedCommand::runCalcs(SharedRAbundVectors*& thisItersLookup, string sumFileName, string sumAllFile, vector< vector >& calcDists) { try{ //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { // Allocate memory for thread data. string extension = toString(i+1) + ".temp"; summarySharedData* dataBundle = new summarySharedData(sumFileName+extension, sumAllFile+extension, m, lines[i+1].start, lines[i+1].end, Estimators, thisItersLookup, false, mult, subsample); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverSummaryShared, dataBundle)); } //make copy of lookup so we don't get access violations //SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisItersLookup); string extension = toString(0) + ".temp"; summarySharedData* dataBundle = new summarySharedData(sumFileName+extension, sumAllFile+extension, m, lines[0].start, lines[0].end, Estimators, thisItersLookup, true, mult, subsample); driverSummaryShared(dataBundle); for (int k = 0; k < calcDists.size(); k++) { int size = dataBundle->calcDists[k].size(); for (int j = 0; j < size; j++) { calcDists[k].push_back(dataBundle->calcDists[k][j]); } } if (!subsample) { util.appendFiles((sumFileName + extension), sumFileName); util.mothurRemove((sumFileName + extension)); if (mult) { util.appendFiles((sumAllFile + extension), sumAllFile); util.mothurRemove((sumAllFile + extension)); } } for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); if (!subsample) { string extension = toString(i+1) + ".temp"; util.appendFiles((sumFileName + extension), sumFileName); util.mothurRemove((sumFileName + extension)); } if (createPhylip) { for (int k = 0; k < calcDists.size(); k++) { int size = data[i]->calcDists[k].size(); for (int j = 0; j < size; j++) { calcDists[k].push_back(data[i]->calcDists[k][j]); } } } delete data[i]; delete workerThreads[i]; } delete dataBundle; return 0; } catch(exception& e) { m->errorOut(e, "SummarySharedCommand", "runCalcs"); exit(1); } } /***********************************************************/ int SummarySharedCommand::process(SharedRAbundVectors* thisLookup, string sumFileName, string sumAllFileName, vector currentLabels) { try { vector< vector< vector > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. 
this will be used to make .dist files vector< vector > calcDists; calcDists.resize(numCalcs); SubSample sample; for (int thisIter = 0; thisIter < iters; thisIter++) { SharedRAbundVectors* thisItersLookup = new SharedRAbundVectors(*thisLookup); if (subsample) { //we want the summary results for the whole dataset, then the subsampling //make copy of lookup so we don't get access violations SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisItersLookup); if (withReplacement) { currentLabels = sample.getSampleWithReplacement(newLookup, subsampleSize); } else { currentLabels = sample.getSample(newLookup, subsampleSize); } delete thisItersLookup; thisItersLookup = newLookup; } runCalcs(thisItersLookup, sumFileName, sumAllFileName, calcDists); if (subsample) { //we want the summary results for the whole dataset, then the subsampling calcDistsTotals.push_back(calcDists); delete thisItersLookup; }else { if (createPhylip) { for (int i = 0; i < calcDists.size(); i++) { if (m->getControl_pressed()) { break; } //initialize matrix vector< vector > matrix; //square matrix to represent the distance matrix.resize(thisLookup->size()); vector GroupNames = thisLookup->getNamesGroups(); for (int k = 0; k < thisLookup->size(); k++) { matrix[k].resize(thisLookup->size(), 0.0); } for (int j = 0; j < calcDists[i].size(); j++) { int row = calcDists[i][j].seq1; int column = calcDists[i][j].seq2; double dist = calcDists[i][j].dist; matrix[row][column] = dist; matrix[column][row] = dist; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[calc]"] = sumCalculatorsNames[i]; variables["[distance]"] = thisLookup->getLabel(); variables["[outputtag]"] = output; variables["[tag2]"] = ""; string distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); ofstream outDist; util.openOutputFile(distFileName, outDist); outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); printSims(outDist, matrix, GroupNames); outDist.close(); } } } for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); } } if (subsample) { //we need to find the average distance and standard deviation for each groups distance vector< vector > calcAverages = util.getAverages(calcDistsTotals); //find standard deviation vector< vector > stdDev = util.getStandardDeviation(calcDistsTotals, calcAverages); //print results for (int i = 0; i < calcDists.size(); i++) { vector< vector > matrix; //square matrix to represent the distance matrix.resize(thisLookup->size()); for (int k = 0; k < thisLookup->size(); k++) { matrix[k].resize(thisLookup->size(), 0.0); } vector< vector > stdmatrix; //square matrix to represent the stdDev stdmatrix.resize(thisLookup->size()); vector GroupNames = thisLookup->getNamesGroups(); for (int k = 0; k < thisLookup->size(); k++) { stdmatrix[k].resize(thisLookup->size(), 0.0); } for (int j = 0; j < calcAverages[i].size(); j++) { int row = calcAverages[i][j].seq1; int column = calcAverages[i][j].seq2; float dist = calcAverages[i][j].dist; float stdDist = stdDev[i][j].dist; matrix[row][column] = dist; matrix[column][row] = dist; stdmatrix[row][column] = stdDist; stdmatrix[column][row] = stdDist; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(sharedfile)); variables["[calc]"] = sumCalculatorsNames[i]; variables["[distance]"] = thisLookup->getLabel(); variables["[outputtag]"] = output; variables["[tag2]"] = "ave"; string 
distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); ofstream outAve; util.openOutputFile(distFileName, outAve); outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint); printSims(outAve, matrix, GroupNames); outAve.close(); variables["[tag2]"] = "std"; distFileName = getOutputFileName("phylip",variables); outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); ofstream outSTD; util.openOutputFile(distFileName, outSTD); outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint); printSims(outSTD, stdmatrix, GroupNames); outSTD.close(); } } return 0; } catch(exception& e) { m->errorOut(e, "SummarySharedCommand", "process"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/commands/summarysharedcommand.h000077500000000000000000000076461424121717000226360ustar00rootroot00000000000000#ifndef SUMMARYSHAREDCOMMAND_H #define SUMMARYSHAREDCOMMAND_H /* * summarysharedcommand.h * Dotur * * Created by Sarah Westcott on 1/2/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" #include "calculator.h" #include "validcalculator.h" #include "sharedsobscollectsummary.h" #include "sharedchao1.h" #include "sharedace.h" #include "sharednseqs.h" #include "sharedjabund.h" #include "sharedsorabund.h" #include "sharedjclass.h" #include "sharedsorclass.h" #include "sharedjest.h" #include "sharedsorest.h" #include "sharedthetayc.h" #include "sharedthetan.h" #include "sharedkstest.h" #include "whittaker.h" #include "sharedochiai.h" #include "sharedanderbergs.h" #include "sharedkulczynski.h" #include "sharedkulczynskicody.h" #include "sharedlennon.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" //#include "sharedjackknife.h" #include "whittaker.h" #include "odum.h" #include "canberra.h" #include "structeuclidean.h" #include "structchord.h" #include "hellinger.h" #include "manhattan.h" #include "structpearson.h" #include "soergel.h" #include "spearman.h" #include "structkulczynski.h" #include "structchi2.h" #include "speciesprofile.h" #include "hamming.h" #include "gower.h" #include "memchi2.h" #include "memchord.h" #include "memeuclidean.h" #include "mempearson.h" #include "sharedjsd.h" #include "sharedrjsd.h" class SummarySharedCommand : public Command { public: SummarySharedCommand(string); ~SummarySharedCommand() = default; vector setParameters(); string getCommandName() { return "summary.shared"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Summary.shared"; } string getDescription() { return "generate a summary file containing calculator values for each line in the OTU data and for all possible comparisons between groups"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector lines; bool abort, allLines, mult, all, createPhylip, subsample, withReplacement; set labels; //holds labels to be used string label, calc, groups, sharedfile, output; vector Estimators, Groups, outputNames, sumCalculatorsNames; string format; int numGroups, processors, subsampleSize, iters, numCalcs; int process(SharedRAbundVectors*, string, string, vector); int printSims(ostream&, vector< vector >&, vector); int runCalcs(SharedRAbundVectors*&, string, string, vector< vector >&); 
}; /**************************************************************************************************/ //custom data structure for threads to use. //main process handling the calcs that can do more than 2 groups // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). struct summarySharedData { vector thisLookup; vector< vector > calcDists; vector Estimators; unsigned long long start; unsigned long long end; MothurOut* m; string sumFile, sumAllFile; int count; bool main, mult; bool subsample; Utils util; summarySharedData(){} summarySharedData(string sf, string sfa, MothurOut* mout, unsigned long long st, unsigned long long en, vector est, SharedRAbundVectors*& lu, bool mai, bool mu, bool sub) { sumFile = sf; sumAllFile = sfa; m = mout; start = st; end = en; Estimators = est; thisLookup = lu->getSharedRAbundVectors(); count=0; main = mai; mult = mu; subsample = sub; } ~summarySharedData() { for (int j = 0; j < thisLookup.size(); j++) { delete thisLookup[j]; } thisLookup.clear(); } }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/summarytaxcommand.cpp000077500000000000000000000350231424121717000225050ustar00rootroot00000000000000/* * summarytaxcommand.cpp * Mothur * * Created by westcott on 9/23/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "summarytaxcommand.h" #include "phylosummary.h" //********************************************************************************************************************** vector SummaryTaxCommand::setParameters(){ try { CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(ptaxonomy); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter prelabund("relabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prelabund); CommandParameter poutput("output", "Multiple", "simple-detail", "detail", "", "", "","",false,false, true); parameters.push_back(poutput); CommandParameter pthreshold("threshold", "Number", "", "0", "", "", "","",false,true); parameters.push_back(pthreshold); CommandParameter pprintlevel("printlevel", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pprintlevel); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["summary"] = tempOutNames; abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "SummaryTaxCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string SummaryTaxCommand::getHelpString(){ try { string 
helpString = ""; helpString += "The summary.tax command reads a taxonomy file and an optional name file, and summarizes the taxonomy information.\n"; helpString += "The summary.tax command parameters are taxonomy, count, group, name and relabund. taxonomy is required, unless you have a valid current taxonomy file.\n"; helpString += "The name parameter allows you to enter a name file associated with your taxonomy file. \n"; helpString += "The group parameter allows you to add a group file so you can have the summary totals broken up by group.\n"; helpString += "The count parameter allows you to add a count file so you can have the summary totals broken up by group.\n"; helpString += "The threshold parameter allows you to specify a cutoff for the taxonomy file that is being inputted. Once the classification falls below the threshold, mothur will refer to it as unclassified when calculating the consensus. This feature is similar to adjusting the cutoff in classify.seqs. Default=0.\n"; helpString += "The output parameter allows you to specify the format of your summary file. Options are simple and detail. The default is detail.\n"; helpString += "The printlevel parameter allows you to specify the taxlevel of your summary file to print to. Options are 1 to the max level in the file. The default is -1, meaning max level. If you select a level greater than the level your sequences classify to, mothur will print to the max level found in your file. \n"; helpString += "The relabund parameter allows you to indicate you want the summary file values to be relative abundances rather than raw abundances. Default=F. \n"; helpString += "The summary.tax command should be in the following format: \n"; helpString += "summary.tax(taxonomy=yourTaxonomyFile) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "SummaryTaxCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string SummaryTaxCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "summary") { pattern = "[filename],tax.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "SummaryTaxCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** SummaryTaxCommand::SummaryTaxCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; taxfile = validParameter.validFile(parameters, "taxonomy"); if (taxfile == "not open") { abort = true; } else if (taxfile == "not found") { taxfile = current->getTaxonomyFile(); if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter.\n"); } else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required.\n"); abort = true; } }else { current->setTaxonomyFile(taxfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } groupfile = 
validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if (outputdir == ""){ outputdir += util.hasPath(taxfile); } string temp = validParameter.valid(parameters, "relabund"); if (temp == "not found"){ temp = "false"; } relabund = util.isTrue(temp); temp = validParameter.valid(parameters, "printlevel"); if (temp == "not found"){ temp = "-1"; } util.mothurConvert(temp, printlevel); output = validParameter.valid(parameters, "output"); if(output == "not found"){ output = "detail"; } if ((output != "simple") && (output != "detail")) { m->mothurOut(output + " is not a valid output form. Options are simple and detail. I will use detail.\n"); output = "detail"; } temp = validParameter.valid(parameters, "threshold"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, threshold); } } catch(exception& e) { m->errorOut(e, "SummaryTaxCommand", "SummaryTaxCommand"); exit(1); } } //*************************************************************************************************************** int SummaryTaxCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } int maxLevel = findMaxLevel(taxfile); long start = time(nullptr); GroupMap* groupMap = nullptr; CountTable* ct = nullptr; if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); }else if (countfile != "") { ct = new CountTable(); ct->readTable(countfile, true, false); } PhyloSummary* taxaSum; if (countfile != "") { taxaSum = new PhyloSummary(ct, relabund, printlevel); }else { taxaSum = new PhyloSummary(groupMap, relabund, printlevel); } if (m->getControl_pressed()) { if (groupMap != nullptr) { delete groupMap; } if (ct != nullptr) { delete ct; } delete taxaSum; return 0; } int numSeqs = 0; map > nameMap; map >::iterator itNames; if (namefile != "") { util.readNames(namefile, nameMap); } ifstream in; util.openInputFile(taxfile, in); string name, taxon; while(!in.eof()){ if (m->getControl_pressed()) { break; } in >> name; gobble(in); taxon = util.getline(in); gobble(in); string newTax = util.addUnclassifieds(taxon, maxLevel, true); if (threshold != 0) { newTax = processTaxMap(newTax); } //add sequence to summary, countfile info included from Phylosummary constructor if (namefile != "") { itNames = nameMap.find(name); if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file please correct.\n"); exit(1); }else{ for (int i = 0; i < itNames->second.size(); i++) { taxaSum->addSeqToTree(itNames->second[i], newTax); //add it as many times as there are identical seqs numSeqs++; } itNames->second.clear(); nameMap.erase(itNames->first); } }else { taxaSum->addSeqToTree(name, newTax); numSeqs++; } } in.close(); if (m->getControl_pressed()) { if (groupMap != nullptr) { delete groupMap; } if (ct != nullptr) { delete ct; } delete taxaSum; return 0; } //print summary file ofstream outTaxTree; map variables; variables["[filename]"] = outputdir + 
util.getRootName(util.getSimpleName(taxfile)); string summaryFile = getOutputFileName("summary",variables); util.openOutputFile(summaryFile, outTaxTree); taxaSum->print(outTaxTree, output); outTaxTree.close(); delete taxaSum; if (groupMap != nullptr) { delete groupMap; } if (ct != nullptr) { numSeqs = ct->getNumSeqs(); delete ct; } if (m->getControl_pressed()) { util.mothurRemove(summaryFile); return 0; } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences.\n"); m->mothurOutEndLine(); m->mothurOut("\nOutput File Names: \n"); m->mothurOut(summaryFile); m->mothurOutEndLine(); outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "SummaryTaxCommand", "execute"); exit(1); } } /**************************************************************************************************/ string SummaryTaxCommand::processTaxMap(string tax) { try{ string newTax = tax; vector taxons; int taxLength = tax.length(); string taxon = ""; int spot = 0; for(int i=0;ierrorOut(e, "SummaryTaxCommand", "processTaxMap"); exit(1); } } /**************************************************************************************************/ int SummaryTaxCommand::findMaxLevel(string file) { try{ GroupMap* groupMap = nullptr; PhyloSummary taxaSum(groupMap, false, -1); taxaSum.summarize(file); return taxaSum.getMaxLevel(); } catch(exception& e) { m->errorOut(e, "SummaryTaxCommand", "findMaxLevel"); exit(1); } } /**************************************************************************************/ mothur-1.48.0/source/commands/summarytaxcommand.h000077500000000000000000000024131424121717000221470ustar00rootroot00000000000000#ifndef SUMMARYTAXCOMMAND_H #define SUMMARYTAXCOMMAND_H /* * summarytaxcommand.h * Mothur * * Created by westcott on 9/23/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "command.hpp" #include "counttable.h" /**************************************************************************************************/ class SummaryTaxCommand : public Command { public: SummaryTaxCommand(string); ~SummaryTaxCommand(){} vector setParameters(); string getCommandName() { return "summary.tax"; } string getCommandCategory() { return "Phylotype Analysis"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Summary.tax"; } string getDescription() { return "summarize the taxonomies of a set of sequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, relabund; string taxfile, namefile, groupfile, countfile, output; int printlevel, threshold; vector outputNames; map nameMap; string processTaxMap(string); int findMaxLevel(string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/systemcommand.cpp000077500000000000000000000071531424121717000216220ustar00rootroot00000000000000/* * systemcommand.cpp * Mothur * * Created by Sarah Westcott on 7/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "systemcommand.h" //********************************************************************************************************************** vector SystemCommand::setParameters(){ try { CurrentFile* current; current = CurrentFile::getInstance(); outputdir = current->getOutputDir(); abort = false; calledHelp = false; vector myArray; return myArray; } catch(exception& e) { m->errorOut(e, "SystemCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** SystemCommand::SystemCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { setParameters(); string optionCopy = option; string parameter = ""; //if command is used parameter=command optionCopy=cp .... if (optionCopy.find("command=") != string::npos) { util.splitAtEquals(parameter, optionCopy); } //ValidParameters validParameter; if (parameter != "command") { command = option; } else { command = optionCopy; } //command= removed if ((command == "")) { m->mothurOut("[ERROR]: You must enter a command to run.\n"); abort = true; } } } catch(exception& e) { m->errorOut(e, "SystemCommand", "SystemCommand"); exit(1); } } //********************************************************************************************************************** string SystemCommand::getHelpString(){ try { string helpString = ""; helpString += "The system command allows you to execute a system command from within mothur.\n"; helpString += "The system has no parameters.\n"; helpString += "The system command should be in the following format: system(yourCommand).\n"; helpString += "Example system(clear).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "SystemCommand", "help"); exit(1); } } //********************************************************************************************************************** int SystemCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (outputdir == "") { outputdir = "./"; } string redirectFileName = outputdir + "commandScreen.output"; //if command contains a redirect don't add the redirect bool usedRedirect = false; if ((command.find('>')) == string::npos) { command += " > " + redirectFileName + " 2>&1"; usedRedirect = true; } //m->mothurOut("[DEBUG]: command = '" + command + "'\n"); if (m->getDebug()) { m->mothurOut("[DEBUG]: command = '" + command + "'\n"); } system(command.c_str()); if (usedRedirect) { ifstream in; util.openInputFile(redirectFileName, in, "no error"); string output = ""; while(char c = in.get()){ if(in.eof()) { break; } else { output += c; } } in.close(); m->mothurOut(output); m->mothurOutEndLine(); util.mothurRemove(redirectFileName); } return 0; } catch(exception& e) { m->errorOut(e, "SystemCommand", "execute"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/systemcommand.h000077500000000000000000000015461424121717000212670ustar00rootroot00000000000000#ifndef SYSTEMCOMMAND_H #define SYSTEMCOMMAND_H /* * systemcommand.h * Mothur * * Created by Sarah Westcott on 7/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "command.hpp" class SystemCommand : public Command { public: SystemCommand(string); ~SystemCommand(){} vector setParameters(); string getCommandName() { return "system"; } string getCommandCategory() { return "General"; } string getHelpString(); string getOutputPattern(string){ return ""; } string getCitation() { return "http://www.mothur.org/wiki/System"; } string getDescription() { return "execute system commands from within mothur"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string command; bool abort; vector outputNames; }; #endif mothur-1.48.0/source/commands/translateseqscommand.cpp000066400000000000000000000603171424121717000231650ustar00rootroot00000000000000// // translateseqscommand.cpp // Mothur // // Created by Sarah Westcott on 11/8/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #include "translateseqscommand.hpp" //********************************************************************************************************************** vector TranslateSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "FastaReport", "none", "none","summary",false,true,true); parameters.push_back(pfasta); CommandParameter pamino("amino", "InputTypes", "", "", "FastaReport", "none", "none","summary",false,true,true); parameters.push_back(pamino); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pframes("frames", "Multiple", "1-2-3--1--2--3", "1", "", "", "","",true,false,true); parameters.push_back(pframes); CommandParameter pstop("stop", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pstop); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string TranslateSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The translate.seqs command reads a fastafile containing dna and translates it to amino acids. Alternatively, you can provide an amino file with aligned amino acids and fasta file with unaligned dna sequences and mothur will align the dna to the amino acid. Mothur expects the aligned amino acids and the fasta file to contain the same sequence names.\n"; helpString += "The translate.seqs command parameters are fasta, amino, frames, stop and processors.\n"; helpString += "The fasta parameter is used to provide a file containing DNA sequences. It is required.\n"; helpString += "The amino parameter is used to provide a file related to the fasta file containing amino acid sequences. The amino file should be aligned and mothur will align the DNA reads to the amino acids. Mothur assumes both files are in the same frame.\n"; helpString += "The frames parameter is used to indicate the reading frames you want to use. Options are 1, 2, 3, -1, -2, -3. 
You can select multiple frames by separating them with '|' characters. For example: frames=1|-1|2.\n"; helpString += "The stop parameter is used to indicate when to stop the translation. If T, then if the translation hits a stop codon, it stops before that codon. If F, it returns the full translation with a * as the stop codon. Default=t.\n"; helpString += "The translate.seqs command should be in the following format: \n"; helpString += "translate.seqs(fasta=yourFastaFile, processors=2) or translate.seqs(amino=yourProteinSeqs, fasta=yourDNASeqs)\n"; getCommonQuestions(); return helpString; } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string TranslateSeqsCommand::getCommonQuestions(){ try { vector questions, issues, qanswers, ianswers, howtos, hanswers; string commonQuestions = util.getFormattedHelp(questions, qanswers, issues, ianswers, howtos, hanswers); return commonQuestions; } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "getCommonQuestions"); exit(1); } } //********************************************************************************************************************** string TranslateSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],align-[filename],[tag],fasta"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** TranslateSeqsCommand::TranslateSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; abort = true; } else { current->setFastaFile(fastafile); } aminofile = validParameter.validFile(parameters, "amino"); if (aminofile == "not open") { aminofile = ""; abort = true; } else if (aminofile == "not found") { aminofile = ""; } string temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "frames"); if (temp == "not found"){ temp = "1"; } vector fs; util.splitAtChar(temp, fs, '|'); for (int i = 0; i < fs.size(); i++) { int thisFrame; util.mothurConvert(fs[i], thisFrame); if ((thisFrame == 1) || (thisFrame == 2) || (thisFrame == 3) || (thisFrame == -2) || (thisFrame == -1) || (thisFrame == -3)) { frames.push_back(thisFrame); } else { m->mothurOut("[WARNING]: " + fs[i] + " is not a valid frame option. Options include 1,2,3,-1,-2,-3. 
Ignoring " + fs[i] + "\n"); } } if (frames.size() == 0) { abort = true; } temp = validParameter.valid(parameters, "stop"); if (temp == "not found") { temp = "T"; } stop = util.isTrue(temp); } } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "TranslateSeqsCommand"); exit(1); } } //*************************************************************************************************************** int TranslateSeqsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } if (aminofile != "") { alignDNAAmino(); } //if amino file is provided then we are aligning dna to aligned amino acids, assumes same frame else { translateDNAtoAmino(); } //set accnos file as new current accnosfile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names:\n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "execute"); exit(1); } } //********************************************************************************************************************** void align(Alignment* alignment, Sequence& seq, Protein& prot, MothurOut* m) { try { int alignmentSize = max(prot.getAligned().size(), (seq.getUnaligned().size() / 3)+1); if (alignmentSize+1 > alignment->getnRows()) { if (m->getDebug()) { m->mothurOut("[DEBUG]: " + seq.getName() + " " + toString(seq.getUnaligned().length()) + " " + toString(alignment->getnRows()) + " \n"); } alignment->resize(alignmentSize+2); } alignment->align(seq, prot); seq.setAligned(alignment->getSeqAAln()); delete alignment; } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "alignDNA"); exit(1); } } //********************************************************************************************************************** //aligns dna to aligned amino acids void alignAminoDriver(alignAminoStruct* params) { try { ifstream inFASTA; params->util.openInputFile(params->fastaFilename, inFASTA); inFASTA.seekg(params->fastaPos.start); ifstream inAMINO; params->util.openInputFile(params->aminoFilename, inAMINO); inAMINO.seekg(params->aminoPos.start); bool done = false; long long count = 0; while (!done) { if (params->m->getControl_pressed()) { break; } Sequence seq(inFASTA); gobble(inFASTA); Protein prot(inAMINO); gobble(inAMINO); if ((seq.getName() != "") && (prot.getName() != "") && (seq.getName() == prot.getName())) { align(params->alignment, seq, prot, params->m); count++; params->outputWriter->write('>' + seq.getName() + '\n' + seq.getAligned() + '\n'); } #if defined NON_WINDOWS unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= params->fastaPos.end)) { break; } unsigned long long pos2 = inAMINO.tellg(); if ((pos2 == -1) || (pos2 >= params->aminoPos.end)) { break; } #else if (count == params->fastaPos.end) { break; } if (count == params->aminoPos.end) { break; } #endif //report progress if((count) % 1000 == 0){ params->m->mothurOutJustToScreen(toString(count) + "\n"); } } //report progress if((count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(count) + "\n"); } params->numSeqs = count; inFASTA.close(); inAMINO.close(); } catch(exception& e) { params->m->errorOut(e, "TranslateSeqsCommand", "alignAminoDriver"); exit(1); } } 
//********************************************************************************************************************** void TranslateSeqsCommand::alignDNAAmino() { try { long start = time(nullptr); //fills lines and alines. Also sets dnaAligned and aminoAligned setLines(); //create output file names string thisOutputDir = outputdir; string outputFileName = ""; map variables; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); outputFileName = getOutputFileName("fasta", variables); outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName); //create array of worker threads vector workerThreads; vector data; auto synchronizedOutputFile = std::make_shared(outputFileName); for (int i = 0; i < processors-1; i++) { OutputWriter* threadOutputWriter = new OutputWriter(synchronizedOutputFile); alignAminoStruct* dataBundle = new alignAminoStruct(lines[i+1], aLines[i+1], threadOutputWriter, fastafile, aminofile, stop); data.push_back(dataBundle); workerThreads.push_back(new std::thread(alignAminoDriver, dataBundle)); } OutputWriter* threadOutputWriter = new OutputWriter(synchronizedOutputFile); alignAminoStruct* dataBundle = new alignAminoStruct(lines[0], aLines[0], threadOutputWriter, fastafile, aminofile, stop); alignAminoDriver(dataBundle); double num = dataBundle->numSeqs; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->numSeqs; delete data[i]->outputWriter; delete data[i]; delete workerThreads[i]; } synchronizedOutputFile->close(); delete threadOutputWriter; delete dataBundle; m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " seconds to align " + toString(num) + " sequences.\n"); } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "alignDNAAmino"); exit(1); } } //********************************************************************************************************************** void TranslateSeqsCommand::translateDNAtoAmino() { try { long start = time(nullptr); string thisOutputDir = outputdir; if (outputdir == "") { thisOutputDir += util.hasPath(fastafile); } map variables; variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)); vector positions; #if defined NON_WINDOWS positions = util.divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else long long numFastaSeqs = 0; positions = util.setFilePosFasta(fastafile, numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; m->mothurOut("Reducing processors to " + toString(numFastaSeqs) + ".\n"); } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif double numSeqs = 0; for (int i = 0; i < frames.size(); i++) { if (m->getControl_pressed()) { break; } m->mothurOut("\nTranslating sequences to amino acids using frame " + toString(frames[i]) + ":\n"); variables["[tag]"] = "aa"+toString(frames[i]); string outputFileName = getOutputFileName("fasta", variables); outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName); numSeqs = createProcessesTranslateDNAtoAminoAcids(outputFileName, lines, 
frames[i]); } m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " seconds to translate " + toString(numSeqs) + " sequences.\n"); } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "translateDNAtoAmino"); exit(1); } } //********************************************************************************************************************** void translateToAminoAcidDriver(translateSeqsStruct* params) { try { ifstream inFASTA; params->util.openInputFile(params->inputFilename, inFASTA); inFASTA.seekg(params->filePos.start); bool done = false; long long count = 0; while (!done) { if (params->m->getControl_pressed()) { break; } Sequence seq(inFASTA); gobble(inFASTA); if (seq.getName() != "") { Protein prot = seq.getProtein(params->frame, params->stop); prot.printProtein(params->outputWriter); count++; } #if defined NON_WINDOWS unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= params->filePos.end)) { break; } #else if (count == params->filePos.end) { break; } #endif //report progress if((count) % 1000 == 0){ params->m->mothurOutJustToScreen(toString(count) + "\n"); } } //report progress if((count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(count) + "\n"); } params->numSeqs = count; inFASTA.close(); } catch(exception& e) { params->m->errorOut(e, "TranslateSeqsCommand", "translateToAminoAcidDriver"); exit(1); } } //*************************************************************************************************************** // translateSeqsStruct (linePair fP, OutputWriter* oFName, string fname, bool st, bool dn, int frame) { double TranslateSeqsCommand::createProcessesTranslateDNAtoAminoAcids(string outputFileName, vector lines, int frame) { try { //create array of worker threads vector workerThreads; vector data; auto synchronizedOutputFile = std::make_shared(outputFileName); for (int i = 0; i < processors-1; i++) { OutputWriter* threadOutputWriter = new OutputWriter(synchronizedOutputFile); translateSeqsStruct* dataBundle = new translateSeqsStruct(lines[i+1], threadOutputWriter, fastafile, stop, frame); data.push_back(dataBundle); workerThreads.push_back(new std::thread(translateToAminoAcidDriver, dataBundle)); } OutputWriter* threadOutputWriter = new OutputWriter(synchronizedOutputFile); translateSeqsStruct* dataBundle = new translateSeqsStruct(lines[0], threadOutputWriter, fastafile, stop, frame); translateToAminoAcidDriver(dataBundle); double num = dataBundle->numSeqs; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->numSeqs; delete data[i]->outputWriter; delete data[i]; delete workerThreads[i]; } synchronizedOutputFile->close(); delete threadOutputWriter; delete dataBundle; return num; } catch(exception& e) { m->errorOut(e, "TranslateSeqsCommand", "createProcesses"); exit(1); } } //*************************************************************************************************************** bool TranslateSeqsCommand::setLines() { try { vector fastaFilePos; vector afileFilePos; #if defined NON_WINDOWS //set file positions for fasta file fastaFilePos = util.divideFile(fastafile, processors); //get name of first sequence in each chunk map firstSeqNames; for (int i = 0; i < (fastaFilePos.size()-1); i++) { ifstream in; util.openInputFile(fastafile, in); in.seekg(fastaFilePos[i]); //adjust start if null strings if (i == 0) { util.zapGremlins(in); gobble(in); } Sequence temp(in); firstSeqNames[temp.getName()] = i; in.close(); } if(aminofile != "") { //seach for filePos of each first names in the aminofile and 
save in afileFilePos ifstream inAmino; util.openInputFile(aminofile, inAmino); string input; while(!inAmino.eof()){ input = util.getline(inAmino); if (input.length() != 0) { if(input[0] == '>'){ //this is a sequence name line istringstream nameStream(input); string sname = ""; nameStream >> sname; sname = sname.substr(1); util.checkName(sname); map::iterator it = firstSeqNames.find(sname); if(it != firstSeqNames.end()) { //this is the start of a new chunk double pos = inAmino.tellg(); afileFilePos.push_back(pos - input.length() - 1); firstSeqNames.erase(it); } } } if (firstSeqNames.size() == 0) { break; } } inAmino.close(); if (firstSeqNames.size() != 0) { for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { m->mothurOut(it->first + " is in your fasta file and not in your amino file, aborting.\n"); m->setControl_pressed(true); } return false; } //get last file position of qfile FILE * pFile; double size; //get num bytes in file aminofile = util.getFullPathName(aminofile); pFile = fopen (aminofile.c_str(),"rb"); if (pFile==nullptr) perror ("Error opening file"); else{ fseek (pFile, 0, SEEK_END); size=ftell (pFile); fclose (pFile); } afileFilePos.push_back(size); } for (int i = 0; i < (fastaFilePos.size()-1); i++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); } lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)])); if (aminofile != "") { aLines.push_back(linePair(afileFilePos[i], afileFilePos[(i+1)])); } } #else long long numFastaSeqs = 0; fastaFilePos = util.setFilePosFasta(fastafile, numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; } if (aminofile != "") { long long numAminoSeqs = 0; afileFilePos = util.setFilePosFasta(aminofile, numAminoSeqs); if (numFastaSeqs != numAminoSeqs) { m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numAminoSeqs) + " sequences in your amino file, please correct.\n"); m->setControl_pressed(true); return false; } } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor)); if (aminofile != "") { aLines.push_back(linePair(afileFilePos[startIndex], numSeqsPerProcessor)); } } #endif return true; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "setLines"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/translateseqscommand.hpp000066400000000000000000000105031424121717000231620ustar00rootroot00000000000000// // translateseqscommand.hpp // Mothur // // Created by Sarah Westcott on 11/8/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #ifndef translateseqscommand_hpp #define translateseqscommand_hpp #include "command.hpp" #include "sequence.hpp" #include "protein.hpp" #include "needlemanoverlap.hpp" /* This command would take... * DNA sequences and translate it to an amino acid sequence By default it would use the first frame The user could also specify the frame (1, 2, 3, -1, -2, -3) or possibly use all 6 frames Another option would be stop=T/F. If T, then if the translation hits a stop codon, it stops before that codon. 
If F, it returns the full translation with a * as the stop codon Output as *.aa#.fasta where # is the frame * Amino acid sequences and translate it to a DNA sequence Because of degeneracies there will be non-ATGC IUPAC codes in the output sequence Output as *.dna.fasta * Unaligned DNA and unaligned/aligned Amino acid sequences Back translate the amino acid sequence to the DNA sequence so that the DNA is aligned. This should result in the DNA bases being clustered in groups of 3 corresponding to each amino acid codon Hopefully the DNA sequence and the amino acid sequence will be in the same frame Output alignment as *.dna.align */ /**************************************************************************************************/ class TranslateSeqsCommand : public Command { public: TranslateSeqsCommand(string); ~TranslateSeqsCommand(){} vector setParameters(); string getCommandName() { return "tranlate.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getCommonQuestions(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/translate.seqs"; } string getDescription() { return "tranlate dna to amino acids or align dna to amino acids"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, stop; string fastafile, aminofile; int processors; vector outputNames; vector frames; vector lines; vector aLines; bool setLines(); //returns true if error free void translateDNAtoAmino(); void alignDNAAmino(); double createProcessesTranslateDNAtoAminoAcids(string, vector, int); double createProcessesAlign(string); }; //********************************************************************************************************************** struct translateSeqsStruct { OutputWriter* outputWriter; string inputFilename; bool stop; int frame; double numSeqs; linePair filePos; MothurOut* m; Utils util; translateSeqsStruct (linePair fP, OutputWriter* oFName, string fname, bool st, int f) { //passed in filePos.start = fP.start; filePos.end = fP.end; outputWriter = oFName; inputFilename = fname; frame = f; stop = st; //initialized numSeqs = 0; m = MothurOut::getInstance(); } ~translateSeqsStruct() = default; }; //********************************************************************************************************************** struct alignAminoStruct { OutputWriter* outputWriter; string fastaFilename, aminoFilename; bool stop; double numSeqs; linePair fastaPos; linePair aminoPos; MothurOut* m; Utils util; Alignment* alignment; alignAminoStruct (linePair fP, linePair aP, OutputWriter* oFName, string fname, string aname, bool st) { //passed in fastaPos.start = fP.start; fastaPos.end = fP.end; aminoPos.start = aP.start; aminoPos.end = aP.end; outputWriter = oFName; fastaFilename = fname; aminoFilename = aname; stop = st; alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, 5000); //initialized numSeqs = 0; m = MothurOut::getInstance(); } ~alignAminoStruct() = default; }; //********************************************************************************************************************** #endif /* translateseqscommand_hpp */ mothur-1.48.0/source/commands/treesharedcommand.cpp000077500000000000000000001230371424121717000224240ustar00rootroot00000000000000/* * treegroupscommand.cpp * Mothur * * Created by Sarah Westcott on 4/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
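 *
 *  For each label and each requested calculator this command turns the pairwise calculator values v into
 *  matrix entries of -(v - 1.0), builds a tree from that matrix with Tree::assembleTree(), and writes a
 *  newick file. When subsample=T it also writes an average tree (tag "ave"), the per-iteration trees
 *  (tag "all"), and their consensus (tag "cons").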
* */ #include "treesharedcommand.h" #include "subsample.h" #include "consensus.h" //********************************************************************************************************************** vector TreeSharedCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none","tree",false,false,true); parameters.push_back(pshared); CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none","tree",false,false); parameters.push_back(pphylip); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","",false,false); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "countcolumn","",false,false); parameters.push_back(pcount); CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName-countcolumn","tree",false,false); parameters.push_back(pcolumn); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter pwithreplacement("withreplacement", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pwithreplacement); CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; allLines = true; vector tempOutNames; outputTypes["tree"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "TreeSharedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string TreeSharedCommand::getHelpString(){ try { string helpString = ""; ValidCalculators validCalculator; helpString += "The tree.shared command creates a .tre to represent the similarity between groups or sequences.\n"; helpString += "The tree.shared 
command parameters are shared, groups, calc, phylip, column, name, cutoff, precision, processors, subsample, iters and label.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n"; helpString += "The group names are separated by dashes. The label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n"; helpString += "The phylip or column parameter are required if you do not provide a sharedfile, and only one may be used. If you use a column file the name filename is required. \n"; helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n"; helpString += "The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n"; helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n"; helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a shared file.\n"; helpString += "The withreplacement parameter allows you to indicate you want to subsample your data allowing for the same read to be included multiple times. Default=f. \n"; helpString += "Example tree.shared(groups=A-B-C, calc=jabund-sorabund).\n"; helpString += "The default value for groups is all the groups in your groupfile.\n"; helpString += "The default value for calc is jclass-thetayc.\n"; helpString += "The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose.\n"; helpString += validCalculator.printCalc("treegroup"); helpString += "Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile).\n"; helpString += "Example tree.shared(phylip=abrecovery.dist).\n"; return helpString; } catch(exception& e) { m->errorOut(e, "TreeSharedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string TreeSharedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "tree") { pattern = "[filename],[calc],[distance],[tag],tre-[filename],tre"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "TreeSharedCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** TreeSharedCommand::TreeSharedCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser. 
getParameters(); ValidParameters validParameter; phylipfile = validParameter.validFile(parameters, "phylip"); if (phylipfile == "not open") { phylipfile = ""; abort = true; } else if (phylipfile == "not found") { phylipfile = ""; } else { inputfile = phylipfile; format = "phylip"; current->setPhylipFile(phylipfile); } columnfile = validParameter.validFile(parameters, "column"); if (columnfile == "not open") { columnfile = ""; abort = true; } else if (columnfile == "not found") { columnfile = ""; } else { inputfile = columnfile; format = "column"; current->setColumnFile(columnfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { inputfile = sharedfile; format = "sharedfile"; current->setSharedFile(sharedfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((phylipfile == "") && (columnfile == "") && (sharedfile == "")) { //is there are current file available for either of these? //give priority to shared, then column, then phylip sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { columnfile = current->getColumnFile(); if (columnfile != "") { inputfile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter.\n"); } else { phylipfile = current->getPhylipFile(); if (phylipfile != "") { inputfile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a shared, phylip or column file.\n"); abort = true; } } } } else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When running the tree.shared command with a distance file you may not use both the column and the phylip parameters.\n"); abort = true; } if (columnfile != "") { if ((namefile == "") && (countfile == "")){ namefile = current->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format.\n"); abort = true; } } } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { calc = "jclass-thetayc"; } else { if (calc == "default") { calc = "jclass-thetayc"; } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } string temp; temp = validParameter.valid(parameters, "precision"); if (temp == "not found") { temp = "100"; } util.mothurConvert(temp, precision); temp = validParameter.valid(parameters, "cutoff"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, cutoff); cutoff += (5 / (precision * 10.0)); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "iters"); if (temp == "not found") { temp = "1000"; } util.mothurConvert(temp, iters); temp = validParameter.valid(parameters, "subsample"); if (temp == "not found") { temp = "F"; } if (util.isNumeric1(temp)) { util.mothurConvert(temp, subsampleSize); subsample = true; } else { if (util.isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later else { subsample = false; } } if (!subsample) { iters = 1; } temp = validParameter.valid(parameters, "withreplacement"); if (temp == "not found"){ temp = "f"; } withReplacement = util.isTrue(temp); if (subsample && (format != "sharedfile")) { m->mothurOut("[ERROR]: the subsample parameter can only be used with a shared file.\n"); abort=true; } if (outputdir == ""){ outputdir += util.hasPath(inputfile); } } } catch(exception& e) { m->errorOut(e, "TreeSharedCommand", "TreeSharedCommand"); exit(1); } } //********************************************************************************************************************** TreeSharedCommand::~TreeSharedCommand(){} //********************************************************************************************************************** int TreeSharedCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (format == "sharedfile") { InputData input(sharedfile, "sharedfile", Groups); set processedLabels; set userLabels = labels; string lastLabel = ""; SharedRAbundVectors* lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); Groups = lookup->getNamesGroups(); if (subsample) { if (subsampleSize == -1) { //user has not set size, set size = smallest samples size subsampleSize = lookup->getNumSeqsSmallestGroup(); }else { lookup->removeGroups(subsampleSize); Groups = lookup->getNamesGroups(); Treenames = Groups; } if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command.\n"); m->setControl_pressed(true); return 0; } } numGroups = lookup->size(); if (numGroups < 2) { m->mothurOut("[ERROR]: You have not provided enough valid groups. 
I cannot run the command.\n"); return 0; } //create treemap class from groupmap for tree class to use CountTable ct; set nameMap; map groupMap; set gps; for (int i = 0; i < Groups.size(); i++) { nameMap.insert(Groups[i]); gps.insert(Groups[i]); groupMap[Groups[i]] = Groups[i]; } ct.createTable(nameMap, groupMap, gps); //fills tree names with shared files groups Treenames = lookup->getNamesGroups(); if (m->getControl_pressed()) { return 0; } while (lookup != nullptr) { if (m->getControl_pressed()) { delete lookup; break; } createProcesses(lookup, ct); delete lookup; lookup = util.getNextShared(input, allLines, userLabels, processedLabels, lastLabel); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } }else{ //read in dist file filename = inputfile; ReadMatrix* readMatrix; if (format == "column") { readMatrix = new ReadColumnMatrix(filename); } else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); } readMatrix->setCutoff(cutoff); ListVector* list; if(namefile != ""){ NameAssignment* nameMap = new NameAssignment(namefile); nameMap->readMap(); readMatrix->read(nameMap); list = readMatrix->getListVector(); delete nameMap; }else if (countfile != "") { CountTable* ct = new CountTable(); ct->readTable(countfile, true, false); readMatrix->read(ct); list = readMatrix->getListVector(); delete ct; }else { NameAssignment* nameMap = nullptr; readMatrix->read(nameMap); list = readMatrix->getListVector(); } SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix(); Treenames.clear(); //make treemap CountTable ct; set nameMap; map groupMap; set gps; for (int i = 0; i < list->getNumBins(); i++) { string bin = list->get(i); nameMap.insert(bin); gps.insert(bin); groupMap[bin] = bin; Treenames.push_back(bin); } ct.createTable(nameMap, groupMap, gps); vector namesGroups = ct.getNamesOfGroups(); if (m->getControl_pressed()) { return 0; } vector< vector > matrix = makeSimsDist(dMatrix, list->getNumBins()); delete readMatrix; delete dMatrix; if (m->getControl_pressed()) { return 0; } //create a new filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); //printSims(cout, matrix, Treenames); Tree* newTree = new Tree(&ct, matrix, Treenames); if (m->getControl_pressed()) { delete newTree; newTree = nullptr; } else { newTree->assembleTree(); } if (newTree != nullptr) { newTree->createNewickFile(outputFile); delete newTree; } if (m->getControl_pressed()) { return 0; } m->mothurOut("Tree complete.\n"); } //set tree file as new current treefile string currentName = ""; itTypes = outputTypes.find("tree"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "TreeSharedCommand", "execute"); exit(1); } } /***********************************************************/ void TreeSharedCommand::printSims(ostream& out, vector< vector >& simMatrix, vector groupNames) { try { out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); out << simMatrix.size() << endl; for (int b = 0; b < simMatrix.size(); b++) { out << groupNames[b]; for (int n = 0; n < b; 
n++) { out << '\t' << simMatrix[b][n]; } out << endl; } } catch(exception& e) { m->errorOut(e, "TreeSharedCommand", "printSims"); exit(1); } } /***********************************************************/ vector< vector > TreeSharedCommand::makeSimsDist(SparseDistanceMatrix* matrix, int numGroups) { try { //initialize simMatrix vector< vector > simMatrix; simMatrix.resize(numGroups); for (int k = 0; k < simMatrix.size(); k++) { for (int j = 0; j < simMatrix.size(); j++) { simMatrix[k].push_back(0.0); } } //go through sparse matrix and fill sims //go through each cell in the sparsematrix for (int i = 0; i < matrix->seqVec.size(); i++) { for (int j = 0; j < matrix->seqVec[i].size(); j++) { //already checked everyone else in row if (i < matrix->seqVec[i][j].index) { simMatrix[i][matrix->seqVec[i][j].index] = -(matrix->seqVec[i][j].dist -1.0); simMatrix[matrix->seqVec[i][j].index][i] = -(matrix->seqVec[i][j].dist -1.0); if (m->getControl_pressed()) { return simMatrix; } } } } return simMatrix; } catch(exception& e) { m->errorOut(e, "TreeSharedCommand", "makeSimsDist"); exit(1); } } /**************************************************************************************************/ int driverTreeShared(vector& thisLookup, vector< vector >& calcDists, vector treeCalculators, MothurOut* m) { try { vector subset; for (int k = 0; k < thisLookup.size(); k++) { // pass cdd each set of groups to compare for (int l = 0; l < k; l++) { if (k != l) { //we dont need to similarity of a groups to itself subset.clear(); //clear out old pair of sharedrabunds //add new pair of sharedrabunds subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); for(int i=0;igetNeedsAll()) { //load subset with rest of lookup for those calcs that need everyone to calc for a pair for (int w = 0; w < thisLookup.size(); w++) { if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); } } } vector tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs if (m->getControl_pressed()) { return 1; } seqDist temp(l, k, tempdata[0]); calcDists[i].push_back(temp); } } } } return 0; } catch(exception& e) { m->errorOut(e, "TreeSharedCommand", "driverTreeShared"); exit(1); } } /**************************************************************************************************/ struct treeSharedData { SharedRAbundVectors* thisLookup; vector< vector< vector > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. 
this will be used to make .dist files vector< vector< vector > > matrices; //for each calculator a square matrix to represent the distances, only filled by main thread vector Estimators; long long numIters; MothurOut* m; int count, subsampleSize; bool subsample, withReplacement; treeSharedData(){} treeSharedData(long long st, bool su, bool wr, int subsize, vector est, SharedRAbundVectors* lu) { m = MothurOut::getInstance(); numIters = st; Estimators = est; thisLookup = lu; count = 0; subsample = su; withReplacement = wr; subsampleSize = subsize; } }; /***********************************************************/ int process(treeSharedData* params) { try{ ValidCalculators validCalculator; vector treeCalculators; for (int i=0; iEstimators.size(); i++) { if (validCalculator.isValidCalculator("treegroup", params->Estimators[i]) ) { if (params->Estimators[i] == "sharedsobs") { treeCalculators.push_back(new SharedSobsCS()); }else if (params->Estimators[i] == "sharedchao") { treeCalculators.push_back(new SharedChao1()); }else if (params->Estimators[i] == "sharedace") { treeCalculators.push_back(new SharedAce()); }else if (params->Estimators[i] == "jabund") { treeCalculators.push_back(new JAbund()); }else if (params->Estimators[i] == "sorabund") { treeCalculators.push_back(new SorAbund()); }else if (params->Estimators[i] == "jclass") { treeCalculators.push_back(new Jclass()); }else if (params->Estimators[i] == "sorclass") { treeCalculators.push_back(new SorClass()); }else if (params->Estimators[i] == "jest") { treeCalculators.push_back(new Jest()); }else if (params->Estimators[i] == "sorest") { treeCalculators.push_back(new SorEst()); }else if (params->Estimators[i] == "thetayc") { treeCalculators.push_back(new ThetaYC()); }else if (params->Estimators[i] == "thetan") { treeCalculators.push_back(new ThetaN()); }else if (params->Estimators[i] == "kstest") { treeCalculators.push_back(new KSTest()); }else if (params->Estimators[i] == "sharednseqs") { treeCalculators.push_back(new SharedNSeqs()); }else if (params->Estimators[i] == "ochiai") { treeCalculators.push_back(new Ochiai()); }else if (params->Estimators[i] == "anderberg") { treeCalculators.push_back(new Anderberg()); }else if (params->Estimators[i] == "kulczynski") { treeCalculators.push_back(new Kulczynski()); }else if (params->Estimators[i] == "kulczynskicody") { treeCalculators.push_back(new KulczynskiCody()); }else if (params->Estimators[i] == "lennon") { treeCalculators.push_back(new Lennon()); }else if (params->Estimators[i] == "morisitahorn") { treeCalculators.push_back(new MorHorn()); }else if (params->Estimators[i] == "braycurtis") { treeCalculators.push_back(new BrayCurtis()); }else if (params->Estimators[i] == "whittaker") { treeCalculators.push_back(new Whittaker()); }else if (params->Estimators[i] == "odum") { treeCalculators.push_back(new Odum()); }else if (params->Estimators[i] == "canberra") { treeCalculators.push_back(new Canberra()); }else if (params->Estimators[i] == "structeuclidean") { treeCalculators.push_back(new StructEuclidean()); }else if (params->Estimators[i] == "structchord") { treeCalculators.push_back(new StructChord()); }else if (params->Estimators[i] == "hellinger") { treeCalculators.push_back(new Hellinger()); }else if (params->Estimators[i] == "manhattan") { treeCalculators.push_back(new Manhattan()); }else if (params->Estimators[i] == "structpearson") { treeCalculators.push_back(new StructPearson()); }else if (params->Estimators[i] == "soergel") { treeCalculators.push_back(new Soergel()); }else if 
(params->Estimators[i] == "spearman") { treeCalculators.push_back(new Spearman()); }else if (params->Estimators[i] == "structkulczynski") { treeCalculators.push_back(new StructKulczynski()); }else if (params->Estimators[i] == "speciesprofile") { treeCalculators.push_back(new SpeciesProfile()); }else if (params->Estimators[i] == "hamming") { treeCalculators.push_back(new Hamming()); }else if (params->Estimators[i] == "structchi2") { treeCalculators.push_back(new StructChi2()); }else if (params->Estimators[i] == "gower") { treeCalculators.push_back(new Gower()); }else if (params->Estimators[i] == "memchi2") { treeCalculators.push_back(new MemChi2()); }else if (params->Estimators[i] == "memchord") { treeCalculators.push_back(new MemChord()); }else if (params->Estimators[i] == "memeuclidean") { treeCalculators.push_back(new MemEuclidean()); }else if (params->Estimators[i] == "mempearson") { treeCalculators.push_back(new MemPearson()); }else if (params->Estimators[i] == "jsd") { treeCalculators.push_back(new JSD()); }else if (params->Estimators[i] == "rjsd") { treeCalculators.push_back(new RJSD()); } } } //if the users entered no valid calculators don't execute command if (treeCalculators.size() == 0) { params->m->mothurOut("You have given no valid calculators.\n"); return 0; } params->Estimators.clear(); for (int i=0; iEstimators.push_back(treeCalculators[i]->getName()); } vector< vector > calcDists; calcDists.resize(treeCalculators.size()); SubSample sample; for (int thisIter = 0; thisIter < params->numIters; thisIter++) { SharedRAbundVectors* thisItersLookup = new SharedRAbundVectors(*params->thisLookup); vector namesOfGroups = thisItersLookup->getNamesGroups(); if (params->subsample) { if (params->withReplacement) { sample.getSampleWithReplacement(thisItersLookup, params->subsampleSize); } else { sample.getSample(thisItersLookup, params->subsampleSize); } } vector thisItersRabunds = thisItersLookup->getSharedRAbundVectors(); vector thisItersGroupNames = params->thisLookup->getNamesGroups(); driverTreeShared(thisItersRabunds, calcDists, treeCalculators, params->m); for (int i = 0; i < thisItersRabunds.size(); i++) { delete thisItersRabunds[i]; } if (params->subsample){ if((thisIter+1) % 100 == 0){ params->m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); } params->calcDistsTotals.push_back(calcDists); for (int i = 0; i < calcDists.size(); i++) { for (int j = 0; j < calcDists[i].size(); j++) { if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisItersGroupNames[calcDists[i][j].seq1] + " - " + thisItersGroupNames[calcDists[i][j].seq2] + " distance = " + toString(calcDists[i][j].dist) + ".\n"); } } } }else { //print results for whole dataset for (int i = 0; i < calcDists.size(); i++) { if (params->m->getControl_pressed()) { break; } //initialize matrix vector< vector > matrix; //square matrix to represent the distance matrix.resize(thisItersLookup->size()); for (int k = 0; k < thisItersLookup->size(); k++) { matrix[k].resize(thisItersLookup->size(), 0.0); } for (int j = 0; j < calcDists[i].size(); j++) { int row = calcDists[i][j].seq1; int column = calcDists[i][j].seq2; double dist = calcDists[i][j].dist; matrix[row][column] = -(dist-1.0); matrix[column][row] = -(dist-1.0); } params->matrices.push_back(matrix); } } for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); } delete thisItersLookup; } if((params->numIters) % 100 != 0){ params->m->mothurOutJustToScreen(toString(params->numIters)+"\n"); } for (int i=0; 
im->errorOut(e, "TreeSharedCommand", "process"); exit(1); } } /***********************************************************/ int TreeSharedCommand::createProcesses(SharedRAbundVectors*& thisLookup, CountTable& ct){ try { vector groupNames = thisLookup->getNamesGroups(); Treenames = groupNames; //may have changed if subsample eliminated groups vector lines; if (processors > (iters+1)) { processors = iters+1; } //figure out how many sequences you have to process int numItersPerProcessor = (iters+1) / processors; for (int i = 0; i < processors; i++) { if(i == (processors - 1)){ numItersPerProcessor = (iters+1) - i * numItersPerProcessor; } lines.push_back(numItersPerProcessor); } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { //make copy of lookup so we don't get access violations SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisLookup); treeSharedData* dataBundle = new treeSharedData(lines[i+1], subsample, withReplacement, subsampleSize, Estimators, newLookup); data.push_back(dataBundle); workerThreads.push_back(new std::thread(process, dataBundle)); } //make copy of lookup so we don't get access violations SharedRAbundVectors* newLookup = new SharedRAbundVectors(*thisLookup); treeSharedData* dataBundle = new treeSharedData(lines[0], subsample, withReplacement, subsampleSize, Estimators, newLookup); process(dataBundle); delete newLookup; Estimators.clear(); Estimators = dataBundle->Estimators; vector< vector< vector > > calcDistsTotals = dataBundle->calcDistsTotals; vector< vector< vector > > matrices = dataBundle->matrices; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); //get calcDistsTotal info - one entry per iter for (int j = 0; j < data[i]->calcDistsTotals.size(); j++) { calcDistsTotals.push_back(data[i]->calcDistsTotals[j]); } delete data[i]->thisLookup; delete data[i]; delete workerThreads[i]; } delete dataBundle; if (subsample) { //we need to find the average distance and standard deviation for each groups distance vector< vector > calcAverages = util.getAverages(calcDistsTotals); if (m->getDebug()) { m->mothurOut("[DEBUG]: found averages.\n"); } //create average tree for each calc for (int i = 0; i < Estimators.size(); i++) { vector< vector > matrix; //square matrix to represent the distance matrix.resize(thisLookup->size()); for (int k = 0; k < thisLookup->size(); k++) { matrix[k].resize(thisLookup->size(), 0.0); } for (int j = 0; j < calcAverages[i].size(); j++) { int row = calcAverages[i][j].seq1; int column = calcAverages[i][j].seq2; float dist = calcAverages[i][j].dist; matrix[row][column] = -(dist-1.0); //-(matrix->seqVec[i][j].dist -1.0) matrix[column][row] = -(dist-1.0); } //printSims(cout, matrix, Treenames); //create a new filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[calc]"] = Estimators[i]; variables["[distance]"] = thisLookup->getLabel(); variables["[tag]"] = "ave"; string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); //creates tree from similarity matrix and write out file Tree* newTree = new Tree(&ct, matrix, Treenames); if (m->getControl_pressed()) { delete newTree; newTree = nullptr; } else { newTree->assembleTree(); } if (newTree != nullptr) { newTree->createNewickFile(outputFile); delete newTree; } } if (m->getDebug()) { m->mothurOut("[DEBUG]: done averages trees.\n"); } //create all trees for 
each calc and find their consensus tree for (int i = 0; i < Estimators.size(); i++) { if (m->getControl_pressed()) { break; } //create a new filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[calc]"] = Estimators[i]; variables["[distance]"] = thisLookup->getLabel(); variables["[tag]"] = "all"; string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); ofstream outAll; util.openOutputFile(outputFile, outAll); vector trees; for (int myIter = 0; myIter < iters; myIter++) { if(m->getControl_pressed()) { break; } //initialize matrix vector< vector > matrix; //square matrix to represent the distance matrix.resize(thisLookup->size()); for (int k = 0; k < thisLookup->size(); k++) { matrix[k].resize(thisLookup->size(), 0.0); } for (int j = 0; j < calcDistsTotals[myIter][i].size(); j++) { int row = calcDistsTotals[myIter][i][j].seq1; int column = calcDistsTotals[myIter][i][j].seq2; double dist = calcDistsTotals[myIter][i][j].dist; matrix[row][column] = -(dist-1.0); matrix[column][row] = -(dist-1.0); } //creates tree from similarity matrix and write out file Tree* newTree = new Tree(&ct, matrix, Treenames); if (m->getControl_pressed()) { delete newTree; newTree = nullptr; } else { newTree->assembleTree(); } if (newTree != nullptr) { newTree->print(outAll); trees.push_back(newTree); } } outAll.close(); if (m->getControl_pressed()) { for (int k = 0; k < trees.size(); k++) { delete trees[k]; } } if (m->getDebug()) { m->mothurOut("[DEBUG]: done all trees.\n"); } Consensus consensus; Tree* conTree = consensus.getTree(trees); if (m->getDebug()) { m->mothurOut("[DEBUG]: done cons tree.\n"); } //create a new filename variables["[tag]"] = "cons"; string conFile = getOutputFileName("tree",variables); outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); ofstream outTree; util.openOutputFile(conFile, outTree); if (conTree != nullptr) { conTree->print(outTree, "boot"); delete conTree; } } }else { for (int i = 0; i < matrices.size(); i++) { if (m->getControl_pressed()) { break; } //initialize matrix vector< vector > matrix = matrices[i]; //square matrix to represent the distance //create a new filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(inputfile)); variables["[calc]"] = Estimators[i]; variables["[distance]"] = thisLookup->getLabel(); variables["[tag]"] = ""; string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); //creates tree from similarity matrix and write out file Tree* newTree = new Tree(&ct, matrix, Treenames); if (m->getControl_pressed()) { delete newTree; newTree = nullptr; } else { newTree->assembleTree(); } if (newTree != nullptr) { newTree->createNewickFile(outputFile); delete newTree; } } } return 0; } catch(exception& e) { m->errorOut(e, "TreeSharedCommand", "createProcesses"); exit(1); } } /***********************************************************/ mothur-1.48.0/source/commands/treesharedcommand.h000077500000000000000000000064311424121717000220670ustar00rootroot00000000000000#ifndef TREEGROUPCOMMAND_H #define TREEGROUPCOMMAND_H /* * treesharedcommand.h * Mothur * * Created by Sarah Westcott on 4/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "command.hpp" #include "inputdata.h" #include "groupmap.h" #include "validcalculator.h" #include "tree.h" #include "counttable.h" #include "readmatrix.hpp" #include "readcolumn.h" #include "readphylip.h" #include "sharedsobscollectsummary.h" #include "sharedchao1.h" #include "sharedace.h" #include "sharednseqs.h" #include "sharedjabund.h" #include "sharedsorabund.h" #include "sharedjclass.h" #include "sharedsorclass.h" #include "sharedjest.h" #include "sharedsorest.h" #include "sharedthetayc.h" #include "sharedthetan.h" #include "sharedkstest.h" #include "whittaker.h" #include "sharedochiai.h" #include "sharedanderbergs.h" #include "sharedkulczynski.h" #include "sharedkulczynskicody.h" #include "sharedlennon.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" #include "whittaker.h" #include "odum.h" #include "canberra.h" #include "structeuclidean.h" #include "structchord.h" #include "hellinger.h" #include "manhattan.h" #include "structpearson.h" #include "soergel.h" #include "spearman.h" #include "structkulczynski.h" #include "structchi2.h" #include "speciesprofile.h" #include "hamming.h" #include "gower.h" #include "memchi2.h" #include "memchord.h" #include "memeuclidean.h" #include "mempearson.h" #include "sharedrjsd.h" #include "sharedjsd.h" /* This command create a tree file for each similarity calculator at distance level, using various calculators to find the similiarity between groups. The user can select the lines or labels they wish to use as well as the groups they would like included. They can also use as many or as few calculators as they wish. */ /**************************************************************************************************************/ class TreeSharedCommand : public Command { public: TreeSharedCommand(string); TreeSharedCommand(); ~TreeSharedCommand(); vector setParameters(); string getCommandName() { return "tree.shared"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; } string getDescription() { return "generate a tree file that describes the dissimilarity among groups"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string lastLabel; string format, groupNames, filename, sharedfile, countfile, inputfile; int numGroups, subsampleSize, iters, processors; ofstream out; float precision, cutoff; vector Treenames; bool abort, allLines, subsample, withReplacement; set labels; //holds labels to be used string phylipfile, columnfile, namefile, calc, groups, label; vector Estimators, Groups, outputNames; //holds estimators to be used int createProcesses(SharedRAbundVectors*& thisLookup, CountTable&); void printSims(ostream&, vector< vector >&, vector); vector< vector > makeSimsDist(SparseDistanceMatrix*, int); }; /**************************************************************************************************************/ #endif mothur-1.48.0/source/commands/trimflowscommand.cpp000077500000000000000000001333561424121717000223310ustar00rootroot00000000000000/* * trimflowscommand.cpp * Mothur * * Created by Pat Schloss on 12/22/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "trimflowscommand.h" #include "needlemanoverlap.hpp" #include "counttable.h" //********************************************************************************************************************** vector TrimFlowsCommand::setParameters(){ try { CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos); CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop); CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows); CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal); CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise); CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles); CommandParameter porder("floworder", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder); CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["file"] = tempOutNames; outputTypes["count"] = tempOutNames; abort = false; calledHelp = false; comboStarts = 0; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string TrimFlowsCommand::getHelpString(){ try { string helpString = ""; helpString += "The trim.flows command reads a flowgram file and creates .....\n"; helpString += "The oligos parameter allows you to provide an oligos file.\n"; helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. 
\n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; helpString += "The floworder parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"; ; helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "getHelpString"); 
exit(1); } } //********************************************************************************************************************** string TrimFlowsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "flow") { pattern = "[filename],[tag],flow"; } else if (type == "count") { pattern = "[filename],flow.count_table"; } else if (type == "fasta") { pattern = "[filename],flow.fasta"; } else if (type == "file") { pattern = "[filename],flow.files"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** TrimFlowsCommand::TrimFlowsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; flowFileName = validParameter.validFile(parameters, "flow"); if (flowFileName == "not found") { flowFileName = current->getFlowFile(); if (flowFileName != "") { m->mothurOut("Using " + flowFileName + " as input file for the flow parameter.\n"); } else { m->mothurOut("No valid current flow file. You must provide a flow file.\n"); abort = true; } }else if (flowFileName == "not open") { flowFileName = ""; abort = true; } if (outputdir == ""){ outputdir += util.hasPath(flowFileName); } string temp = validParameter.valid(parameters, "minflows"); if (temp == "not found") { temp = "450"; } util.mothurConvert(temp, minFlows); temp = validParameter.valid(parameters, "maxflows"); if (temp == "not found") { temp = "450"; } util.mothurConvert(temp, maxFlows); temp = validParameter.validFile(parameters, "oligos"); if (temp == "not found") { oligoFileName = ""; } else if(temp == "not open") { abort = true; } else { oligoFileName = temp; current->setOligosFile(oligoFileName); } temp = validParameter.valid(parameters, "fasta"); if (temp == "not found"){ fasta = 0; } else if(util.isTrue(temp)) { fasta = 1; } temp = validParameter.valid(parameters, "maxhomop"); if (temp == "not found"){ temp = "9"; } util.mothurConvert(temp, maxHomoP); temp = validParameter.valid(parameters, "signal"); if (temp == "not found"){ temp = "0.50"; } util.mothurConvert(temp, signal); temp = validParameter.valid(parameters, "noise"); if (temp == "not found"){ temp = "0.70"; } util.mothurConvert(temp, noise); temp = validParameter.valid(parameters, "bdiffs"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, bdiffs); temp = validParameter.valid(parameters, "pdiffs"); if (temp == "not found"){ temp = "0"; } util.mothurConvert(temp, pdiffs); temp = validParameter.valid(parameters, "ldiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, ldiffs); temp = validParameter.valid(parameters, "sdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, sdiffs); temp = validParameter.valid(parameters, "tdiffs"); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); } util.mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } temp = validParameter.valid(parameters, "processors"); if (temp == "not 
found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "floworder"); if (temp == "not found"){ temp = "A"; } if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for floworder. floworder options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; } else { if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; } else if(toupper(temp[0]) == 'B'){ flowOrder = 
"TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; } else if(toupper(temp[0]) == 'I'){ flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; } else { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. 
A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true; } } if(oligoFileName == "") { allFiles = 0; } else { allFiles = 1; } temp = validParameter.valid(parameters, "checkorient"); if (temp == "not found") { temp = "F"; } reorient = util.isTrue(temp); numBarcodes = 0; numFPrimers = 0; numRPrimers = 0; numLinkers = 0; numSpacers = 0; } } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand"); exit(1); } } //*************************************************************************************************************** int TrimFlowsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(flowFileName)); string fastaFileName = getOutputFileName("fasta",variables); if(fasta){ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); } variables["[tag]"] = "trim"; string trimFlowFileName = getOutputFileName("flow",variables); outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName); variables["[tag]"] = "scrap"; string scrapFlowFileName = 
getOutputFileName("flow",variables); outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName); createGroup = false; if(oligoFileName != ""){ getOligos(); } createProcessesCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName); if (m->getControl_pressed()) { return 0; } string flowFilesFileName = getOutputFileName("file",variables); outputTypes["file"].push_back(flowFilesFileName); outputNames.push_back(flowFilesFileName); if((allFiles) && (groupMap.size() != 0)) { //print count file string countFileName = getOutputFileName("count",variables); CountTable ct; ct.createTable(groupMap); ct.printCompressedTable(countFileName); outputNames.push_back(countFileName); outputTypes["count"].push_back(countFileName); //run split.groups command string inputString = "flow=" + trimFlowFileName + ", count=" + countFileName; m->mothurOut("\n/******************************************/\n"); m->mothurOut("Generating flow files for each sample...\n\nRunning command: split.groups(" + inputString + ")\n"); current->setMothurCalling(true); SplitGroupCommand* splitCommand = new SplitGroupCommand(inputString); splitCommand->execute(); map > filenames = splitCommand->getOutputFiles(); delete splitCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); //print file file map >::iterator itFiles = filenames.find("flow"); if (itFiles != filenames.end()) { ofstream output; util.openOutputFile(flowFilesFileName, output); for (int i = 0; i < (itFiles->second).size(); i++) { output << (itFiles->second)[i] << endl; outputNames.push_back((itFiles->second)[i]); } output.close(); }else { ofstream output; util.openOutputFile(flowFilesFileName, output); output << util.getFullPathName(trimFlowFileName) << endl; output.close(); } }else{ ofstream output; util.openOutputFile(flowFilesFileName, output); output << util.getFullPathName(trimFlowFileName) << endl; output.close(); } current->setFileFile(flowFilesFileName); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); //set group file as new current groupfile string currentName = ""; itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } itTypes = outputTypes.find("file"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFileFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "execute"); exit(1); } } /**************************************************************************************************/ struct trimFlowData { MothurOut* m; string flowFileName, flowOrder; OutputWriter* trimFile; OutputWriter* scrapFile; OutputWriter* fastaFile; set badNames; unsigned long long lineStart, lineEnd; bool pairedOligos, reorient, fasta, createGroup; int tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, numFlows, maxHomoP, maxFlows, minFlows; float signal, noise; long long count; vector revPrimer; map barcodes; map primers; vector linker; vector spacer; vector primerNameVector; vector barcodeNameVector; map pairedBarcodes; map pairedPrimers; map groupMap; Utils util; trimFlowData(){} ~trimFlowData() { } trimFlowData(string fn, OutputWriter* tn, OutputWriter* sn, OutputWriter* ffn, bool useFasta, unsigned long long lstart, unsigned long long lend) { fasta = useFasta; 
fastaFile = ffn; flowFileName = fn; trimFile = tn; scrapFile = sn; lineStart = lstart; lineEnd = lend; m = MothurOut::getInstance(); } void setOligosOptions(bool cg, int pd, int bd, int ld, int sd, int td, map pri, map bar, vector revP, vector li, vector spa, map pbr, map ppr, bool po, vector priNameVector, vector barNameVector, bool reo, float sg, float nos, int mhom, string flo, int mxflo, int mnflo, int nmf) { createGroup = cg; pdiffs = pd; bdiffs = bd; ldiffs = ld; sdiffs = sd; tdiffs = td; barcodes = bar; pairedPrimers = ppr; pairedBarcodes = pbr; pairedOligos = po; primers = pri; revPrimer = revP; linker = li; spacer = spa; primerNameVector = priNameVector; barcodeNameVector = barNameVector; reorient = reo; signal = sg; noise = nos; maxHomoP = mhom; flowOrder = flo; maxFlows = mxflo; minFlows = mnflo; numFlows = nmf; count = 0; } }; //*************************************************************************************************************** void driverCreateTrim(trimFlowData* params){ try { ifstream flowFile; params->util.openInputFile(params->flowFileName, flowFile); flowFile.seekg(params->lineStart); if(params->lineStart == 0){ int temp; flowFile >> temp; gobble(flowFile); } FlowData flowData(params->numFlows, params->signal, params->noise, params->maxHomoP, params->flowOrder); params->count = 0; int numBarcodes = 0; int numLinkers = params->linker.size(); int numSpacers = params->spacer.size(); int numFPrimers = 0; int numRPrimers = 0; TrimOligos* trimOligos = nullptr; if (params->pairedOligos) { trimOligos = new TrimOligos(params->pdiffs, params->bdiffs, 0, 0, params->pairedPrimers, params->pairedBarcodes, false); numBarcodes = params->pairedBarcodes.size(); numFPrimers = params->pairedPrimers.size(); } else { trimOligos = new TrimOligos(params->pdiffs, params->bdiffs, params->ldiffs, params->sdiffs, params->primers, params->barcodes, params->revPrimer, params->linker, params->spacer); numBarcodes = params->barcodes.size(); numFPrimers = params->primers.size(); numRPrimers = params->revPrimer.size(); } TrimOligos* rtrimOligos = nullptr; if (params->reorient) { //create reoriented primer and barcode pairs map rpairedPrimers, rpairedBarcodes; for (map::iterator it = params->pairedPrimers.begin(); it != params->pairedPrimers.end(); it++) { oligosPair tempPair(params->util.reverseOligo((it->second).reverse), (params->util.reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer rpairedPrimers[it->first] = tempPair; } for (map::iterator it = params->pairedBarcodes.begin(); it != params->pairedBarcodes.end(); it++) { oligosPair tempPair(params->util.reverseOligo((it->second).reverse), (params->util.reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode rpairedBarcodes[it->first] = tempPair; } int index = rpairedBarcodes.size(); for (map::iterator it = params->barcodes.begin(); it != params->barcodes.end(); it++) { oligosPair tempPair("", params->util.reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode rpairedBarcodes[index] = tempPair; index++; } index = rpairedPrimers.size(); for (map::iterator it = params->primers.begin(); it != params->primers.end(); it++) { oligosPair tempPair("", params->util.reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode rpairedPrimers[index] = tempPair; index++; } rtrimOligos = new TrimOligos(params->pdiffs, params->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes, false); numBarcodes = rpairedBarcodes.size(); } bool moreSeqs = 1; while(moreSeqs) { if (params->m->getControl_pressed()) { break; } int 
success = 1; int currentSeqDiffs = 0; string trashCode = ""; string commentString = ""; flowData.getNext(flowFile); flowData.capFlows(params->maxFlows); Sequence currSeq = flowData.getSequence(); //for reorient Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); if(!flowData.hasMinFlows(params->minFlows)){ //screen to see if sequence is of a minimum number of flows success = 0; trashCode += 'l'; } if(!flowData.hasGoodHomoP()){ //screen to see if sequence meets the maximum homopolymer limit success = 0; trashCode += 'h'; } int primerIndex = 0; int barcodeIndex = 0; if(numLinkers != 0){ success = trimOligos->stripLinker(currSeq); if(success > params->ldiffs) { trashCode += 'k'; } else{ currentSeqDiffs += success; } } if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + currSeq.getName() + " " + currSeq.getUnaligned() + "\n"); } if(numBarcodes != 0){ vector results = trimOligos->stripBarcode(currSeq, barcodeIndex); if (params->pairedOligos) { success = results[0] + results[2]; commentString += "fbdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->bdiffs) + "), rbdiffs=" + toString(results[2]) + "(" + trimOligos->getCodeValue(results[3], params->bdiffs) + ") "; } else { success = results[0]; commentString += "bdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->bdiffs) + ") "; } if(success > params->bdiffs) { trashCode += 'b'; } else{ currentSeqDiffs += success; } } if(numSpacers != 0){ success = trimOligos->stripSpacer(currSeq); if(success > params->sdiffs) { trashCode += 's'; } else{ currentSeqDiffs += success; } } if(numFPrimers != 0){ vector results = trimOligos->stripForward(currSeq, primerIndex); if (params->pairedOligos) { success = results[0] + results[2]; commentString += "fpdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->pdiffs) + "), rpdiffs=" + toString(results[2]) + "(" + trimOligos->getCodeValue(results[3], params->pdiffs) + ") "; } else { success = results[0]; commentString += "fpdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->pdiffs) + ") "; } if(success > params->pdiffs) { trashCode += 'f'; } else{ currentSeqDiffs += success; } } if(numRPrimers != 0){ vector results = trimOligos->stripReverse(currSeq); success = results[0]; commentString += "rpdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->pdiffs) + ") "; if(success > params->pdiffs) { trashCode += 'r'; } else{ currentSeqDiffs += success; } } if (currentSeqDiffs > params->tdiffs) { trashCode += 't'; } if (params->reorient && (trashCode != "")) { //if you failed and want to check the reverse int thisSuccess = 0; string thisTrashCode = ""; int thisCurrentSeqsDiffs = 0; string thiscommentString = ""; int thisBarcodeIndex = 0; int thisPrimerIndex = 0; if(numBarcodes != 0){ vector results = rtrimOligos->stripBarcode(savedSeq, thisBarcodeIndex); if (params->pairedOligos) { thisSuccess = results[0] + results[2]; thiscommentString += "fbdiffs=" + toString(results[0]) + "(" + rtrimOligos->getCodeValue(results[1], params->bdiffs) + "), rbdiffs=" + toString(results[2]) + "(" + rtrimOligos->getCodeValue(results[3], params->bdiffs) + ") "; } else { thisSuccess = results[0]; thiscommentString += "bdiffs=" + toString(results[0]) + "(" + rtrimOligos->getCodeValue(results[1], params->bdiffs) + ") "; } if(thisSuccess > params->bdiffs) { thisTrashCode += "b"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if(numFPrimers != 0){ vector results = 
rtrimOligos->stripForward(savedSeq, thisPrimerIndex); if (params->pairedOligos) { thisSuccess = results[0] + results[2]; thiscommentString += "fpdiffs=" + toString(results[0]) + "(" + rtrimOligos->getCodeValue(results[1], params->pdiffs) + "), rpdiffs=" + toString(results[2]) + "(" + rtrimOligos->getCodeValue(results[3], params->pdiffs) + ") "; } else { thisSuccess = results[0]; thiscommentString += "pdiffs=" + toString(results[0]) + "(" + rtrimOligos->getCodeValue(results[1], params->pdiffs) + ") "; } if(thisSuccess > params->pdiffs) { thisTrashCode += "f"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if (thisCurrentSeqsDiffs > params->tdiffs) { thisTrashCode += 't'; } if (thisTrashCode == "") { trashCode = thisTrashCode; success = thisSuccess; currentSeqDiffs = thisCurrentSeqsDiffs; commentString = thiscommentString; barcodeIndex = thisBarcodeIndex; primerIndex = thisPrimerIndex; savedSeq.reverseComplement(); currSeq.setAligned(savedSeq.getAligned()); }else { trashCode += "(" + thisTrashCode + ")"; } } currSeq.setComment(commentString); if(trashCode.length() == 0){ string thisGroup = ""; if (params->createGroup) { if(numBarcodes != 0){ thisGroup = params->barcodeNameVector[barcodeIndex]; if (numFPrimers != 0) { if (params->primerNameVector[primerIndex] != "") { if(thisGroup != "") { thisGroup += "." + params->primerNameVector[primerIndex]; } else { thisGroup = params->primerNameVector[primerIndex]; } } } } } int pos = thisGroup.find("ignore"); if (pos == string::npos) { flowData.printFlows(params->trimFile); if(params->fasta) { currSeq.printSequence(params->fastaFile); } if (thisGroup != "") { params->groupMap[currSeq.getName()] = thisGroup; } } }else{ params->badNames.insert(currSeq.getName()); flowData.printFlows(params->scrapFile, trashCode); } params->count++; if((params->count) % 10000 == 0){ params->m->mothurOut(toString(params->count)+"\n"); } #if defined NON_WINDOWS unsigned long long pos = flowFile.tellg(); if ((pos == -1) || (pos >= params->lineEnd)) { break; } #else if ((params->count == params->lineEnd) || (flowFile.eof())) { break; } #endif } //report progress if((params->count) % 10000 != 0){ params->m->mothurOut(toString(params->count)+"\n"); } flowFile.close(); delete trimOligos; if (params->reorient) { delete rtrimOligos; } } catch(exception& e) { params->m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim"); exit(1); } } //*************************************************************************************************************** int TrimFlowsCommand::getOligos(){ try { bool allBlank = false; Oligos oligos; oligos.read(oligoFileName); if (m->getControl_pressed()) { return 0; } //error in reading oligos if (oligos.hasPairedBarcodes()) { pairedOligos = true; pairedPrimers = oligos.getPairedPrimers(); numFPrimers = pairedPrimers.size(); pairedBarcodes = oligos.getPairedBarcodes(); numBarcodes = pairedBarcodes.size(); }else { pairedOligos = false; primers = oligos.getPrimers(); numFPrimers = primers.size(); barcodes = oligos.getBarcodes(); numBarcodes = barcodes.size(); } barcodeNameVector = oligos.getBarcodeNames(); primerNameVector = oligos.getPrimerNames(); linker = oligos.getLinkers(); numLinkers = linker.size(); spacer = oligos.getSpacers(); numSpacers = spacer.size(); revPrimer = oligos.getReversePrimers(); numRPrimers = revPrimer.size(); vector groupNames = oligos.getGroupNames(); if (groupNames.size() == 0) { allFiles = 0; allBlank = true; } else { createGroup = true; } return 0; } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "getOligos"); 
exit(1); } } /**************************************************************************************************/ vector TrimFlowsCommand::getFlowFileBreaks() { try{ vector filePos; filePos.push_back(0); FILE * pFile; double size = 0.0; //get num bytes in file flowFileName = util.getFullPathName(flowFileName); pFile = fopen (flowFileName.c_str(),"rb"); if (pFile==nullptr) perror ("Error opening file"); else{ fseek (pFile, 0, SEEK_END); size=ftell (pFile); fclose (pFile); } //estimate file breaks double chunkSize = 0; chunkSize = size / processors; //file too small to divide by processors if (chunkSize == 0) { processors = 1; filePos.push_back(size); return filePos; } //for each process seekg to closest file break and search for next '>' char. make that the filebreak for (int i = 0; i < processors; i++) { double spot = (i+1) * chunkSize; ifstream in; util.openInputFile(flowFileName, in); in.seekg(spot); string dummy = util.getline(in); //there was not another sequence before the end of the file double sanityPos = in.tellg(); // if (sanityPos == -1) { break; } // else { filePos.push_back(newSpot); } if (sanityPos == -1) { break; } else { filePos.push_back(sanityPos); } in.close(); } //save end pos filePos.push_back(size); //sanity check filePos for (int i = 0; i < (filePos.size()-1); i++) { if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; } } ifstream in; util.openInputFile(flowFileName, in); in >> numFlows; gobble(in); in.close(); processors = (filePos.size() - 1); return filePos; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "getFlowFileBreaks"); exit(1); } } /**************************************************************************************************/ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName){ try { time_t start = time(nullptr); ifstream in; util.openInputFile(flowFileName, in); in >> numFlows; in.close(); vector lines; #if defined NON_WINDOWS vector flowFilePos = getFlowFileBreaks(); for (int i = 0; i < (flowFilePos.size()-1); i++) { lines.push_back(linePair(flowFilePos[i], flowFilePos[(i+1)])); } #else if (processors == 1) { lines.push_back(linePair(0, -1)); } else { long long numFlowLines; vector flowFilePos = util.setFilePosEachLine(flowFileName, numFlowLines); //figure out how many sequences you have to process int numSeqsPerProcessor = numFlowLines / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFlowLines - i * numSeqsPerProcessor; } lines.push_back(linePair(flowFilePos[startIndex], numSeqsPerProcessor)); } } #endif //create array of worker threads vector workerThreads; vector data; ofstream outTrim, outScrap; util.openOutputFile(trimFlowFileName, outTrim); outTrim << maxFlows << endl; outTrim.close(); util.openOutputFile(scrapFlowFileName, outScrap); outScrap << numFlows << endl; outScrap.close(); auto synchronizedOutputTrimFile = std::make_shared(trimFlowFileName, true); //append auto synchronizedOutputScrapFile = std::make_shared(scrapFlowFileName, true); //append auto synchronizedOutputFastaFile = std::make_shared(fastaFileName); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadTrimWriter = new OutputWriter(synchronizedOutputTrimFile); OutputWriter* threadScrapWriter = new OutputWriter(synchronizedOutputScrapFile); OutputWriter* threadFastaWriter = nullptr; if (fasta) { threadFastaWriter = new 
OutputWriter(synchronizedOutputFastaFile); } trimFlowData* dataBundle = new trimFlowData(flowFileName, threadTrimWriter, threadScrapWriter, threadFastaWriter, fasta, lines[i+1].start, lines[i+1].end); dataBundle->setOligosOptions(createGroup, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos, primerNameVector, barcodeNameVector, reorient, signal, noise, maxHomoP, flowOrder, maxFlows, minFlows, numFlows); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverCreateTrim, dataBundle)); } OutputWriter* threadTrimWriter = new OutputWriter(synchronizedOutputTrimFile); OutputWriter* threadScrapWriter = new OutputWriter(synchronizedOutputScrapFile); OutputWriter* threadFastaWriter = nullptr; if (fasta) { threadFastaWriter = new OutputWriter(synchronizedOutputFastaFile); } trimFlowData* dataBundle = new trimFlowData(flowFileName, threadTrimWriter, threadScrapWriter, threadFastaWriter, fasta, lines[0].start, lines[0].end); dataBundle->setOligosOptions(createGroup, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos, primerNameVector, barcodeNameVector, reorient, signal, noise, maxHomoP, flowOrder, maxFlows, minFlows, numFlows); driverCreateTrim(dataBundle); long long num = dataBundle->count; set badNames = dataBundle->badNames; groupMap = dataBundle->groupMap; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->trimFile; delete data[i]->scrapFile; if (fasta) { delete data[i]->fastaFile; } badNames.insert(data[i]->badNames.begin(), data[i]->badNames.end()); groupMap.insert(data[i]->groupMap.begin(), data[i]->groupMap.end()); delete data[i]; delete workerThreads[i]; } delete threadTrimWriter; delete threadScrapWriter; if (fasta) { delete threadFastaWriter; } delete dataBundle; m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to trim " + toString(num) + " sequences."); if (m->getDebug()) { m->mothurOut("Scrapped " + toString(badNames.size()) + ".\n"); } return num; } catch(exception& e) { m->errorOut(e, "TrimFlowsCommand", "createProcessesCreateTrim"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/trimflowscommand.h000077500000000000000000000033771424121717000217750ustar00rootroot00000000000000#ifndef TRIMFLOWSCOMMAND_H #define TRIMFLOWSCOMMAND_H /* * trimflowscommand.h * Mothur * * Created by Pat Schloss on 12/22/10. * Copyright 2010 Schloss Lab. All rights reserved. 
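 *
 * Scrap codes appended by driverCreateTrim (trimflowscommand.cpp) to reads in the
 * .scrap.flow file: l = too few flows, h = homopolymer too long, k = linker,
 * b = barcode, s = spacer, f = forward primer, r = reverse primer (allowed diffs
 * exceeded), t = total diffs exceeded. When checkorient=T and the reverse
 * orientation also fails, its codes are appended in parentheses.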
* */ #include "mothur.h" #include "command.hpp" #include "sequence.hpp" #include "flowdata.h" #include "groupmap.h" #include "trimoligos.h" #include "oligos.h" #include "splitgroupscommand.h" class TrimFlowsCommand : public Command { public: TrimFlowsCommand(string); ~TrimFlowsCommand() = default; vector setParameters(); string getCommandName() { return "trim.flows"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Trim.flows"; } string getDescription() { return "trim.flows"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector outputNames; set filesToRemove; int numFPrimers, numRPrimers, numBarcodes, processors, numFlows, comboStarts; int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, numLinkers, numSpacers; float signal, noise; bool fasta, pairedOligos, reorient, allFiles, abort, createGroup; string flowOrder, flowFileName, oligoFileName; map pairedBarcodes; map pairedPrimers; map barcodes; map primers; vector linker; vector spacer; vector primerNameVector; vector barcodeNameVector; vector revPrimer; map groupMap; vector getFlowFileBreaks(); int createProcessesCreateTrim(string, string, string, string); int getOligos(); }; #endif mothur-1.48.0/source/commands/trimseqscommand.cpp000066400000000000000000001763331424121717000221510ustar00rootroot00000000000000/* * trimseqscommand.cpp * Mothur * * Created by Pat Schloss on 6/6/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "trimseqscommand.h" #include "needlemanoverlap.hpp" #include "trimoligos.h" #include "removeseqscommand.h" //********************************************************************************************************************** vector TrimSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip); CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient); CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig); CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop); CommandParameter pminlength("minlength", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pminlength); CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs); 
CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs); CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles); CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward); CommandParameter plogtransform("logtransform", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plogtransform); CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim); CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold); CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage); CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage); CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage); CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize); CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize); CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst); CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["qfile"] = tempOutNames; //outputTypes["group"] = tempOutNames; //outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; abort = false; calledHelp = false; comboStarts = 0; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string TrimSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as count files if you provide and oligos file.\n"; helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n"; helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n"; helpString += "The fasta parameter 
is required.\n"; helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"; helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n"; helpString += "The oligos parameter allows you to provide an oligos file.\n"; helpString += "The name parameter allows you to provide a names file with your fasta file.\n"; helpString += "The count parameter allows you to provide a count file with your fasta file.\n"; helpString += "The maxambig parameter allows you to set the maximum number of ambiguous bases allowed. The default is -1.\n"; helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n"; helpString += "The minlength parameter allows you to set and minimum sequence length. \n"; helpString += "The maxlength parameter allows you to set and maximum sequence length. \n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; helpString += "The qfile parameter allows you to provide a quality file.\n"; helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n"; helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n"; helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n"; helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n"; helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n"; helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n"; helpString += "The logtransform parameter allows you to indicate you want the averages for the qwindowaverage, rollaverage and qaverage to be calculated using a logtransform. Default=F.\n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n"; helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n"; helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. 
\n"; helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n"; helpString += "The trim.seqs command should be in the following format: \n"; helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, \n"; helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n"; helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n"; helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n"; return helpString; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string TrimSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "qfile") { pattern = "[filename],[tag],qual"; } else if (type == "fasta") { pattern = "[filename],[tag],fasta"; } else if (type == "count") { pattern = "[filename],[tag],count_table-[filename],count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "getOutputPattern"); exit(1); } } //*************************************************************************************************************** TrimSeqsCommand::TrimSeqsCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastaFile = validParameter.validFile(parameters, "fasta"); if (fastaFile == "not found") { fastaFile = current->getFastaFile(); if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else if (fastaFile == "not open") { abort = true; } else { current->setFastaFile(fastaFile); } if (outputdir == ""){ outputdir += util.hasPath(fastaFile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... 
string temp = validParameter.valid(parameters, "flip"); if (temp == "not found") { flip = 0; } else { flip = util.isTrue(temp); } temp = validParameter.validFile(parameters, "oligos"); if (temp == "not found"){ oligoFile = ""; } else if(temp == "not open"){ abort = true; } else { oligoFile = temp; current->setOligosFile(oligoFile); } temp = validParameter.valid(parameters, "maxambig"); if (temp == "not found") { temp = "-1"; } util.mothurConvert(temp, maxAmbig); temp = validParameter.valid(parameters, "maxhomop"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, maxHomoP); temp = validParameter.valid(parameters, "minlength"); if (temp == "not found") { temp = "1"; } util.mothurConvert(temp, minLength); temp = validParameter.valid(parameters, "maxlength"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, maxLength); temp = validParameter.valid(parameters, "bdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, bdiffs); temp = validParameter.valid(parameters, "pdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, pdiffs); temp = validParameter.valid(parameters, "ldiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, ldiffs); temp = validParameter.valid(parameters, "sdiffs"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, sdiffs); temp = validParameter.valid(parameters, "tdiffs"); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); } util.mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } temp = validParameter.validFile(parameters, "qfile"); if (temp == "not found") { qFileName = ""; } else if(temp == "not open") { abort = true; } else { qFileName = temp; current->setQualFile(qFileName); } temp = validParameter.validFile(parameters, "name"); if (temp == "not found") { nameFile = ""; } else if(temp == "not open") { nameFile = ""; abort = true; } else { nameFile = temp; current->setNameFile(nameFile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name.\n"); abort = true; } temp = validParameter.valid(parameters, "qthreshold"); if (temp == "not found") { temp = "0"; } util.mothurConvert(temp, qThreshold); temp = validParameter.valid(parameters, "qtrim"); if (temp == "not found") { temp = "t"; } qtrim = util.isTrue(temp); temp = validParameter.valid(parameters, "rollaverage"); if (temp == "not found") { temp = "0"; } convert(temp, qRollAverage); temp = validParameter.valid(parameters, "qwindowaverage");if (temp == "not found") { temp = "0"; } convert(temp, qWindowAverage); temp = validParameter.valid(parameters, "qwindowsize"); if (temp == "not found") { temp = "50"; } convert(temp, qWindowSize); temp = validParameter.valid(parameters, "qstepsize"); if (temp == "not found") { temp = "1"; } convert(temp, qWindowStep); temp = validParameter.valid(parameters, "qaverage"); if (temp == "not found") { temp = "0"; } convert(temp, qAverage); temp = validParameter.valid(parameters, "keepfirst"); if (temp == "not found") { temp = "0"; } convert(temp, keepFirst); temp = validParameter.valid(parameters, "removelast"); if (temp == "not found") { temp = "0"; } convert(temp, removeLast); temp = 
validParameter.valid(parameters, "allfiles"); if (temp == "not found") { temp = "F"; } allFiles = util.isTrue(temp); temp = validParameter.valid(parameters, "keepforward"); if (temp == "not found") { temp = "F"; } keepforward = util.isTrue(temp); temp = validParameter.valid(parameters, "logtransform"); if (temp == "not found") { temp = "F"; } logtransform = util.isTrue(temp); temp = validParameter.valid(parameters, "checkorient"); if (temp == "not found") { temp = "F"; } reorient = util.isTrue(temp); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); if(allFiles && (oligoFile == "")){ m->mothurOut("You selected allfiles, but didn't enter an oligos. Ignoring the allfiles request.\n"); } if((qAverage != 0 && qThreshold != 0) && qFileName == ""){ m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored.\n"); qAverage=0; qThreshold=0; } if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){ m->mothurOut("You didn't set any options... quiting command.\n"); abort = true; } } pairedOligos = false; createGroup = false; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand"); exit(1); } } //*************************************************************************************************************** int TrimSeqsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastaFile)); variables["[tag]"] = "trim"; string trimSeqFile = getOutputFileName("fasta",variables); string trimQualFile = getOutputFileName("qfile",variables); outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); variables["[tag]"] = "scrap"; string scrapSeqFile = getOutputFileName("fasta",variables); string scrapQualFile = getOutputFileName("qfile",variables); outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile); if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); outputTypes["qfile"].push_back(trimQualFile); outputTypes["qfile"].push_back(scrapQualFile); } if (nameFile != "") { //convert to count file CountTable ct; ct.readTable(nameFile, "name"); map mvariables; mvariables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(nameFile)); countfile = getOutputFileName("count",mvariables); ct.printTable(countfile); nameFile = ""; } if (countfile != "") { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); } variables["[tag]"] = "trim"; string trimCountFile = getOutputFileName("count",variables); variables["[tag]"] = "scrap"; string scrapCountFile = getOutputFileName("count",variables); if (countfile != "") { CountTable ct; ct.readTable(countfile, true, false); nameCount = ct.getNameMap(); outputNames.push_back(trimCountFile); outputNames.push_back(scrapCountFile); outputTypes["count"].push_back(trimCountFile); outputTypes["count"].push_back(scrapCountFile); } if (m->getControl_pressed()) { return 0; } int startTime = time(nullptr); unordered_set badNames; long long numSeqs = createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, badNames); m->mothurOut("\nCreating count files...\n"); processNamesCountFiles(trimSeqFile, badNames, trimCountFile, scrapCountFile); if ((groupCounts.size() != 0) && (countfile == "")){ //we didnt 
start with a count file, but used an oligos file to assign sequences to groups outputNames.push_back(trimCountFile); outputNames.push_back(scrapCountFile); outputTypes["count"].push_back(trimCountFile); outputTypes["count"].push_back(scrapCountFile); } m->mothurOut("It took " + toString(time(nullptr) - startTime) + " secs to trim " + toString(numSeqs) + " sequences.\n"); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //output group counts m->mothurOutEndLine(); int total = 0; if (groupCounts.size() != 0) { m->mothurOut("Group count: \n"); } for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)+"\n"); } if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)+"\n"); } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("qfile"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setQualFile(currentName); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setGroupFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "execute"); exit(1); } } //*************************************************************************************************************** bool keepFirstTrim(Sequence& sequence, QualityScores& qscores, int keepFirst){ bool success = 1; if(qscores.getName() != ""){ qscores.trimQScores(-1, keepFirst); } sequence.trim(keepFirst); return success; } //*************************************************************************************************************** bool removeLastTrim(Sequence& sequence, QualityScores& qscores, int removeLast){ bool success = 0; int length = sequence.getNumBases() - removeLast; if(length > 0){ if(qscores.getName() != ""){ qscores.trimQScores(-1, length); } sequence.trim(length); success = 1; } else{ success = 0; } return success; } //*************************************************************************************************************** bool cullLength(Sequence& seq, int minLength, int maxLength){ int length = seq.getNumBases(); bool success = 0; //guilty until proven innocent if(length >= minLength && maxLength == 0) { success = 1; } else if(length >= minLength && length <= maxLength) { success = 1; } else { success = 0; } return success; } //*************************************************************************************************************** bool cullHomoP(Sequence& seq, int 
maxHomoP){ int longHomoP = seq.getLongHomoPolymer(); bool success = 0; //guilty until proven innocent if(longHomoP <= maxHomoP){ success = 1; } else { success = 0; } return success; } //*************************************************************************************************************** bool cullAmbigs(Sequence& seq, int maxAmbig){ int numNs = seq.getAmbigBases(); bool success = 0; //guilty until proven innocent if(numNs <= maxAmbig) { success = 1; } else { success = 0; } return success; } /**************************************************************************************************/ struct trimData { unsigned long long start, end; MothurOut* m; Oligos oligos; string filename, qFileName; OutputWriter* trimFileName; OutputWriter* scrapFileName; OutputWriter* trimQFileName; OutputWriter* scrapQFileName; unordered_set badNames; unsigned long long lineStart, lineEnd, qlineStart, qlineEnd; bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient, logtransform; int maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs; int qWindowSize, qWindowStep, keepFirst, removeLast, count; double qRollAverage, qThreshold, qWindowAverage, qAverage; vector revPrimer; map barcodes; map primers; map nameCount; map seq2Group; vector linker; vector spacer; map combos; map groupCounts; map pairedBarcodes; map pairedPrimers; Utils util; trimData(){} trimData(string fn, string qn, OutputWriter* tn, OutputWriter* sn, OutputWriter* tqn, OutputWriter* sqn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, map ncount) { filename = fn; qFileName = qn; trimFileName = tn; scrapFileName = sn; trimQFileName = tqn; scrapQFileName = sqn; lineStart = lstart; lineEnd = lend; qlineStart = qstart; qlineEnd = qend; m = MothurOut::getInstance(); nameCount = ncount; } void setOligosOptions(Oligos olig, int pd, int bd, int ld, int sd, int td, bool po, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL, int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt, int minL, int maxA, int maxH, int maxL, bool fli, bool reo) { oligos = olig; pdiffs = pd; bdiffs = bd; ldiffs = ld; sdiffs = sd; tdiffs = td; barcodes = oligos.getBarcodes(); pairedPrimers = oligos.getPairedPrimers(); pairedBarcodes = oligos.getPairedBarcodes(); pairedOligos = po; primers = oligos.getPrimers(); revPrimer = oligos.getReversePrimers(); linker = oligos.getLinkers(); spacer = oligos.getSpacers(); createGroup = cGroup; allFiles = aFiles; keepforward = keepF; keepFirst = keepfi; removeLast = removeL; qWindowStep = WindowStep; qWindowSize = WindowSize; qWindowAverage = WindowAverage; qtrim = trim; qThreshold = Threshold; qAverage = Average; qRollAverage = RollAverage; logtransform = lt; minLength = minL; maxAmbig = maxA; maxHomoP = maxH; maxLength = maxL; flip = fli; reorient = reo; count = 0; } }; /**************************************************************************************/ //string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector > fastaFileNames, vector > qualFileNames, vector > nameFileNames, linePair line, linePair qline int driverTrim(trimData* params) { try { int numFPrimers, numRPrimers, numLinkers, numSpacers; numFPrimers = params->primers.size(); numRPrimers = 
params->revPrimer.size(); numLinkers = params->linker.size(); numSpacers = params->spacer.size(); ifstream inFASTA; params->util.openInputFile(params->filename, inFASTA); inFASTA.seekg(params->lineStart); ifstream qFile; if(params->qFileName != "") { params->util.openInputFile(params->qFileName, qFile); qFile.seekg(params->qlineStart); } bool moreSeqs = 1; int numBarcodes = params->barcodes.size(); TrimOligos* trimOligos = nullptr; if (params->pairedOligos) { trimOligos = new TrimOligos(params->pdiffs, params->bdiffs, 0, 0, params->pairedPrimers, params->pairedBarcodes, false); numBarcodes = params->pairedBarcodes.size(); numFPrimers = params->pairedPrimers.size(); } else { trimOligos = new TrimOligos(params->pdiffs, params->bdiffs, params->ldiffs, params->sdiffs, params->primers, params->barcodes, params->revPrimer, params->linker, params->spacer); } TrimOligos* rtrimOligos = nullptr; if (params->reorient) { //create reoriented primer and barcode pairs map rpairedPrimers, rpairedBarcodes; for (map::iterator it = params->pairedPrimers.begin(); it != params->pairedPrimers.end(); it++) { oligosPair tempPair(params->util.reverseOligo((it->second).reverse), (params->util.reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer rpairedPrimers[it->first] = tempPair; } for (map::iterator it = params->pairedBarcodes.begin(); it != params->pairedBarcodes.end(); it++) { oligosPair tempPair(params->util.reverseOligo((it->second).reverse), (params->util.reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode rpairedBarcodes[it->first] = tempPair; } int index = rpairedBarcodes.size(); for (map::iterator it = params->barcodes.begin(); it != params->barcodes.end(); it++) { oligosPair tempPair("", params->util.reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode rpairedBarcodes[index] = tempPair; index++; } index = rpairedPrimers.size(); for (map::iterator it = params->primers.begin(); it != params->primers.end(); it++) { oligosPair tempPair("", params->util.reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode rpairedPrimers[index] = tempPair; index++; } rtrimOligos = new TrimOligos(params->pdiffs, params->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes, false); numBarcodes = rpairedBarcodes.size(); } //if(numBarcodes == 0){ params->createGroup = false; } primers can have names while (moreSeqs) { int obsBDiffs = 0; int obsPDiffs = 0; if (params->m->getControl_pressed()) { break; } int success = 1; string trashCode = ""; string commentString = ""; int currentSeqsDiffs = 0; Sequence currSeq(inFASTA); gobble(inFASTA); Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); QualityScores currQual; QualityScores savedQual; if(params->qFileName != ""){ currQual = QualityScores(qFile); gobble(qFile); savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores()); } string origSeq = currSeq.getUnaligned(); if (origSeq != "") { int barcodeIndex = 0; int primerIndex = 0; if(numLinkers != 0){ success = trimOligos->stripLinker(currSeq, currQual); if(success > params->ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } } if(numBarcodes != 0){ vector results = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex); if (params->pairedOligos) { success = results[0] + results[2]; commentString += "fbdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->bdiffs) + "), rbdiffs=" + toString(results[2]) + "(" + trimOligos->getCodeValue(results[3], params->bdiffs) + ") "; } else { success = results[0]; 
commentString += "bdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->bdiffs) + ") "; } if(success > params->bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } obsBDiffs = success; if(numSpacers != 0){ success = trimOligos->stripSpacer(currSeq, currQual); if(success > params->sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } } if(numFPrimers != 0){ vector results = trimOligos->stripForward(currSeq, currQual, primerIndex, params->keepforward); if (params->pairedOligos) { success = results[0] + results[2]; commentString += "fpdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->pdiffs) + "), rpdiffs=" + toString(results[2]) + "(" + trimOligos->getCodeValue(results[3], params->pdiffs) + ") "; } else { success = results[0]; commentString += "fpdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->pdiffs) + ") "; } if(success > params->pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } obsPDiffs = success; if(numRPrimers != 0){ vector results = trimOligos->stripReverse(currSeq, currQual); success = results[0]; commentString += "rpdiffs=" + toString(results[0]) + "(" + trimOligos->getCodeValue(results[1], params->pdiffs) + ") "; if(success > params->pdiffs) { trashCode += 'r'; } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > params->tdiffs) { trashCode += 't'; } if (params->reorient && (trashCode != "")) { //if you failed and want to check the reverse int thisSuccess = 0; string thisTrashCode = ""; string thiscommentString = ""; int thisCurrentSeqsDiffs = 0; int thisBarcodeIndex = 0; int thisPrimerIndex = 0; if(numBarcodes != 0){ vector results = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); if (params->pairedOligos) { thisSuccess = results[0] + results[2]; thiscommentString += "fbdiffs=" + toString(results[0]) + "(" + rtrimOligos->getCodeValue(results[1], params->bdiffs) + "), rbdiffs=" + toString(results[2]) + "(" + rtrimOligos->getCodeValue(results[3], params->bdiffs) + ") "; } else { thisSuccess = results[0]; thiscommentString += "bdiffs=" + toString(results[0]) + "(" + rtrimOligos->getCodeValue(results[1], params->bdiffs) + ") "; } if(thisSuccess > params->bdiffs) { thisTrashCode += "b"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } int revBDiffs = thisSuccess; if(numFPrimers != 0){ vector results = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, params->keepforward); if (params->pairedOligos) { thisSuccess = results[0] + results[2]; thiscommentString += "fpdiffs=" + toString(results[0]) + "(" + rtrimOligos->getCodeValue(results[1], params->pdiffs) + "), rpdiffs=" + toString(results[2]) + "(" + rtrimOligos->getCodeValue(results[3], params->pdiffs) + ") "; } else { thisSuccess = results[0]; thiscommentString += "pdiffs=" + toString(results[0]) + "(" + rtrimOligos->getCodeValue(results[1], params->pdiffs) + ") "; } if(thisSuccess > params->pdiffs) { thisTrashCode += "f"; } else{ thisCurrentSeqsDiffs += thisSuccess; } } int revPDiffs = thisSuccess; if (thisCurrentSeqsDiffs > params->tdiffs) { thisTrashCode += 't'; } if (thisTrashCode == "") { obsPDiffs = revPDiffs; obsBDiffs = revBDiffs; trashCode = thisTrashCode; success = thisSuccess; currentSeqsDiffs = thisCurrentSeqsDiffs; barcodeIndex = thisBarcodeIndex; commentString = thiscommentString; primerIndex = thisPrimerIndex; savedSeq.reverseComplement(); currSeq.setAligned(savedSeq.getAligned()); if(params->qFileName != ""){ 
savedQual.flipQScores(); currQual.setScores(savedQual.getScores()); } }else { trashCode += "(" + thisTrashCode + ")"; } } if(params->keepFirst != 0){ success = keepFirstTrim(currSeq, currQual, params->keepFirst); } if(params->removeLast != 0){ success = removeLastTrim(currSeq, currQual, params->removeLast); if(!success) { trashCode += 'l'; } } if(params->qFileName != ""){ int origLength = currSeq.getNumBases(); if(!params->util.isEqual(params->qThreshold,0)) { success = currQual.stripQualThreshold(currSeq, params->qThreshold); } else if(!params->util.isEqual(params->qAverage, 0)) { success = currQual.cullQualAverage(currSeq, params->qAverage, params->logtransform); } else if(!params->util.isEqual(params->qRollAverage, 0)) { success = currQual.stripQualRollingAverage(currSeq, params->qRollAverage, params->logtransform); } else if(!params->util.isEqual(params->qWindowAverage, 0)){ success = currQual.stripQualWindowAverage(currSeq, params->qWindowStep, params->qWindowSize, params->qWindowAverage, params->logtransform); } else { success = 1; } //you don't want to trim, if it fails above then scrap it if ((!params->qtrim) && (origLength != currSeq.getNumBases())) { success = 0; } if(!success) { trashCode += 'q'; } } if(params->minLength > 0 || params->maxLength > 0){ success = cullLength(currSeq, params->minLength, params->maxLength); if(!success) { trashCode += 'l'; } } if(params->maxHomoP > 0){ success = cullHomoP(currSeq, params->maxHomoP); if(!success) { trashCode += 'h'; } } if(params->maxAmbig != -1){ success = cullAmbigs(currSeq, params->maxAmbig); if(!success) { trashCode += 'n'; } } if(params->flip){ // should go last currSeq.reverseComplement(); if(params->qFileName != ""){ currQual.flipQScores(); } } if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode + "\n"); } string seqComment = currSeq.getComment(); currSeq.setComment("\t" + commentString + "\t" + seqComment); //if count table is provided update counts int numReps = 1; map::iterator itCounts = params->nameCount.find(currSeq.getName()); if (itCounts != params->nameCount.end()) { numReps = itCounts->second; } else { params->nameCount[currSeq.getName()] = 1; } if(trashCode.length() == 0){ string thisGroup = ""; if (params->createGroup) { thisGroup = params->oligos.getGroupName(barcodeIndex, primerIndex); } params->seq2Group[currSeq.getName()] = thisGroup; int pos = thisGroup.find("ignore"); if (pos == string::npos) { currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(params->trimFileName); if(params->qFileName != ""){ currQual.printQScores(params->trimQFileName); } if (params->createGroup) { if (params->m->getDebug()) { params->m->mothurOut(", group= " + thisGroup + "\n"); } map::iterator it = params->groupCounts.find(thisGroup); if (it == params->groupCounts.end()) { params->groupCounts[thisGroup] = numReps; } else { params->groupCounts[it->first] += numReps; } } } } else{ params->seq2Group[currSeq.getName()] = "scrap"; params->badNames.insert(currSeq.getName()); currSeq.setName(currSeq.getName() + " | " + trashCode); currSeq.setUnaligned(origSeq); currSeq.setAligned(origSeq); currSeq.printSequence(params->scrapFileName); if(params->qFileName != ""){ currQual.printQScores(params->scrapQFileName); } } params->count++; } #if defined NON_WINDOWS unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= params->lineEnd)) { break; } #else if ((params->count == params->lineEnd) || (inFASTA.eof())) { break; } #endif //report progress if((params->count) % 
1000 == 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } } //report progress if((params->count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(params->count)+"\n"); } delete trimOligos; if (params->reorient) { delete rtrimOligos; } inFASTA.close(); if(params->qFileName != "") { qFile.close(); } return params->count; } catch(exception& e) { params->m->errorOut(e, "TrimSeqsCommand", "driverTrim"); exit(1); } } /**************************************************************************************************/ long long TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, unordered_set& badNames) { try { string groupFile; Oligos oligos; if(oligoFile != ""){ oligos.read(oligoFile); if (m->getControl_pressed()) { return 0; } //error in reading oligos if (oligos.hasPairedBarcodes() || oligos.hasPairedPrimers()) { pairedOligos = true; } else { pairedOligos = false; } vector groupNames = oligos.getGroupNames(); if (groupNames.size() == 0) { allFiles = 0; } else { createGroup = true; } } //create array of worker threads vector workerThreads; vector data; //fills lines and qlines setLines(filename, qFileName); auto synchronizedOutputFastaTrimFile = std::make_shared(trimFASTAFileName); auto synchronizedOutputFastaScrapFile = std::make_shared(scrapFASTAFileName); auto synchronizedOutputQTrimFile = std::make_shared(trimQualFileName); auto synchronizedOutputQScrapFile = std::make_shared(scrapQualFileName); //Lauch worker threads for (int i = 0; i < processors-1; i++) { OutputWriter* threadFastaTrimWriter = new OutputWriter(synchronizedOutputFastaTrimFile); OutputWriter* threadFastaScrapWriter = new OutputWriter(synchronizedOutputFastaScrapFile); OutputWriter* threadQTrimWriter = nullptr; OutputWriter* threadQScrapWriter = nullptr; if (qFileName != "") { threadQTrimWriter = new OutputWriter(synchronizedOutputQTrimFile); threadQScrapWriter = new OutputWriter(synchronizedOutputQScrapFile); } //string fn, string qn, OutputWriter* tn, OutputWriter* sn, OutputWriter* tqn, OutputWriter* sqn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, map nm, map ncoun trimData* dataBundle = new trimData(filename, qFileName, threadFastaTrimWriter, threadFastaScrapWriter, threadQTrimWriter, threadQScrapWriter, lines[i+1].start, lines[i+1].end, qLines[i+1].start, qLines[i+1].end, nameCount); dataBundle->setOligosOptions(oligos, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, pairedOligos, createGroup, allFiles, keepforward, keepFirst, removeLast, qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform, minLength, maxAmbig, maxHomoP, maxLength, flip, reorient); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverTrim, dataBundle)); } OutputWriter* threadFastaTrimWriter = new OutputWriter(synchronizedOutputFastaTrimFile); OutputWriter* threadFastaScrapWriter = new OutputWriter(synchronizedOutputFastaScrapFile); OutputWriter* threadQTrimWriter = nullptr; OutputWriter* threadQScrapWriter = nullptr; if (qFileName != "") { threadQTrimWriter = new OutputWriter(synchronizedOutputQTrimFile); threadQScrapWriter = new OutputWriter(synchronizedOutputQScrapFile); } //string fn, string qn, OutputWriter* tn, OutputWriter* sn, OutputWriter* tqn, OutputWriter* sqn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, map nm, 
map ncoun trimData* dataBundle = new trimData(filename, qFileName, threadFastaTrimWriter, threadFastaScrapWriter, threadQTrimWriter, threadQScrapWriter, lines[0].start, lines[0].end, qLines[0].start, qLines[0].end, nameCount); dataBundle->setOligosOptions(oligos, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, pairedOligos, createGroup, allFiles, keepforward, keepFirst, removeLast, qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform, minLength, maxAmbig, maxHomoP, maxLength, flip, reorient); driverTrim(dataBundle); long long num = dataBundle->count; badNames = dataBundle->badNames; groupCounts = dataBundle->groupCounts; seq2Group = dataBundle->seq2Group; nameCount = dataBundle->nameCount; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; delete data[i]->trimFileName; delete data[i]->scrapFileName; if (qFileName != "") { delete data[i]->trimQFileName; delete data[i]->scrapQFileName; } badNames.insert(data[i]->badNames.begin(), data[i]->badNames.end()); seq2Group.insert(data[i]->seq2Group.begin(), data[i]->seq2Group.end()); nameCount.insert(data[i]->nameCount.begin(), data[i]->nameCount.end()); //merge counts for (map::iterator it = data[i]->groupCounts.begin(); it != data[i]->groupCounts.end(); it++) { map::iterator itMine = groupCounts.find(it->first); if (itMine != groupCounts.end()) { itMine->second += it->second; } else { groupCounts[it->first] = it->second; } } delete data[i]; delete workerThreads[i]; } synchronizedOutputFastaTrimFile->close(); synchronizedOutputQScrapFile->close(); delete threadFastaTrimWriter; delete threadFastaScrapWriter; if (qFileName != "") { synchronizedOutputQTrimFile->close(); synchronizedOutputFastaTrimFile->close(); delete threadQTrimWriter; delete threadQScrapWriter; } delete dataBundle; return num; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim"); exit(1); } } /**************************************************************************************************/ int TrimSeqsCommand::processNamesCountFiles(string trimFasta, unordered_set badNames, string trimCountFileName, string scrapCountFileName) { try { if (groupCounts.size() != 0) { CountTable newCt; if (badNames.size() != 0) { newCt.addGroup("scrap"); } for (map::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); } vector namesOfGroups = newCt.getNamesOfGroups(); int count = 0; map groupIndexes; for (int i = 0; i < namesOfGroups.size(); i++) { groupIndexes[namesOfGroups[i]] = count; count++; } for (map::iterator itSeqGroup = seq2Group.begin(); itSeqGroup != seq2Group.end(); itSeqGroup++) { string seqName = itSeqGroup->first; string seqGroup = itSeqGroup->second; map::iterator itCount = nameCount.find(seqName); if (itCount != nameCount.end()) { vector counts; counts.resize(count, 0); counts[groupIndexes[seqGroup]] = itCount->second; newCt.push_back(seqName, counts); }else { m->mothurOut("[ERROR]: missing count info for " + seqName + "\n"); m->setControl_pressed(true); } } //updated to include the group assignments map variables; variables["[tag]"] = "temp"; if (countfile != "") { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); } else { variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastaFile)); } string fullCountFile = getOutputFileName("count",variables); newCt.printTable(fullCountFile); //parse updated count table into scrap and trim files string dupsFile = 
fullCountFile; string dupsFormat = "count"; if (badNames.size() != 0) { //select bad names for scrap file pair scrapDups(dupsFile, scrapCountFileName); Command* getScrapCommand = new GetSeqsCommand(badNames, nullStringPair, nullStringPair, scrapDups, dupsFormat); delete getScrapCommand; //remove bad names for trim file pair trimDups(dupsFile, trimCountFileName); Command* removeScrapCommand = new RemoveSeqsCommand(badNames, trimDups, dupsFormat); delete removeScrapCommand; outputNames.push_back(trimCountFileName); outputNames.push_back(scrapCountFileName); outputTypes["count"].push_back(trimCountFileName); outputTypes["count"].push_back(scrapCountFileName); util.mothurRemove(fullCountFile); }else { //rename full file to be trim file util.renameFile(fullCountFile, trimCountFileName); CountTable newScrapCt; //scrap file newScrapCt.printTable(scrapCountFileName); outputNames.push_back(trimCountFileName); outputNames.push_back(scrapCountFileName); outputTypes["count"].push_back(trimCountFileName); outputTypes["count"].push_back(scrapCountFileName); } }else { //create a count file without groups CountTable newCt; //trimmed file CountTable newScrapCt; //scrap file for (map::iterator itCount = nameCount.begin(); itCount != nameCount.end(); itCount++) { if (badNames.count(itCount->first) == 0) { newCt.push_back(itCount->first, itCount->second); } else { newScrapCt.push_back(itCount->first, itCount->second); } } newCt.printTable(trimCountFileName); newScrapCt.printTable(scrapCountFileName); outputNames.push_back(trimCountFileName); outputNames.push_back(scrapCountFileName); outputTypes["count"].push_back(trimCountFileName); outputTypes["count"].push_back(scrapCountFileName); } if(allFiles){ //run split.groups command //use unique.seqs to create new name and fastafile string inputString = "fasta=" + trimFasta; if (countfile != "") { inputString += ", count=" + trimCountFileName; } m->mothurOut("/******************************************/\n"); m->mothurOut("Generating allfiles... 
Running command: split.groups(" + inputString + ")\n"); current->setMothurCalling(true); Command* splitCommand = new SplitGroupCommand(inputString); splitCommand->execute(); map > filenames = splitCommand->getOutputFiles(); delete splitCommand; current->setMothurCalling(false); m->mothurOut("/******************************************/\n"); } return 0; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "processNamesCountFiles"); exit(1); } } /**************************************************************************************************/ int TrimSeqsCommand::setLines(string filename, string qfilename) { try { vector fastaFilePos; vector qfileFilePos; #if defined NON_WINDOWS //set file positions for fasta file fastaFilePos = util.divideFile(filename, processors); //get name of first sequence in each chunk map firstSeqNames; for (int i = 0; i < (fastaFilePos.size()-1); i++) { ifstream in; util.openInputFile(filename, in); in.seekg(fastaFilePos[i]); //adjust start if null strings if (i == 0) { util.zapGremlins(in); gobble(in); } Sequence temp(in); firstSeqNames[temp.getName()] = i; in.close(); } if(qfilename != "") { //seach for filePos of each first name in the qfile and save in qfileFilePos ifstream inQual; util.openInputFile(qfilename, inQual); string input; while(!inQual.eof()){ input = util.getline(inQual); if (input.length() != 0) { if(input[0] == '>'){ //this is a sequence name line istringstream nameStream(input); string sname = ""; nameStream >> sname; sname = sname.substr(1); util.checkName(sname); map::iterator it = firstSeqNames.find(sname); if(it != firstSeqNames.end()) { //this is the start of a new chunk double pos = inQual.tellg(); qfileFilePos.push_back(pos - input.length() - 1); firstSeqNames.erase(it); } } } if (firstSeqNames.size() == 0) { break; } } inQual.close(); if (firstSeqNames.size() != 0) { for (map::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) { m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file.\n"); } qFileName = ""; return processors; } //get last file position of qfile FILE * pFile; double size; //get num bytes in file qfilename = util.getFullPathName(qfilename); pFile = fopen (qfilename.c_str(),"rb"); if (pFile==nullptr) perror ("Error opening file"); else{ fseek (pFile, 0, SEEK_END); size=ftell (pFile); fclose (pFile); } qfileFilePos.push_back(size); } for (int i = 0; i < (fastaFilePos.size()-1); i++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); } lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)])); if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)])); } } if(qfilename == "") { qLines = lines; } //files with duds return processors; #else long long numFastaSeqs = 0; fastaFilePos = util.setFilePosFasta(filename, numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; } if (qfilename != "") { long long numQualSeqs = 0; qfileFilePos = util.setFilePosFasta(qfilename, numQualSeqs); if (numFastaSeqs != numQualSeqs) { m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file.\n"); m->setControl_pressed(true); } } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ 
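// Note: numSeqsPerProcessor comes from the integer division above, so the last worker
// takes whatever remains and no sequences are dropped. For example, 1003 sequences
// split across 4 processors gives chunks of 250, 250, 250 and a final chunk of 253.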
numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor)); if (qfilename != "") { qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); } } if(qfilename == "") { qLines = lines; } //files with duds return 1; #endif } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "setLines"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/commands/trimseqscommand.h000077500000000000000000000041651424121717000216120ustar00rootroot00000000000000#ifndef TRIMSEQSCOMMAND_H #define TRIMSEQSCOMMAND_H /* * trimseqscommand.h * Mothur * * Created by Pat Schloss on 6/6/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "mothur.h" #include "command.hpp" #include "sequence.hpp" #include "qualityscores.h" #include "trimoligos.h" #include "counttable.h" #include "writer.h" #include "splitgroupscommand.h" #include "oligos.h" class TrimSeqsCommand : public Command { public: TrimSeqsCommand(string); ~TrimSeqsCommand(){} vector setParameters(); string getCommandName() { return "trim.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Trim.seqs"; } string getDescription() { return "provides the preprocessing features needed to screen and sort pyrosequences"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: bool abort, createGroup; string fastaFile, oligoFile, qFileName, nameFile, countfile; bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform; int maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts; int qWindowSize, qWindowStep, keepFirst, removeLast; double qRollAverage, qThreshold, qWindowAverage, qAverage; vector outputNames; set filesToRemove; vector groupVector; map groupCounts; map nameCount; //for countfile name -> repCount map seq2Group; vector lines; vector qLines; long long createProcessesCreateTrim(string, string, string, string, string, string, unordered_set&); int processNamesCountFiles(string trimFasta, unordered_set badNames, string trimCountFileName, string scrapCountFileName); int setLines(string, string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/unifracunweightedcommand.cpp000077500000000000000000001100741424121717000240060ustar00rootroot00000000000000/* * unifracunweightedcommand.cpp * Mothur * * Created by Sarah Westcott on 2/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "unifracunweightedcommand.h" #include "treereader.h" #include "subsample.h" #include "consensus.h" //********************************************************************************************************************** vector UnifracUnweightedCommand::setParameters(){ try { CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","unweighted-uwsummary",false,true,true); parameters.push_back(ptree); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom); CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance); CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter pwithreplacement("withreplacement", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pwithreplacement); CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus); CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["unweighted"] = tempOutNames; outputTypes["uwsummary"] = tempOutNames; outputTypes["phylip"] = tempOutNames; outputTypes["column"] = tempOutNames; outputTypes["tree"] = tempOutNames; abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string UnifracUnweightedCommand::getHelpString(){ try { string helpString = ""; helpString += "The unifrac.unweighted command parameters are tree, group, name, count, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n"; helpString += "The group names are separated by dashes. 
The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"; helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n"; helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n"; helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n"; helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n"; helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n"; helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n"; helpString += "The withreplacement parameter allows you to indicate you want to subsample your data allowing for the same read to be included multiple times. Default=f. \n"; helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n"; helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n"; helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n"; helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string UnifracUnweightedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "unweighted") { pattern = "[filename],unweighted-[filename],[tag],unweighted"; } else if (type == "uwsummary") { pattern = "[filename],uwsummary"; } else if (type == "phylip") { pattern = "[filename],[tag],[tag2],dist"; } else if (type == "column") { pattern = "[filename],[tag],[tag2],dist"; } else if (type == "tree") { pattern = "[filename],[tag],[tag2],tre"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "getOutputPattern"); exit(1); } } /***********************************************************/ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { abort = true; } else if (treefile == "not found") { //if there is a current design file, use it treefile = current->getTreeFile(); if (treefile != "") { m->mothurOut("Using " + 
treefile + " as input file for the tree parameter.\n"); } else { m->mothurOut("You have no current tree file and the tree parameter is required.\n"); abort = true; } }else { current->setTreeFile(treefile); } //check for required parameters groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if (outputdir == ""){ outputdir = util.hasPath(treefile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } itersString = validParameter.valid(parameters, "iters"); if (itersString == "not found") { itersString = "1000"; } util.mothurConvert(itersString, iters); string temp = validParameter.valid(parameters, "distance"); if (temp == "not found") { phylip = false; outputForm = ""; } else{ if (temp=="phylip") { temp = "lt"; } if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; } else { m->mothurOut("Options for distance are: lt, square, or column. 
Using lt.\n"); phylip = true; outputForm = "lt"; } } temp = validParameter.valid(parameters, "random"); if (temp == "not found") { temp = "f"; } random = util.isTrue(temp); temp = validParameter.valid(parameters, "root"); if (temp == "not found") { temp = "F"; } includeRoot = util.isTrue(temp); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } current->setProcessors(temp); util.mothurConvert(temp, processors); temp = validParameter.valid(parameters, "subsample"); if (temp == "not found") { temp = "F"; } if (util.isNumeric1(temp)) { util.mothurConvert(temp, subsampleSize); subsample = true; } else { if (util.isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later else { subsample = false; } } if (!subsample) { subsampleIters = 0; } else { subsampleIters = iters; } temp = validParameter.valid(parameters, "consensus"); if (temp == "not found") { temp = "F"; } consensus = util.isTrue(temp); if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; } if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } } else { CountTable testCt; if ((!testCt.testGroups(countfile)) && (subsample)) { m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true; } } if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; } if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; } temp = validParameter.valid(parameters, "withreplacement"); if (temp == "not found"){ temp = "f"; } withReplacement = util.isTrue(temp); if (!random) { iters = 0; } //turn off random calcs } } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand"); exit(1); } } /***********************************************************/ int UnifracUnweightedCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } TreeReader* reader = nullptr; if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); } else { reader = new TreeReader(treefile, countfile); } vector T; T = reader->getTrees(); //user trees CountTable* ct; ct = T[0]->getCountTable(); if ((Groups.size() == 0) || (Groups.size() < 2)) { Groups = ct->getNamesOfGroups(); } //must have at least 2 groups to compare delete reader; map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(treefile)); sumFile = getOutputFileName("uwsummary",variables); outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile); util.openOutputFile(sumFile, outSum); long start = time(nullptr); //set or check size if (subsample) { //user has not set size, set size = smallest samples size if (subsampleSize == -1) { subsampleSize = ct->getNumSeqsSmallestGroup(); m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n"); }else { //eliminate any too small groups vector newGroups = Groups; Groups.clear(); for (int i = 0; i < newGroups.size(); i++) { int thisSize = ct->getGroupCount(newGroups[i]); if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); } else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); } } } } if ((Groups.size() == 0) || (Groups.size() < 2)) { Groups = ct->getNamesOfGroups(); } //must have at least 2 groups to 
compare Unweighted unweighted(includeRoot, Groups); util.getCombos(groupComb, Groups, numComp); numGroups = Groups.size(); if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } if (numComp < processors) { processors = numComp; m->mothurOut("Reducing processors to " + toString(numComp) + ".\n"); } if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; } outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t'; m->mothurOut("Tree#\tGroups\tUWScore\t"); if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); } outSum << endl; m->mothurOutEndLine(); //get pscores for users trees for (int i = 0; i < T.size(); i++) { if (m->getControl_pressed()) { break; } counter = 0; rscoreFreq.resize(numComp); rCumul.resize(numComp); utreeScores.resize(numComp); UWScoreSig.resize(numComp); vector userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC... userData = unweighted.getValues(T[i], processors); //userData[0] = unweightedscore if (m->getControl_pressed()) { break; } //output scores for each combination for(int k = 0; k < numComp; k++) { //saves users score utreeScores[k].push_back(userData[k]); //add users score to validscores validScores[userData[k]] = userData[k]; if (!random) { UWScoreSig[k].push_back(0.0); } } if (random) { runRandomCalcs(T[i], userData); } if (m->getControl_pressed()) { break; } int startSubsample = time(nullptr); //subsample loop SubSample sample; vector< vector > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f. if (m->getControl_pressed()) { break; } //copy to preserve old one - would do this in subsample but memory cleanup becomes messy. 
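// Note: each subsampling iteration below draws subsampleSize reads per group from the
// original tree (with or without replacement, per the withreplacement parameter),
// scores the temporary tree with Unweighted::getValues, and appends the per-comparison
// scores to calcDistsTotals. Those accumulated scores are what getAverageSTDMatrices
// (and getConsensusTrees, when consensus=T) summarize once the loop finishes.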
CountTable* newCt = new CountTable(); //uses method of setting groups to doNotIncludeMe int sampleTime = 0; if (m->getDebug()) { sampleTime = time(nullptr); } Tree* subSampleTree; if (withReplacement) { subSampleTree = sample.getSampleWithReplacement(T[i], ct, newCt, subsampleSize, Groups); } else { subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize, Groups); } if (m->getDebug()) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(nullptr) - sampleTime) + " seconds to sample tree.\n"); } //call new weighted function vector iterData; iterData.resize(numComp,0); Unweighted thisUnweighted(includeRoot, Groups); iterData = thisUnweighted.getValues(subSampleTree, processors); //userData[0] = weightedscore //save data to make ave dist, std dist calcDistsTotals.push_back(iterData); delete newCt; delete subSampleTree; if((thisIter+1) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); } } if (subsample) { m->mothurOut("It took " + toString(time(nullptr) - startSubsample) + " secs to run the subsampling.\n"); } if (m->getControl_pressed()) { break; } if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); } if (consensus) { getConsensusTrees(calcDistsTotals, i); } //print output files printUWSummaryFile(i); if (random) { printUnweightedFile(i+1); } if (phylip) { createPhylipFile(i); } rscoreFreq.clear(); rCumul.clear(); validScores.clear(); utreeScores.clear(); UWScoreSig.clear(); } outSum.close(); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to run unifrac.unweighted.\n"); //set phylip file as new current phylipfile string currentName = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setPhylipFile(currentName); } } //set column file as new current columnfile itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setColumnFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "execute"); exit(1); } } /**************************************************************************************************/ int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector >& dists, int treeNum) { try { //we need to find the average distance and standard deviation for each groups distance //finds sum vector averages = util.getAverages(dists); //find standard deviation vector stdDev = util.getStandardDeviation(dists, averages); //make matrix with scores in it vector< vector > avedists; //avedists.resize(m->getNumGroups()); for (int i = 0; i < numGroups; i++) { vector temp; for (int j = 0; j < numGroups; j++) { temp.push_back(0.0); } avedists.push_back(temp); } //make matrix with scores in it vector< vector > stddists; //stddists.resize(m->getNumGroups()); for (int i = 0; i < numGroups; i++) { vector temp; for (int j = 0; j < numGroups; j++) { temp.push_back(0.0); } //stddists[i].resize(m->getNumGroups(), 0.0); stddists.push_back(temp); } if (m->getDebug()) { m->mothurOut("[DEBUG]: about to fill matrix.\n"); } //flip it so you can print it int 
count = 0; for (int r=0; rgetDebug()) { m->mothurOut("[DEBUG]: done filling matrix.\n"); } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(treefile)); variables["[tag]"] = toString(treeNum+1); variables["[tag2]"] = "unweighted.ave"; string aveFileName = getOutputFileName("phylip",variables); if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); } else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); } ofstream out; util.openOutputFile(aveFileName, out); variables["[tag2]"] = "unweighted.std"; string stdFileName = getOutputFileName("phylip",variables); if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); } else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); } ofstream outStd; util.openOutputFile(stdFileName, outStd); if ((outputForm == "lt") || (outputForm == "square")) { //output numSeqs out << numGroups << endl; outStd << numGroups << endl; } //output to file for (int r=0; rerrorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices"); exit(1); } } /**************************************************************************************************/ int UnifracUnweightedCommand::getConsensusTrees(vector< vector >& dists, int treeNum) { try { //create treemap class from groupmap for tree class to use CountTable newCt; set nameMap; map groupMap; set gps; for (int i = 0; i < Groups.size(); i++) { nameMap.insert(Groups[i]); gps.insert(Groups[i]); groupMap[Groups[i]] = Groups[i]; } newCt.createTable(nameMap, groupMap, gps); vector newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created if (m->getControl_pressed()) { return 0; } Consensus con; Tree* conTree = con.getTree(newTrees); //create a new filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(treefile)); variables["[tag]"] = toString(treeNum+1); variables["[tag2]"] = "unweighted.cons"; string conFile = getOutputFileName("tree",variables); outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); ofstream outTree; util.openOutputFile(conFile, outTree); if (conTree != nullptr) { conTree->print(outTree, "boot"); delete conTree; } outTree.close(); return 0; } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees"); exit(1); } } /**************************************************************************************************/ vector UnifracUnweightedCommand::buildTrees(vector< vector >& dists, int treeNum, CountTable& myct) { try { vector trees; //create a new filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(treefile)); variables["[tag]"] = toString(treeNum+1); variables["[tag2]"] = "unweighted.all"; string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); ofstream outAll; util.openOutputFile(outputFile, outAll); for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree. 
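// Note: the dists argument is calcDistsTotals, indexed [iteration][group pair]; each
// row below appears to be rearranged into the square sims matrix, assembled into a
// Tree, written to the *.unweighted.all tre file, and kept so Consensus::getTree can
// build the *.unweighted.cons consensus tree from the full set of subsampled trees.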
if (m->getControl_pressed()) { break; } //make matrix with scores in it vector< vector > sims; sims.resize(Groups.size()); for (int j = 0; j < Groups.size(); j++) { sims[j].resize(Groups.size(), 0.0); } int count = 0; for (int r=0; rassembleTree(); trees.push_back(tempTree); //print tree tempTree->print(outAll); } outAll.close(); if (m->getControl_pressed()) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = nullptr; } util.mothurRemove(outputFile); } return trees; } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "buildTrees"); exit(1); } } /**************************************************************************************************/ int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector usersScores) { try { vector randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC... Unweighted unweighted(includeRoot, Groups); vector< vector > namesOfGroupCombos; numGroups = Groups.size(); for (int a=0; a groups; groups.push_back(Groups[a]); groups.push_back(Groups[l]); namesOfGroupCombos.push_back(groups); } } vector > randomTreeNodes; for (int f = 0; f < numComp; f++) { vector randomNodesForThisCombo = thisTree->getNodes(namesOfGroupCombos[f]); randomTreeNodes.push_back(randomNodesForThisCombo); } vector > savedRandomTreeNodes = randomTreeNodes; //get unweighted scores for random trees - if random is false iters = 0 for (int j = 0; j < iters; j++) { randomTreeNodes = savedRandomTreeNodes; for (int f = 0; f < numComp; f++) { util.mothurRandomShuffle(randomTreeNodes[f]); } //randomize labels //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison randomData = unweighted.getValues(thisTree, randomTreeNodes, processors); if (m->getControl_pressed()) { return 0; } for(int k = 0; k < numComp; k++) { //add trees unweighted score to map of scores map::iterator it = rscoreFreq[k].find(randomData[k]); if (it != rscoreFreq[k].end()) {//already have that score rscoreFreq[k][randomData[k]]++; }else{//first time we have seen this score rscoreFreq[k][randomData[k]] = 1; } //add randoms score to validscores validScores[randomData[k]] = randomData[k]; } } for(int a = 0; a < numComp; a++) { float rcumul = 1.0000; //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. 
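// Note: validScores is traversed in ascending score order, so rcumul (starting at 1.0,
// with each random-score frequency subtracted as the loop advances) leaves
// rCumul[a][score] holding the fraction of random trees scoring at or above that score.
// UWScoreSig[a] stores that fraction for the user's observed score, effectively a
// p-value; for example, if 30 of 1000 random trees match or beat the observed score,
// the summary reports 0.03 (values rarer than 1/iters are printed as "<" 1/iters by
// printUWSummaryFile).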
for (map::iterator it = validScores.begin(); it != validScores.end(); it++) { //make rscoreFreq map and rCumul map::iterator it2 = rscoreFreq[a].find(it->first); rCumul[a][it->first] = rcumul; //get percentage of random trees with that info if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; } else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score } UWScoreSig[a].push_back(rCumul[a][usersScores[a]]); } return 0; } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs"); exit(1); } } /***********************************************************/ void UnifracUnweightedCommand::printUnweightedFile(int treeNum) { try { vector data; vector tags; tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul"); map variables; variables["[filename]"] = outputdir + util.getSimpleName(treefile); variables["[tag]"] = toString(treeNum); string unFileName = getOutputFileName("unweighted", variables); FileOutput* output = new ColumnFile(unFileName, itersString); outputNames.push_back(unFileName); outputTypes["unweighted"].push_back(unFileName); for(int a = 0; a < numComp; a++) { output->setLabelName(groupComb[a], tags); //print each line for (map::iterator it = validScores.begin(); it != validScores.end(); it++) { data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); output->updateOutput(data); data.clear(); } output->resetFile(); } delete output; } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile"); exit(1); } } /***********************************************************/ void UnifracUnweightedCommand::printUWSummaryFile(int i) { try { //format output outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); //print each line for(int a = 0; a < numComp; a++) { outSum << i+1 << '\t'; m->mothurOut(toString(i+1) + "\t"); if (random) { if (UWScoreSig[a][0] > (1/(float)iters)) { outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n"); }else { outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n"); } }else{ outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl; cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl; m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n"); } } } catch(exception& e) { m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile"); exit(1); } } /***********************************************************/ void UnifracUnweightedCommand::createPhylipFile(int i) { try { string phylipFileName; map variables; variables["[filename]"] = outputdir + util.getSimpleName(treefile); variables["[tag]"] = toString(i+1); if ((outputForm == "lt") || (outputForm == "square")) { variables["[tag2]"] = 
"unweighted.phylip"; phylipFileName = getOutputFileName("phylip",variables); outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); }else { //column variables["[tag2]"] = "unweighted.column"; phylipFileName = getOutputFileName("column",variables); outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); } ofstream out; util.openOutputFile(phylipFileName, out); numGroups = Groups.size(); if ((outputForm == "lt") || (outputForm == "square")) { //output numSeqs out << numGroups << endl; } //make matrix with scores in it vector< vector > dists; dists.resize(numGroups); for (int i = 0; i < numGroups; i++) { dists[i].resize(numGroups, 0.0); } //flip it so you can print it int count = 0; for (int r=0; rerrorOut(e, "UnifracUnweightedCommand", "createPhylipFile"); exit(1); } } /***********************************************************/ mothur-1.48.0/source/commands/unifracunweightedcommand.h000077500000000000000000000050261424121717000234530ustar00rootroot00000000000000#ifndef UNIFRACUNWEIGHTEDCOMMAND_H #define UNIFRACUNWEIGHTEDCOMMAND_H /* * unifracunweightedcommand.h * Mothur * * Created by Sarah Westcott on 2/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "unweighted.h" #include "counttable.h" #include "fileoutput.h" #include "readtree.h" class UnifracUnweightedCommand : public Command { public: UnifracUnweightedCommand(string); ~UnifracUnweightedCommand() = default; vector setParameters(); string getCommandName() { return "unifrac.unweighted"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Lozupone C, Knight R (2005). UniFrac: a new phylogenetic method for comparing microbial communities. Appl Environ Microbiol 71: 8228-35. \nhttp://www.mothur.org/wiki/Unifrac.unweighted"; } string getDescription() { return "generic tests that describes whether two or more communities have the same structure"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string sumFile, allGroups; vector groupComb; // AB. AC, BC... int iters, numGroups, numComp, counter, processors, subsampleSize, subsampleIters, withReplacement; vector< vector > utreeScores; //scores for users trees for each comb. vector< vector > UWScoreSig; //tree score signifigance when compared to random trees - percentage of random trees with that score or higher. map validScores; //map contains scores from random vector< map > rscoreFreq; //map -vector entry for each combination. vector< map > rCumul; //map -vector entry for each combination. bool abort, phylip, random, includeRoot, consensus, subsample; string groups, itersString, outputForm, treefile, groupfile, namefile, countfile; vector Groups, outputNames; //holds groups to be used ofstream outSum, out; ifstream inFile; int runRandomCalcs(Tree*, vector); void printUWSummaryFile(int); void printUnweightedFile(int); void createPhylipFile(int); vector buildTrees(vector< vector >&, int, CountTable&); int getConsensusTrees(vector< vector >&, int); int getAverageSTDMatrices(vector< vector >&, int); }; #endif mothur-1.48.0/source/commands/unifracweightedcommand.cpp000077500000000000000000001250541424121717000234470ustar00rootroot00000000000000/* * unifracweightedcommand.cpp * Mothur * * Created by Sarah Westcott on 2/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "unifracweightedcommand.h" #include "consensus.h" #include "subsample.h" #include "treereader.h" //********************************************************************************************************************** vector UnifracWeightedCommand::setParameters(){ try { CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","weighted-wsummary",false,true,true); parameters.push_back(ptree); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); CommandParameter pwithreplacement("withreplacement", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pwithreplacement); CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus); CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom); CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance); CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["weighted"] = tempOutNames; outputTypes["wsummary"] = tempOutNames; outputTypes["phylip"] = tempOutNames; outputTypes["column"] = tempOutNames; outputTypes["tree"] = tempOutNames; abort = false; calledHelp = false; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string UnifracWeightedCommand::getHelpString(){ try { string helpString = ""; helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n"; helpString += "The group names are separated by dashes. 
The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"; helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n"; helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n"; helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n"; helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n"; helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n"; helpString += "The withreplacement parameter allows you to indicate you want to subsample your data allowing for the same read to be included multiple times. Default=f. \n"; helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n"; helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n"; helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n"; helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n"; helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string UnifracWeightedCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "weighted") { pattern = "[filename],weighted-[filename],[tag],weighted"; } else if (type == "wsummary") { pattern = "[filename],[tag],wsummary"; } else if (type == "phylip") { pattern = "[filename],[tag],[tag2],dist"; } else if (type == "column") { pattern = "[filename],[tag],[tag2],dist"; } else if (type == "tree") { pattern = "[filename],[tag],[tag2],tre"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "getOutputPattern"); exit(1); } } /***********************************************************/ UnifracWeightedCommand::UnifracWeightedCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters=parser.getParameters(); ValidParameters validParameter; treefile = validParameter.validFile(parameters, "tree"); if (treefile == "not open") { treefile = ""; abort = true; } else if (treefile == "not found") { //if there is a current design file, use it treefile = current->getTreeFile(); if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter.\n"); } else { 
m->mothurOut("You have no current tree file and the tree parameter is required.\n"); abort = true; } }else { current->setTreeFile(treefile); } //check for required parameters groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((namefile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: name or count.\n"); abort = true; } if ((groupfile != "") && (countfile != "")) { m->mothurOut("[ERROR]: you may only use one of the following: group or count.\n"); abort=true; } if (outputdir == ""){ outputdir = util.hasPath(treefile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } itersString = validParameter.valid(parameters, "iters"); if (itersString == "not found") { itersString = "1000"; } util.mothurConvert(itersString, iters); string temp = validParameter.valid(parameters, "distance"); if (temp == "not found") { phylip = false; outputForm = ""; } else{ if (temp=="phylip") { temp = "lt"; } if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; } else { m->mothurOut("Options for distance are: lt, square, or column. 
Using lt.\n"); phylip = true; outputForm = "lt"; } } temp = validParameter.valid(parameters, "random"); if (temp == "not found") { temp = "F"; } random = util.isTrue(temp); temp = validParameter.valid(parameters, "root"); if (temp == "not found") { temp = "F"; } includeRoot = util.isTrue(temp); temp = validParameter.valid(parameters, "processors"); if (temp == "not found"){ temp = current->getProcessors(); } processors = current->setProcessors(temp); temp = validParameter.valid(parameters, "subsample"); if (temp == "not found") { temp = "F"; } if (util.isNumeric1(temp)) { util.mothurConvert(temp, subsampleSize); subsample = true; } else { if (util.isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later else { subsample = false; } } if (!subsample) { subsampleIters = 0; } else { subsampleIters = iters; } temp = validParameter.valid(parameters, "withreplacement"); if (temp == "not found"){ temp = "f"; } withReplacement = util.isTrue(temp); temp = validParameter.valid(parameters, "consensus"); if (temp == "not found") { temp = "F"; } consensus = util.isTrue(temp); if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; } if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } } else { CountTable testCt; if ((!testCt.testGroups(countfile)) && (subsample)) { m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true; } } if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; } if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; } } } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand"); exit(1); } } /***********************************************************/ int UnifracWeightedCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } long start = time(nullptr); TreeReader* reader; if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); } else { reader = new TreeReader(treefile, countfile); } vector T = reader->getTrees(); CountTable* ct; ct = T[0]->getCountTable(); if ((Groups.size() == 0) || (Groups.size() < 2)) { Groups = ct->getNamesOfGroups(); } //must have at least 2 groups to compare delete reader; if (m->getControl_pressed()) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } map variables; vector nameGroups = ct->getNamesOfGroups(); if (Groups.size() < 2) { m->mothurOut("[ERROR]: You cannot run unifrac.weighted with less than 2 groups, aborting.\n"); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } if (m->getControl_pressed()) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } if ((Groups.size() == 0) || (Groups.size() < 2)) { Groups = ct->getNamesOfGroups(); } //must have at least 2 groups to compare //set or check size if (subsample) { //user has not set size, set size = smallest samples size if (subsampleSize == -1) { subsampleSize = ct->getNumSeqsSmallestGroup(); m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n"); }else { //eliminate any too small groups vector newGroups = Groups; Groups.clear(); for (int i = 0; i < newGroups.size(); i++) { int thisSize = ct->getGroupCount(newGroups[i]); if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); } else { m->mothurOut("You have selected a size 
that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); } } } } vector groupComb; util.getCombos(groupComb, Groups, numComp); //here in case some groups are removed by subsample if (numComp < processors) { processors = numComp; m->mothurOut("Reducing processors to " + toString(numComp) + ".\n"); } if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; } Weighted weighted(includeRoot, Groups); for (int i = 0; i < T.size(); i++) { if (m->getControl_pressed()) { break; } vector WScoreSig; //tree weighted score signifigance when compared to random trees - percentage of random trees with that score or lower. vector< vector > uScores; uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC... vector userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC... vector randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC... userData = weighted.getValues(T[i], processors); //userData[0] = weightedscore if (m->getControl_pressed()) { break; } if (phylip) { createPhylipFile((i+1), userData); } if (random) { runRandomCalcs(T[i], ct, userData, (i+1), WScoreSig, groupComb); } printWSummaryFile((i+1), userData, WScoreSig, groupComb); if (m->getControl_pressed()) { break; } //subsample loop vector< vector > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files SubSample sample; int sampleTime = time(nullptr); for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f. if (m->getControl_pressed()) { break; } //uses method of setting groups to doNotIncludeMe //copy to preserve old one - would do this in subsample but memory cleanup becomes messy. 
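//A minimal sketch of what this subsampling loop accumulates (names as in the
//surrounding code, values hypothetical): each iteration draws subsampleSize
//sequences per group, recomputes the weighted score for every pairwise
//comparison, and appends one row to calcDistsTotals, so after subsampleIters
//iterations it is in effect a subsampleIters x numComp table, e.g. for groups A, B and C:
//  calcDistsTotals[iter] = { W(A,B), W(A,C), W(B,C) };
//getAverageSTDMatrices() averages each column into the *.weighted.ave distance
//matrix (with a matching *.weighted.std matrix of standard deviations), and
//getConsensusTrees() builds one tree per row before computing their consensus.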
CountTable* newCt = new CountTable(); Tree* subSampleTree; if (withReplacement) { subSampleTree = sample.getSampleWithReplacement(T[i], ct, newCt, subsampleSize, Groups); } else { subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize, Groups); } if (m->getDebug()) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(nullptr) - sampleTime) + " seconds to sample tree.\n"); } //call new weighted function vector iterData; iterData.resize(numComp,0); Weighted thisWeighted(includeRoot, Groups); iterData = thisWeighted.getValues(subSampleTree, processors); //userData[0] = weightedscore //save data to make ave dist, std dist calcDistsTotals.push_back(iterData); delete newCt; delete subSampleTree; //output at 100 iters or after 10 mins if (((thisIter+1) % 100 == 0) || ((time(nullptr) - sampleTime) > 600)){ m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); sampleTime = time(nullptr); } } if (m->getControl_pressed()) { break; } if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); } if (consensus) { getConsensusTrees(calcDistsTotals, i); } } delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to run unifrac.weighted.\n"); //set phylip file as new current phylipfile string currentName = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setPhylipFile(currentName); } } //set column file as new current columnfile itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setColumnFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "execute"); exit(1); } } /**************************************************************************************************/ int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector >& dists, int treeNum) { try { vector averages = util.getAverages(dists); vector stdDev = util.getStandardDeviation(dists, averages); int numGroups = Groups.size(); vector< vector > avedists; for (int i = 0; i < numGroups; i++) { vector temp; temp.resize(numGroups, 0.0); avedists.push_back(temp); } //make matrix with scores in it vector< vector > stddists; //stddists.resize(m->getNumGroups()); for (int i = 0; i < numGroups; i++) { vector temp; for (int j = 0; j < numGroups; j++) { temp.push_back(0.0); } stddists.push_back(temp); } //flip it so you can print it int count = 0; for (int r=0; r< numGroups; r++) { for (int l = 0; l < r; l++) { avedists[r][l] = averages[count]; avedists[l][r] = averages[count]; stddists[r][l] = stdDev[count]; stddists[l][r] = stdDev[count]; count++; } } map variables; variables["[filename]"] = outputdir + util.getSimpleName(treefile); variables["[tag]"] = toString(treeNum+1); variables["[tag2]"] = "weighted.ave"; string aveFileName = getOutputFileName("phylip",variables); if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); } else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); } ofstream out; util.openOutputFile(aveFileName, 
out); variables["[tag2]"] = "weighted.std"; string stdFileName = getOutputFileName("phylip",variables); if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); } else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); } ofstream outStd; util.openOutputFile(stdFileName, outStd); if ((outputForm == "lt") || (outputForm == "square")) { //output numSeqs out << numGroups << endl; outStd << numGroups << endl; } //output to file for (int r=0; r< numGroups; r++) { string name = Groups[r]; if (name.length() < 10) { while (name.length() < 10) { name += " "; } } //pad with spaces to make compatible if (outputForm == "lt") { out << name; outStd << name; for (int l = 0; l < r; l++) { out << '\t' << avedists[r][l]; outStd << '\t' << stddists[r][l];} //output distances out << endl; outStd << endl; }else if (outputForm == "square") { out << name; outStd << name; for (int l = 0; l < numGroups; l++) { out << '\t' << avedists[r][l]; outStd << '\t' << stddists[r][l]; } //output distances out << endl; outStd << endl; }else{ for (int l = 0; l < r; l++) { string otherName = Groups[l]; if (otherName.length() < 10) { while (otherName.length() < 10) { otherName += " "; } } //pad with spaces to make compatible out << name << '\t' << otherName << '\t' << avedists[r][l] << endl; //output distances outStd << name << '\t' << otherName << '\t' << stddists[r][l] << endl; } } } out.close(); outStd.close(); return 0; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices"); exit(1); } } /**************************************************************************************************/ int UnifracWeightedCommand::getConsensusTrees(vector< vector >& dists, int treeNum) { try { ///create treemap class from groupmap for tree class to use CountTable newCt; set nameMap; map groupMap; set gps; int numGroups = Groups.size(); for (int i = 0; i < numGroups; i++) { nameMap.insert(Groups[i]); gps.insert(Groups[i]); groupMap[Groups[i]] = Groups[i]; } newCt.createTable(nameMap, groupMap, gps); vector newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created if (m->getControl_pressed()) { return 0; } Consensus con; Tree* conTree = con.getTree(newTrees); //create a new filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(treefile)); variables["[tag]"] = toString(treeNum+1); variables["[tag2]"] = "weighted.cons"; string conFile = getOutputFileName("tree",variables); outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); ofstream outTree; util.openOutputFile(conFile, outTree); if (conTree != nullptr) { conTree->print(outTree, "boot"); delete conTree; } outTree.close(); return 0; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees"); exit(1); } } /**************************************************************************************************/ vector UnifracWeightedCommand::buildTrees(vector< vector >& dists, int treeNum, CountTable& myct) { try { vector trees; //create a new filename map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(treefile)); variables["[tag]"] = toString(treeNum+1); variables["[tag2]"] = "weighted.all"; string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); ofstream outAll; util.openOutputFile(outputFile, outAll); int numGroups = Groups.size(); 
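//The flat vector dists[i] holds one score per pairwise comparison, in the same
//lower-triangle order used elsewhere in this file (pairs (r,l) with l < r,
//advancing a single counter). A minimal sketch of the unpacking performed in the
//loop below, assuming the similarity fed to the tree constructor is taken as
//1 - distance:
//  int count = 0;
//  for (int r = 0; r < numGroups; r++) {
//      for (int l = 0; l < r; l++) {
//          sims[r][l] = 1.0 - dists[i][count];
//          sims[l][r] = sims[r][l];
//          count++;
//      }
//  }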
for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree. if (m->getControl_pressed()) { break; } //make matrix with scores in it vector< vector > sims; sims.resize(numGroups); for (int j = 0; j < numGroups; j++) { sims[j].resize(numGroups, 0.0); } int count = 0; for (int r=0; rassembleTree(); trees.push_back(tempTree); tempTree->print(outAll); //print tree } outAll.close(); if (m->getControl_pressed()) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = nullptr; } util.mothurRemove(outputFile); } return trees; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "buildTrees"); exit(1); } } /**************************************************************************************************/ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, CountTable* ct, vector usersScores, int iter, vector& WScoreSig, vector groupComb) { try { map variables; variables["[filename]"] = outputdir + util.getSimpleName(treefile); variables["[tag]"] = toString(iter); string wFileName = getOutputFileName("weighted", variables); ColumnFile output(wFileName, itersString); ofstream out; util.openOutputFile(wFileName, out); out.close(); outputNames.push_back(wFileName); outputTypes["weighted"].push_back(wFileName); //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; vector< vector > namesOfGroupCombos; int numGroups = Groups.size(); for (int a=0; a groups; groups.push_back(Groups[a]); groups.push_back(Groups[l]); namesOfGroupCombos.push_back(groups); } } vector > randomTreeNodes; for (int f = 0; f < numComp; f++) { randomTreeNodes.push_back(thisTree->getNodes(namesOfGroupCombos[f])); } vector > savedRandomTreeNodes = randomTreeNodes; //get scores for random trees vector > rScores; rScores.resize(numComp); for (int i = 0; i < iters; i++) { if (m->getControl_pressed()) { return 0; } randomTreeNodes = savedRandomTreeNodes; for (int f = 0; f < numComp; f++) { util.mothurRandomShuffle(randomTreeNodes[f]); } vector thisItersRScores = createProcesses(thisTree, ct, namesOfGroupCombos, randomTreeNodes); for (int f = 0; f < numComp; f++) { rScores[f].push_back(thisItersRScores[f]); } if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)+"\n"); } } //find the signifigance of the score for summary file for (int f = 0; f < numComp; f++) { //sort random scores sort(rScores[f].begin(), rScores[f].end()); //the index of the score higher than yours is returned //so if you have 1000 random trees the index returned is 100 //then there are 900 trees with a score greater then you. //giving you a signifigance of 0.900 int index = findIndex(usersScores[f], f, rScores); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand\n"); exit(1); } //error code //the signifigance is the number of trees with the users score or higher WScoreSig.push_back((iters-index)/(float)iters); } set validScores; //map contains scores from random vector< map > rScoreFreq; //map -vector entry for each combination. 
vector< map > rCumul; //map -vector entry for each c calculateFreqsCumuls(validScores, rScores, rScoreFreq, rCumul); vector tags; tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul"); for(int a = 0; a < numComp; a++) { output.setLabelName(groupComb[a], tags); //print each line for (set::iterator it = validScores.begin(); it != validScores.end(); it++) { vector data; data.push_back(*it); data.push_back(rScoreFreq[a][*it]); data.push_back(rCumul[a][*it]); output.updateOutput(data); } output.resetFile(); } return 0; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs"); exit(1); } } /***********************************************************************/ struct weightedRandomData { bool includeRoot; int count, numComps, start, num; vector Groups, Treenames; vector scores; vector< vector > namesOfGroupCombos; vector > randomizedTreeNodes; MothurOut* m; Tree* t; CountTable* ct; Utils util; weightedRandomData(){} weightedRandomData(int st, int en, vector< vector > ngc, Tree* tree, CountTable* count, bool ir, vector g, vector > randomTreeNodes) { m = MothurOut::getInstance(); num = en; start = st; namesOfGroupCombos = ngc; numComps = namesOfGroupCombos.size(); randomizedTreeNodes = randomTreeNodes; t = tree; ct = count; includeRoot = ir; Groups = g; Treenames = t->getTreeNames(); count = 0; } }; /**************************************************************************************************/ void driverWeightedRandom(weightedRandomData* params) { try { Weighted weighted(params->includeRoot, params->Groups); params->count = 0; Tree* randT = new Tree(params->ct, params->Treenames); for (int h = params->start; h < (params->start+params->num); h++) { if (params->m->getControl_pressed()) { break; } string groupA = params->namesOfGroupCombos[h][0]; string groupB = params->namesOfGroupCombos[h][1]; vector treeNodesFromTheseGroups = params->randomizedTreeNodes[h]; //copy T[i]'s info. 
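//Outline of this worker (a sketch of the existing logic, not new behavior): the
//caller has already shuffled the leaf assignments for each pairwise comparison
//in randomizedTreeNodes, so for combination h the thread
//  1. copies the topology of the user's tree into randT,
//  2. re-labels it with the shuffled assignments via assembleRandomUnifracTree(),
//  3. scores the relabeled tree with weighted.getValues(randT, groupA, groupB),
//pushing each score onto params->scores to build the null distribution that
//runRandomCalcs() later compares the observed scores against.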
randT->getCopy(params->t); //create a random tree with same topology as T[i], but different labels randT->assembleRandomUnifracTree(params->randomizedTreeNodes[h]); if (params->m->getControl_pressed()) { break; } //get wscore of random tree EstOutput randomData = weighted.getValues(randT, groupA, groupB); if (params->m->getControl_pressed()) { break; } //save scores params->scores.push_back(randomData[0]); } delete randT; } catch(exception& e) { params->m->errorOut(e, "UnifracWeightedCommand", "driver"); exit(1); } } /**************************************************************************************************/ vector UnifracWeightedCommand::createProcesses(Tree* t, CountTable* ct, vector< vector > namesOfGroupCombos, vector >& randomizedTreeNodes) { try { //breakdown work between processors vector lines; int remainingPairs = namesOfGroupCombos.size(); if (remainingPairs < processors) { processors = remainingPairs; } int startIndex = 0; for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) { int numPairs = remainingPairs; //case for last processor if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); } lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs startIndex = startIndex + numPairs; remainingPairs = remainingPairs - numPairs; } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { CountTable* copyCount = new CountTable(); copyCount->copy(ct); vector Treenames = t->getTreeNames(); Tree* copyTree = new Tree(copyCount, Treenames); copyTree->getCopy(t); weightedRandomData* dataBundle = new weightedRandomData(lines[i+1].start, lines[i+1].end, namesOfGroupCombos, copyTree, copyCount, includeRoot, Groups, randomizedTreeNodes); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverWeightedRandom, dataBundle)); } weightedRandomData* dataBundle = new weightedRandomData(lines[0].start, lines[0].end, namesOfGroupCombos, t, ct, includeRoot, Groups, randomizedTreeNodes); driverWeightedRandom(dataBundle); vector scores = dataBundle->scores; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); scores.insert(scores.end(), data[i]->scores.begin(), data[i]->scores.end()); delete data[i]->t; delete data[i]->ct; delete data[i]; delete workerThreads[i]; } delete dataBundle; return scores; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "createProcesses"); exit(1); } } /***********************************************************/ void UnifracWeightedCommand::printWSummaryFile(int treeIndex, vector utreeScores, vector WScoreSig, vector groupComb) { try { map variables; variables["[filename]"] = outputdir + util.getSimpleName(treefile); variables["[tag]"] = toString(treeIndex); sumFile = getOutputFileName("wsummary",variables); outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile); ofstream outSum; util.openOutputFile(sumFile, outSum); //column headers outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t'; m->mothurOut("Tree#\tGroups\tWScore\t"); if (random) { outSum << "WSig"; m->mothurOut("WSig"); } outSum << endl; m->mothurOutEndLine(); //format output outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); //print each line int precisionLength = itersString.length(); for (int j = 0; j < numComp; j++) { if (random) { if (WScoreSig[j] > (1/(float)iters)) { outSum << setprecision(6) << treeIndex << '\t' << groupComb[j] << '\t' << 
utreeScores[j] << '\t' << setprecision(precisionLength) << WScoreSig[j] << endl; cout << setprecision(6) << treeIndex << '\t' << groupComb[j] << '\t' << utreeScores[j] << '\t' << setprecision(precisionLength) << WScoreSig[j] << endl; m->mothurOutJustToLog(toString(treeIndex) +"\t" + groupComb[j] +"\t" + toString(utreeScores[j]) +"\t" + toString(WScoreSig[j]) + "\n"); }else{ outSum << setprecision(6) << treeIndex << '\t' << groupComb[j] << '\t' << utreeScores[j] << '\t' << setprecision(precisionLength) << "<" << (1/float(iters)) << endl; cout << setprecision(6) << treeIndex << '\t' << groupComb[j] << '\t' << utreeScores[j] << '\t' << setprecision(precisionLength) << "<" << (1/float(iters)) << endl; m->mothurOutJustToLog(toString(treeIndex) +"\t" + groupComb[j] +"\t" + toString(utreeScores[j]) +"\t<" + toString((1/float(iters))) + "\n"); } }else{ outSum << setprecision(6) << treeIndex << '\t' << groupComb[j] << '\t' << utreeScores[j] << endl; cout << setprecision(6) << treeIndex << '\t' << groupComb[j] << '\t' << utreeScores[j] << endl; m->mothurOutJustToLog(toString(treeIndex) +"\t" + groupComb[j] +"\t" + toString(utreeScores[j]) +"\n"); } } outSum.close(); } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile"); exit(1); } } /***********************************************************/ void UnifracWeightedCommand::createPhylipFile(int treeIndex, vector utreeScores) { try { int count = 0; int numGroups = Groups.size(); string phylipFileName; map variables; variables["[filename]"] = outputdir + util.getSimpleName(treefile); variables["[tag]"] = toString(treeIndex); if ((outputForm == "lt") || (outputForm == "square")) { variables["[tag2]"] = "weighted.phylip"; phylipFileName = getOutputFileName("phylip",variables); outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); }else { //column variables["[tag2]"] = "weighted.column"; phylipFileName = getOutputFileName("column",variables); outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); } ofstream out; util.openOutputFile(phylipFileName, out); if ((outputForm == "lt") || (outputForm == "square")) { out << numGroups << endl; } //make matrix with scores in it vector< vector > dists; dists.resize(numGroups); for (int i = 0; i < numGroups; i++) { dists[i].resize(numGroups, 0.0); } //flip it so you can print it for (int r=0; r< numGroups; r++) { for (int l = 0; l < r; l++) { dists[r][l] = utreeScores[count]; dists[l][r] = utreeScores[count]; count++; } } //output to file for (int r=0; rerrorOut(e, "UnifracWeightedCommand", "createPhylipFile"); exit(1); } } /***********************************************************/ int UnifracWeightedCommand::findIndex(float score, int index, vector< vector >& rScores) { try{ int results = rScores[index].size(); for (int i = 0; i < rScores[index].size(); i++) { if (rScores[index][i] >= score) { results = i; break; } } return results; } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "findIndex"); exit(1); } } /***********************************************************/ void UnifracWeightedCommand::calculateFreqsCumuls(set& validScores, vector< vector > rScores, vector< map >& rScoreFreq, vector< map >& rCumul) { try { //clear out old tree values rScoreFreq.clear(); rScoreFreq.resize(numComp); rCumul.clear(); rCumul.resize(numComp); validScores.clear(); //calculate frequency for (int f = 0; f < numComp; f++) { for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... 
you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3... validScores.insert(rScores[f][i]); map::iterator it = rScoreFreq[f].find(rScores[f][i]); if (it != rScoreFreq[f].end()) { rScoreFreq[f][rScores[f][i]]++; } else { rScoreFreq[f][rScores[f][i]] = 1; } } } //calculate rcumul for(int a = 0; a < numComp; a++) { float rcumul = 1.0000; //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. for (set::iterator it = validScores.begin(); it != validScores.end(); it++) { //make rscoreFreq map and rCumul map::iterator it2 = rScoreFreq[a].find(*it); rCumul[a][*it] = rcumul; //get percentage of random trees with that info if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][*it] /= iters; rcumul-= it2->second; } else { rScoreFreq[a][*it] = 0.0000; } //no random trees with that score } } } catch(exception& e) { m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCumuls"); exit(1); } } /***********************************************************/ mothur-1.48.0/source/commands/unifracweightedcommand.h000077500000000000000000000045661424121717000231200ustar00rootroot00000000000000#ifndef UNIFRACWEIGHTEDCOMMAND_H #define UNIFRACWEIGHTEDCOMMAND_H /* * unifracweightedcommand.h * Mothur * * Created by Sarah Westcott on 2/9/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "weighted.h" #include "counttable.h" #include "fileoutput.h" #include "readtree.h" class UnifracWeightedCommand : public Command { public: UnifracWeightedCommand(string); ~UnifracWeightedCommand() = default; vector setParameters(); string getCommandName() { return "unifrac.weighted"; } string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "Lozupone CA, Hamady M, Kelley ST, Knight R (2007). Quantitative and qualitative beta diversity measures lead to different insights into factors that structure microbial communities. Appl Environ Microbiol 73: 1576-85. \nhttp://www.mothur.org/wiki/Unifrac.weighted"; } string getDescription() { return "generic tests that describes whether two or more communities have the same structure"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: vector Groups, outputNames; // AB. AC, BC... bool abort, phylip, random, includeRoot, subsample, consensus, withReplacement; string groups, itersString, outputForm, treefile, groupfile, namefile, countfile, sumFile; int processors, subsampleSize, subsampleIters, iters, numComp; void printWSummaryFile(int, vector, vector, vector); void createPhylipFile(int, vector); //random comparison functions int findIndex(float, int, vector< vector >&); void calculateFreqsCumuls(set&, vector< vector > rScores, vector< map >&, vector< map >&); vector createProcesses(Tree*, CountTable*, vector< vector >, vector >&); int runRandomCalcs(Tree*, CountTable*, vector, int, vector&, vector); vector buildTrees(vector< vector >&, int, CountTable&); int getConsensusTrees(vector< vector >&, int); int getAverageSTDMatrices(vector< vector >&, int); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/commands/uniqueseqscommand.cpp000077500000000000000000000511361424121717000225000ustar00rootroot00000000000000/* * deconvolute.cpp * Mothur * * Created by Sarah Westcott on 1/21/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "uniqueseqscommand.h" #include "sequence.hpp" //********************************************************************************************************************** vector UniqueSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount); CommandParameter pformat("format", "Multiple", "count-name", "count", "", "", "","",false,false, true); parameters.push_back(pformat); CommandParameter poutput("output", "Multiple", "count-name", "count", "", "", "","",false,false, true); parameters.push_back(poutput); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); abort = false; calledHelp = false; vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["count"] = tempOutNames; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "UniqueSeqsCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string UniqueSeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The unique.seqs command reads a fastafile and creates a name or count file.\n"; helpString += "The unique.seqs command parameters are fasta, name, count and format. fasta is required, unless there is a valid current fasta file.\n"; helpString += "The name parameter is used to provide an existing name file associated with the fasta file. \n"; helpString += "The count parameter is used to provide an existing count file associated with the fasta file. \n"; helpString += "The format parameter is used to indicate what type of file you want outputted. 
Choices are name and count, default=count unless name file used then default=name.\n"; helpString += "The unique.seqs command should be in the following format: \n"; helpString += "unique.seqs(fasta=yourFastaFile) \n"; return helpString; } catch(exception& e) { m->errorOut(e, "UniqueSeqsCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string UniqueSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "fasta") { pattern = "[filename],unique,[extension]"; } else if (type == "name") { pattern = "[filename],names-[filename],[tag],names"; } else if (type == "count") { pattern = "[filename],count_table-[filename],[tag],count_table"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "UniqueSeqsCommand", "getOutputPattern"); exit(1); } } /**************************************************************************************/ UniqueSeqsCommand::UniqueSeqsCommand(string option) : Command() { try { //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; fastafile = validParameter.validFile(parameters, "fasta"); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = current->getFastaFile(); if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter.\n"); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required.\n"); abort = true; } }else { current->setFastaFile(fastafile); } if (outputdir == ""){ outputdir += util.hasPath(fastafile); } namefile = validParameter.validFile(parameters, "name"); if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found"){ namefile = ""; } else { current->setNameFile(namefile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { abort = true; countfile = ""; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); } if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a unique.seqs command you must enter ONLY ONE of the following: count or name.\n"); abort = true; } //allow format parameter to have two names - format or output format = validParameter.valid(parameters, "format"); if(format == "not found"){ format = validParameter.valid(parameters, "output"); if(format == "not found"){ if (namefile != "") { format = "name"; } else { format = "count"; } } } if ((format != "name") && (format != "count")) { m->mothurOut(format + " is not a valid format option. 
Options are count or name."); if (countfile == "") { m->mothurOut("I will use count.\n"); format = "count"; } else { m->mothurOut("I will use count.\n"); format = "count"; } } } } catch(exception& e) { m->errorOut(e, "UniqueSeqsCommand", "UniqueSeqsCommand"); exit(1); } } /**************************************************************************************/ int UniqueSeqsCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } if (countfile != "") { processCount(countfile); } else if (namefile != "") { processName(namefile); } else { //no existing duplicate data if (format == "count") { processCount(""); } else { processName(""); } } if (m->getControl_pressed()) { return 0; } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "UniqueSeqsCommand", "execute"); exit(1); } } /**************************************************************************************/ string UniqueSeqsCommand::processCount(string countfile) { //countfile can be blank, indicating no countfile provided try { if (format == "name") { if (countfile == "") { return (processName("")); } else { CountTable ct; ct.readTable(countfile, true, false); map nameMap = ct.getNameMap(); string newNamesFile = createNewNameFile(countfile, nameMap); return (processName(newNamesFile)); } } map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outCountFile = getOutputFileName("count", variables); variables["[extension]"] = util.getExtension(fastafile); string outFastaFile = getOutputFileName("fasta", variables); CountTable ct; CountTable newCt; if (countfile != "") { ct.readTable(countfile, true, false); if (countfile == outCountFile){ //prepare filenames and open files map mvariables; mvariables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); mvariables["[tag]"] = "unique"; outCountFile = getOutputFileName("count", mvariables); } newCt.copy(&ct); } if (m->getControl_pressed()) { return 0; } ifstream in; util.openInputFile(fastafile, in); ofstream outFasta; util.openOutputFile(outFastaFile, outFasta); outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); map sequenceStrings; //sequenceString -> list of names. "atgc...." -> seq1,seq2,seq3. 
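//Worked example with hypothetical reads: given a fasta containing
//  seq1 ATGCATGC, seq2 ATGCATGC, seq3 ATGGATGC
//the loop below leaves sequenceStrings = { "ATGCATGC" -> "seq1", "ATGGATGC" -> "seq3" },
//writes only seq1 and seq3 to the unique fasta, and, when a count file was
//supplied, folds seq2's abundances into seq1's row via
//newCt.mergeCounts("seq1", "seq2"); without a count file the representative's
//total in newCt is simply incremented.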
map::iterator itStrings; set nameInFastaFile; //for sanity checking set::iterator itname; vector nameFileOrder; int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { //not end of file //sanity checks itname = nameInFastaFile.find(seq.getName()); if (itname == nameInFastaFile.end()) { nameInFastaFile.insert(seq.getName()); } else { m->mothurOut("[ERROR]: You already have a sequence named " + seq.getName() + " in your fasta file, sequence names must be unique, please correct.\n"); } itStrings = sequenceStrings.find(seq.getAligned()); string seqName = seq.getName(); int numCurrentReps = 0; if (itStrings == sequenceStrings.end()) { //this is a new unique sequence seq.printSequence(outFasta); //output to unique fasta file sequenceStrings[seq.getAligned()] = seqName; nameFileOrder.push_back(seq.getAligned()); if (countfile != "") { numCurrentReps = ct.getNumSeqs(seqName); } //checks to make sure seq is in table else { newCt.push_back(seqName); } }else { //this is a dup if (countfile != "") { numCurrentReps = newCt.getNumSeqs(seq.getName()); //checks to make sure seq is in table if (numCurrentReps != 0) { //its in the table newCt.mergeCounts(itStrings->second, seq.getName()); //merges counts and saves in uniques name } }else { numCurrentReps = newCt.getNumSeqs(itStrings->second); newCt.setNumSeqs(itStrings->second, numCurrentReps+1); } } count++; } if(count % 1000 == 0) { m->mothurOutJustToScreen(toString(count) + "\t" + toString(sequenceStrings.size()) + "\n"); } } if(count % 1000 != 0) { m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine(); } in.close(); outFasta.close(); if (m->getControl_pressed()) { util.mothurRemove(outFastaFile); } //print new names file ofstream outCount; util.openOutputFile(outCountFile, outCount); outputTypes["count"].push_back(outCountFile); outputNames.push_back(outCountFile); newCt.printCompressedHeaders(outCount); for (int i = 0; i < nameFileOrder.size(); i++) { if (m->getControl_pressed()) { break; } itStrings = sequenceStrings.find(nameFileOrder[i]); if (itStrings != sequenceStrings.end()) { newCt.printCompressedSeq(outCount, itStrings->second); }else{ m->mothurOut("[ERROR]: mismatch in namefile print.\n"); m->setControl_pressed(true); } } outCount.close(); if (m->getControl_pressed()) { util.mothurRemove(outFastaFile); util.mothurRemove(outCountFile); outputTypes.clear(); outputNames.clear(); } return outFastaFile; } catch(exception& e) { m->errorOut(e, "UniqueSeqsCommand", "processCount"); exit(1); } } /**************************************************************************************/ string UniqueSeqsCommand::processName(string namefile) { //namefile can be blank, indicating no namefile provided try { if (format == "count") { //if user want to convert the count file to a names file, do that first if (namefile == "") { return (processCount("")); } else { //convert name to count map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(namefile)); string newCountFile = getOutputFileName("count", variables); CountTable ct; ct.createTable(namefile, "", nullVector); ct.printCompressedTable(newCountFile); return (processCount(newCountFile)); } } //assumes output is name and namefile is given //prepare filenames map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); string outNameFile = getOutputFileName("name", variables); variables["[extension]"] = 
util.getExtension(fastafile); string outFastaFile = getOutputFileName("fasta", variables); //check for name overwrite if (namefile == outNameFile){ map mvariables; mvariables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(fastafile)); mvariables["[tag]"] = "unique"; outNameFile = getOutputFileName("name", mvariables); } map nameMap; map::iterator itNames; if (namefile != "") { util.readNames(namefile, nameMap); } //add existing duplicates //bail if error reading namefile if (m->getControl_pressed()) { return ""; } //open files ifstream in; util.openInputFile(fastafile, in); ofstream outFasta; util.openOutputFile(outFastaFile, outFasta); outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); map sequenceStrings; //sequenceString -> list of names. "atgc...." -> seq1,seq2,seq3. map::iterator itStrings; set nameInFastaFile; //for sanity checking set::iterator itname; vector nameFileOrder; int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { //not end of file //sanity checks itname = nameInFastaFile.find(seq.getName()); if (itname == nameInFastaFile.end()) { nameInFastaFile.insert(seq.getName()); } else { m->mothurOut("[ERROR]: You already have a sequence named " + seq.getName() + " in your fasta file, sequence names must be unique, please correct.\n"); } string key = seq.getName(); if (namefile != "") { itNames = nameMap.find(seq.getName()); if (itNames == nameMap.end()) { //namefile and fastafile do not match m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file, and not in your namefile, please correct.\n"); }else { key = itNames->second; } } itStrings = sequenceStrings.find(seq.getAligned()); if (itStrings == sequenceStrings.end()) { //this is a new unique sequence seq.printSequence(outFasta); //output to unique fasta file //add new unique sequence to seqStrings sequenceStrings[seq.getAligned()] = key; nameFileOrder.push_back(seq.getAligned()); }else { //this is a dup sequenceStrings[seq.getAligned()] += "," + key; } count++; } if(count % 1000 == 0) { m->mothurOutJustToScreen(toString(count) + "\t" + toString(sequenceStrings.size()) + "\n"); } } if(count % 1000 != 0) { m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine(); } in.close(); outFasta.close(); if (m->getControl_pressed()) { return outFastaFile; } ofstream outNames; util.openOutputFile(outNameFile, outNames); outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile); for (int i = 0; i < nameFileOrder.size(); i++) { if (m->getControl_pressed()) { break; } itStrings = sequenceStrings.find(nameFileOrder[i]); if (itStrings != sequenceStrings.end()) { //get rep name int pos = (itStrings->second).find_first_of(','); if (pos == string::npos) { // only reps itself outNames << itStrings->second << '\t' << itStrings->second << endl; }else { outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl; } }else{ m->mothurOut("[ERROR]: mismatch in namefile print.\n"); m->setControl_pressed(true); } } outNames.close(); if (m->getControl_pressed()) { util.mothurRemove(outFastaFile); util.mothurRemove(outNameFile); outputTypes.clear(); outputNames.clear(); } return outFastaFile; } catch(exception& e) { m->errorOut(e, "UniqueSeqsCommand", "processName"); exit(1); } } /**************************************************************************************/ string UniqueSeqsCommand::createNewNameFile(string countfile, map nameMap) { 
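//A worked example of the expansion performed below (hypothetical count table
//entry): a nameMap entry of "seqA" -> 3 becomes the name-file line
//  seqA    seqA,seqA_1,seqA_2    (columns are tab separated)
//i.e. the representative name in the first column and, in the second, the
//representative plus synthetic placeholder names for the remaining
//(numSeqs - 1) duplicates, since a count table does not retain the original
//duplicate sequence names. processName() can then consume this file as if a
//real name file had been given.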
//namefile can be blank, indicating no namefile provided try { map variables; variables["[filename]"] = outputdir + util.getRootName(util.getSimpleName(countfile)); string outNameFile = getOutputFileName("name", variables); ofstream out; util.openOutputFile(outNameFile, out); for (map::iterator it = nameMap.begin(); it != nameMap.end(); it++) { string seqName = it->first; string expandedName = seqName; int numSeqs = it->second; for (int i = 1; i < numSeqs; i++) { expandedName += "," + seqName + "_" + toString(i); } out << seqName << '\t' << expandedName << endl; } out.close(); return outNameFile; } catch(exception& e) { m->errorOut(e, "UniqueSeqsCommand", "createNewNameFile"); exit(1); } } /**************************************************************************************/ mothur-1.48.0/source/commands/uniqueseqscommand.h000077500000000000000000000025551424121717000221460ustar00rootroot00000000000000#ifndef DECONVOLUTECOMMAND_H #define DECONVOLUTECOMMAND_H /* * deconvolute.h * Mothur * * Created by Sarah Westcott on 1/21/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "fastamap.h" #include "counttable.h" /* The unique.seqs command reads a fasta file, finds the duplicate sequences and outputs a names file containing 2 columns. The first being the groupname and the second the list of identical sequence names. */ class UniqueSeqsCommand : public Command { public: UniqueSeqsCommand(string); ~UniqueSeqsCommand() = default; vector setParameters(); string getCommandName() { return "unique.seqs"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Unique.seqs"; } string getDescription() { return "creates a fasta containing the unique sequences as well as a namesfile with the names each sequence represents"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: string fastafile, namefile, countfile, format; vector outputNames; bool abort; string processName(string); //not recommended string processCount(string); string createNewNameFile(string, map); }; #endif mothur-1.48.0/source/commands/venncommand.cpp000077500000000000000000000565431424121717000212530ustar00rootroot00000000000000/* * venncommand.cpp * Mothur * * Created by Sarah Westcott on 3/30/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "venncommand.h" #include "ace.h" #include "sobs.h" #include "chao1.h" //#include "jackknife.h" #include "sharedsobscollectsummary.h" #include "sharedchao1.h" #include "sharedace.h" #include "nseqs.h" //********************************************************************************************************************** vector VennCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","svg",false,false,true); parameters.push_back(plist); CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","svg",false,false,true); parameters.push_back(pshared); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pcalc("calc", "String", "", "", "", "", "","",false,false); parameters.push_back(pcalc); CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund); CommandParameter pnseqs("nseqs", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pnseqs); CommandParameter psharedotus("sharedotus", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(psharedotus); CommandParameter pfontsize("fontsize", "Number", "", "24", "", "", "","",false,false); parameters.push_back(pfontsize); CommandParameter ppermute("permute", "Multiple", "1-2-3-4", "4", "", "", "","",false,false); parameters.push_back(ppermute); CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector tempOutNames; outputTypes["svg"] = tempOutNames; abort = false; calledHelp = false; allLines = true; vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; } catch(exception& e) { m->errorOut(e, "VennCommand", "setParameters"); exit(1); } } //********************************************************************************************************************** string VennCommand::getHelpString(){ try { string helpString = ""; helpString += "The venn command parameters are list, shared, groups, calc, abund, nseqs, permute, sharedotus, fontsize and label. shared, relabund, list, rabund or sabund is required unless you have a valid current file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups.\n"; helpString += "The group names are separated by dashes. 
The label allows you to select what distance levels you would like a venn diagram created for, and are also separated by dashes.\n"; helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n"; helpString += "The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, label=yourLabels, abund=yourAbund).\n"; helpString += "Example venn(groups=A-B-C, calc=sharedsobs-sharedchao, abund=20).\n"; helpString += "The default value for groups is all the groups in your groupfile up to 4, and all labels in your inputfile will be used.\n"; helpString += "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n"; helpString += "The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a shared file.\n"; helpString += "The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n"; helpString += "If you have more than 4 groups, you can use the permute parameter to set the number of groups you would like mothur to divide the samples into to draw the venn diagrams for all possible combos. Default=4.\n"; helpString += "The only estimators available four 4 groups are sharedsobs and sharedchao.\n"; helpString += "The sharedotus parameter can be used with the sharedsobs calculator to get the names of the OTUs in each section of the venn diagram. Default=t.\n"; helpString += "The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n"; return helpString; } catch(exception& e) { m->errorOut(e, "VennCommand", "getHelpString"); exit(1); } } //********************************************************************************************************************** string VennCommand::getOutputPattern(string type) { try { string pattern = ""; if (type == "svg") { pattern = "[filename],svg"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->setControl_pressed(true); } return pattern; } catch(exception& e) { m->errorOut(e, "VennCommand", "getOutputPattern"); exit(1); } } //********************************************************************************************************************** VennCommand::VennCommand(string option) : Command() { try { if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else if(option == "category") { abort = true; calledHelp = true; } else { OptionParser parser(option, setParameters()); map parameters = parser.getParameters(); ValidParameters validParameter; listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { format = "list"; inputfile = listfile; current->setListFile(listfile); } sharedfile = validParameter.validFile(parameters, "shared"); if (sharedfile == "not open") { sharedfile = ""; abort = true; } else if (sharedfile == "not found") { sharedfile = ""; } else { format = "sharedfile"; inputfile = sharedfile; current->setSharedFile(sharedfile); } if ((sharedfile == "") && (listfile == "")) { //is there are current file available for any of these? 
//give priority to shared, then list, then rabund, then sabund //if there is a current shared file, use it sharedfile = current->getSharedFile(); if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter.\n"); } else { listfile = current->getListFile(); if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { m->mothurOut("No valid current files. You must provide a list or shared file.\n"); abort = true; } } } if (outputdir == ""){ outputdir = util.hasPath(inputfile); } //check for optional parameter and set defaults // ...at some point should added some additional type checking... label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = false; } else { allLines = true; } } groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } calc = validParameter.valid(parameters, "calc"); if (calc == "not found") { if(format == "list") { calc = "sobs"; } else { calc = "sharedsobs"; } } else { if (calc == "default") { if(format == "list") { calc = "sobs"; } else { calc = "sharedsobs"; } } } util.splitAtDash(calc, Estimators); if (util.inUsersGroups("citation", Estimators)) { ValidCalculators validCalc; validCalc.printCitations(Estimators); //remove citation from list of calcs for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } } string temp; temp = validParameter.valid(parameters, "abund"); if (temp == "not found") { temp = "10"; } util.mothurConvert(temp, abund); temp = validParameter.valid(parameters, "nseqs"); if (temp == "not found"){ temp = "f"; } nseqs = util.isTrue(temp); temp = validParameter.valid(parameters, "permute"); if (temp == "not found"){ temp = "4"; } else { if ((temp == "1") || (temp == "2") || (temp == "3") || (temp == "4")) {} else { bool permTrue = util.isTrue(temp); if (permTrue) { temp = "4"; } else { } } } util.mothurConvert(temp, perm); if ((perm == 1) || (perm == 2) || (perm == 3) || (perm == 4)) { } else { m->mothurOut("[ERROR]: Not a valid permute value. 
Valid values are 1, 2, 3, 4 and true.\n"); abort = true; } temp = validParameter.valid(parameters, "sharedotus"); if (temp == "not found"){ temp = "t"; } sharedOtus = util.isTrue(temp); temp = validParameter.valid(parameters, "fontsize"); if (temp == "not found") { temp = "24"; } util.mothurConvert(temp, fontsize); } } catch(exception& e) { m->errorOut(e, "VennCommand", "VennCommand"); exit(1); } } //********************************************************************************************************************** int VennCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } ValidCalculators validCalculator; if (format == "list") { for (int i=0; imothurOut("No valid calculators given, please correct.\n"); return 0; } venn = new Venn(outputdir, nseqs, inputfile, fontsize, sharedOtus); InputData input(inputfile, format, Groups); string lastLabel; SharedRAbundVectors* lookup = nullptr; if (format == "sharedfile") { lookup = input.getSharedRAbundVectors(); lastLabel = lookup->getLabel(); Groups = lookup->getNamesGroups(); if ((lookup->size() > 4)) { combos = findCombinations(lookup->size()); } }else if (format == "list") { sabund = input.getSAbundVector(); lastLabel = sabund->getLabel(); } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; set userLabels = labels; if (format != "list") { //as long as you are not at the end of the file or done wih the lines you want while((lookup != nullptr) && ((allLines == 1) || (userLabels.size() != 0))) { vector otuLabels = lookup->getOTUNames(); if (m->getControl_pressed()) { for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } delete lookup; delete venn; for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(lookup->getLabel()) == 1){ m->mothurOut(lookup->getLabel()+"\n"); processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); vector data = lookup->getSharedRAbundVectors(); if (lookup->size() > 4) { set< set >::iterator it3; set::iterator it2; for (it3 = combos.begin(); it3 != combos.end(); it3++) { set poss = *it3; vector subset; for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(data[*it2]); } vector outfilenames = venn->getPic(subset, vennCalculators, otuLabels); for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } } }else { vector outfilenames = venn->getPic(data, vennCalculators, otuLabels); for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } } for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); } if ((util.anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup->getLabel(); delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup->getLabel()+"\n"); processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); vector data = lookup->getSharedRAbundVectors(); if (lookup->size() > 4) { set< set >::iterator it3; set::iterator it2; for (it3 = combos.begin(); it3 != combos.end(); it3++) { set poss = *it3; vector subset; for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(data[*it2]); } vector outfilenames = 
venn->getPic(subset, vennCalculators, otuLabels); for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } } }else { vector outfilenames = venn->getPic(data, vennCalculators, otuLabels); for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } } for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); lookup->setLabels(saveLabel); //restore real lastlabel to save below } lastLabel = lookup->getLabel(); //get next line to process delete lookup; lookup = input.getSharedRAbundVectors(); } if (m->getControl_pressed()) { for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } delete venn; for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup->getLabel()+"\n"); processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); vector data = lookup->getSharedRAbundVectors(); vector otuLabels = lookup->getOTUNames(); if (lookup->size() > 4) { set< set >::iterator it3; set::iterator it2; for (it3 = combos.begin(); it3 != combos.end(); it3++) { set poss = *it3; vector subset; for (it2 = poss.begin(); it2 != poss.end(); it2++) { subset.push_back(data[*it2]); } vector outfilenames = venn->getPic(subset, vennCalculators, otuLabels); for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } } }else { vector outfilenames = venn->getPic(data, vennCalculators, otuLabels); for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } } for (int i = 0; i < data.size(); i++) { delete data[i]; } data.clear(); delete lookup; } if (m->getControl_pressed()) { delete venn; for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } }else{ while((sabund != nullptr) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->getControl_pressed()) { for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } delete sabund; delete venn; for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(sabund->getLabel()) == 1){ m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); vector outfilenames = venn->getPic(sabund, vennCalculators); for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel()); } if ((util.anyLabelsToProcess(sabund->getLabel(), 
userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = sabund->getLabel(); delete sabund; sabund = input.getSAbundVector(lastLabel); m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); vector outfilenames = venn->getPic(sabund, vennCalculators); for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } processedLabels.insert(sabund->getLabel()); userLabels.erase(sabund->getLabel()); //restore real lastlabel to save below sabund->setLabel(saveLabel); } lastLabel = sabund->getLabel(); delete sabund; sabund = input.getSAbundVector(); } if (m->getControl_pressed()) { for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } delete venn; return 0; } //output error messages about any remaining user labels set::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { if (sabund != nullptr) { delete sabund; } sabund = input.getSAbundVector(lastLabel); m->mothurOut(sabund->getLabel()); m->mothurOutEndLine(); vector outfilenames = venn->getPic(sabund, vennCalculators); for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } } delete sabund; } if (m->getControl_pressed()) { delete venn; for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } } for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } delete venn; m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "VennCommand", "execute"); exit(1); } } //********************************************************************************************************************** //returns a vector of sets containing the group combinations set< set > VennCommand::findCombinations(int lookupSize){ try { set< set > combos; set possibles; for (int i = 0; i < lookupSize; i++) { possibles.insert(i); } getCombos(possibles, combos); return combos; } catch(exception& e) { m->errorOut(e, "VennCommand", "findCombinations"); exit(1); } } //********************************************************************************************************************** //recusively finds combos of length perm int VennCommand::getCombos(set possibles, set< set >& combos){ try { if (possibles.size() == perm) { //done if (combos.count(possibles) == 0) { //no dups combos.insert(possibles); } }else { //we still have work to do set::iterator it; set::iterator it2; for (it = possibles.begin(); it != possibles.end(); it++) { set newPossibles; for (it2 = possibles.begin(); it2 != possibles.end(); it2++) { //all possible combos of one length smaller if (*it != *it2) { newPossibles.insert(*it2); } } getCombos(newPossibles, combos); } } return 0; } catch(exception& e) { m->errorOut(e, "VennCommand", "getCombos"); 
exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/commands/venncommand.h000077500000000000000000000025051424121717000207050ustar00rootroot00000000000000#ifndef VENNCOMMAND_H #define VENNCOMMAND_H /* * venncommand.h * Mothur * * Created by Sarah Westcott on 3/30/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "command.hpp" #include "inputdata.h" #include "sharedlistvector.h" #include "venn.h" #include "validcalculator.h" class VennCommand : public Command { public: VennCommand(string); ~VennCommand() = default; vector setParameters(); string getCommandName() { return "venn"; } string getCommandCategory() { return "OTU-Based Approaches"; } string getHelpString(); string getOutputPattern(string); string getCitation() { return "http://www.mothur.org/wiki/Venn"; } string getDescription() { return "generates a Venn diagram from data provided in a shared file"; } int execute(); void help() { m->mothurOut(getHelpString()); } private: SharedListVector* SharedList; Venn* venn; vector vennCalculators; set< set > combos; SAbundVector* sabund; int abund, fontsize, perm; bool abort, allLines, nseqs, sharedOtus; set labels; //holds labels to be used string format, groups, calc, label, sharedfile, listfile, inputfile; vector Estimators, Groups, outputNames; set< set > findCombinations(int); int getCombos(set, set< set >&); }; #endif mothur-1.48.0/source/communitytype/000077500000000000000000000000001424121717000173475ustar00rootroot00000000000000mothur-1.48.0/source/communitytype/communitytype.cpp000077500000000000000000000710071424121717000230110ustar00rootroot00000000000000// // communitytype.cpp // Mothur // // Created by SarahsWork on 12/3/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "communitytype.h" /**************************************************************************************************/ //can we get these psi/psi1 calculations into their own math class? //psi calcualtions swiped from gsl library... static const double psi_cs[23] = { -.038057080835217922, .491415393029387130, -.056815747821244730, .008357821225914313, -.001333232857994342, .000220313287069308, -.000037040238178456, .000006283793654854, -.000001071263908506, .000000183128394654, -.000000031353509361, .000000005372808776, -.000000000921168141, .000000000157981265, -.000000000027098646, .000000000004648722, -.000000000000797527, .000000000000136827, -.000000000000023475, .000000000000004027, -.000000000000000691, .000000000000000118, -.000000000000000020 }; static double apsi_cs[16] = { -.0204749044678185, -.0101801271534859, .0000559718725387, -.0000012917176570, .0000000572858606, -.0000000038213539, .0000000003397434, -.0000000000374838, .0000000000048990, -.0000000000007344, .0000000000001233, -.0000000000000228, .0000000000000045, -.0000000000000009, .0000000000000002, -.0000000000000000 }; /**************************************************************************************************/ /* coefficients for Maclaurin summation in hzeta() * B_{2j}/(2j)! 
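 * (here the B_{2j} are Bernoulli numbers; these coefficients are used by psi1() below,
 *  which evaluates the trigamma function psi_1(x) = zeta(2, x) by Euler-Maclaurin summation)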
*/ static double hzeta_c[15] = { 1.00000000000000000000000000000, 0.083333333333333333333333333333, -0.00138888888888888888888888888889, 0.000033068783068783068783068783069, -8.2671957671957671957671957672e-07, 2.0876756987868098979210090321e-08, -5.2841901386874931848476822022e-10, 1.3382536530684678832826980975e-11, -3.3896802963225828668301953912e-13, 8.5860620562778445641359054504e-15, -2.1748686985580618730415164239e-16, 5.5090028283602295152026526089e-18, -1.3954464685812523340707686264e-19, 3.5347070396294674716932299778e-21, -8.9535174270375468504026113181e-23 }; /**************************************************************************************************/ void CommunityTypeFinder::printSilData(ofstream& out, double chi, vector sils){ try { out << setprecision (6) << numPartitions << '\t' << chi; for (int i = 0; i < sils.size(); i++) { out << '\t' << sils[i]; } out << endl; return; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "printSilData"); exit(1); } } /**************************************************************************************************/ void CommunityTypeFinder::printSilData(ostream& out, double chi, vector sils){ try { out << setprecision (6) << numPartitions << '\t' << chi; m->mothurOutJustToLog(toString(numPartitions) + '\t' + toString(chi)); for (int i = 0; i < sils.size(); i++) { out << '\t' << sils[i]; m->mothurOutJustToLog("\t" + toString(sils[i])); } out << endl; m->mothurOutJustToLog("\n"); return; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "printSilData"); exit(1); } } /**************************************************************************************************/ void CommunityTypeFinder::printZMatrix(string fileName, vector sampleName){ try { ofstream printMatrix; util.openOutputFile(fileName, printMatrix); //(fileName.c_str()); printMatrix.setf(ios::fixed, ios::floatfield); printMatrix.setf(ios::showpoint); for(int i=0;ierrorOut(e, "CommunityTypeFinder", "printZMatrix"); exit(1); } } /**************************************************************************************************/ void CommunityTypeFinder::printRelAbund(string fileName, vector otuNames){ try { ofstream printRA; util.openOutputFile(fileName, printRA); //(fileName.c_str()); printRA.setf(ios::fixed, ios::floatfield); printRA.setf(ios::showpoint); vector totals(numPartitions, 0.0000); for(int i=0;igetControl_pressed()) { break; } printRA << otuNames[i]; for(int j=0;j= 0.0000){ double std = sqrt(error[j][i]); printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j]; printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j]; printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j]; } else{ printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j]; printRA << '\t' << "NA"; printRA << '\t' << "NA"; } } printRA << endl; } printRA.close(); } catch(exception& e) { m->errorOut(e, "CommunityTypeFinder", "printRelAbund"); exit(1); } } /**************************************************************************************************/ vector > CommunityTypeFinder::getHessian(){ try { vector alpha(numOTUs, 0.0000); double alphaSum = 0.0000; vector pi = zMatrix[currentPartition]; vector psi_ajk(numOTUs, 0.0000); vector psi_cjk(numOTUs, 0.0000); vector psi1_ajk(numOTUs, 0.0000); vector psi1_cjk(numOTUs, 0.0000); for(int j=0;jgetControl_pressed()) { break; } alpha[j] = exp(lambdaMatrix[currentPartition][j]); alphaSum += alpha[j]; for(int i=0;igetControl_pressed()) { break; } weight += pi[i]; double sum = 0.0000; for(int 
j=0;j > hessian(numOTUs); for(int i=0;igetControl_pressed()) { break; } double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck); double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck); double term3 = 0.1 * alpha[i]; hessian[i][i] = term1 + term2 + term3; for(int j=0;jerrorOut(e, "CommunityTypeFinder", "getHessian"); exit(1); } } /**************************************************************************************************/ double CommunityTypeFinder::psi1(double xx){ try { /* Euler-Maclaurin summation formula * [Moshier, p. 400, with several typo corrections] */ double s = 2.0000; const int jmax = 12; const int kmax = 10; int j, k; const double pmax = pow(kmax + xx, -s); double scp = s; double pcp = pmax / (kmax + xx); double value = pmax*((kmax+xx)/(s-1.0) + 0.5); for(k=0; kgetControl_pressed()) { return 0; } value += pow(k + xx, -s); } for(j=0; j<=jmax; j++) { if (m->getControl_pressed()) { return 0; } double delta = hzeta_c[j+1] * scp * pcp; value += delta; if(fabs(delta/value) < 0.5*EPSILON) break; scp *= (s+2*j+1)*(s+2*j+2); pcp /= (kmax + xx)*(kmax + xx); } return value; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "psi1"); exit(1); } } /**************************************************************************************************/ double CommunityTypeFinder::psi(double xx){ try { double psiX = 0.0000; if(xx < 1.0000){ double t1 = 1.0 / xx; psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0); psiX = -t1 + psiX; } else if(xx < 2.0000){ const double v = xx - 1.0; psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0); } else{ const double t = 8.0/(xx*xx)-1.0; psiX = cheb_eval(apsi_cs, 15, t); psiX += log(xx) - 0.5/xx; } return psiX; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "psi"); exit(1); } } /**************************************************************************************************/ double CommunityTypeFinder::cheb_eval(const double seriesData[], int order, double xx){ try { double d = 0.0000; double dd = 0.0000; double x2 = xx * 2.0000; for(int j=order;j>=1;j--){ if (m->getControl_pressed()) { return 0; } double temp = d; d = x2 * d - dd + seriesData[j]; dd = temp; } d = xx * d - dd + 0.5 * seriesData[0]; return d; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "cheb_eval"); exit(1); } } /**************************************************************************************************/ int CommunityTypeFinder::findkMeans(){ try { error.resize(numPartitions); for (int i = 0; i < numPartitions; i++) { error[i].resize(numOTUs, 0.0); } vector > relativeAbundance(numSamples); vector > alphaMatrix; alphaMatrix.resize(numPartitions); lambdaMatrix.resize(numPartitions); for(int i=0;igetControl_pressed()) { return 0; } int groupTotal = 0; relativeAbundance[i].assign(numOTUs, 0.0); for(int j=0;j temp; for (int i = 0; i < numSamples; i++) { temp.push_back(i); } util.mothurRandomShuffle(temp); //assign each partition at least one random sample int numAssignedSamples = 0; for (int i = 0; i < numPartitions; i++) { zMatrix[i][temp[numAssignedSamples]] = 1; numAssignedSamples++; } //assign rest of samples to partitions int count = 0; for(int i=numAssignedSamples;i 1e-6 && iteration < maxIters){ if (m->getControl_pressed()) { return 0; } //calcualte average relative abundance maxChange = 0.0000; for(int i=0;i averageRelativeAbundance(numOTUs, 0); for(int j=0;j maxChange){ maxChange = normChange; } } //calcualte distance between each sample in partition and the average relative abundance for(int 
i=0;igetControl_pressed()) { return 0; } double normalizationFactor = 0; vector totalDistToPartition(numPartitions, 0); for(int j=0;jgetControl_pressed()) { return 0; } for(int j=0;j 0){ lambdaMatrix[j][i] = log(alphaMatrix[j][i]); } else{ lambdaMatrix[j][i] = -10.0; } } } return 0; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "kMeans"); exit(1); } } /**************************************************************************************************/ //based on r function .medoid //results is length numOTUs and holds the distances from x of the sample in d with the min sum of distances to all other samples. //Basically the "best" medoid. //returns the sum of the distances squared double CommunityTypeFinder::rMedoid(vector< vector > x, vector< vector > d){ try { vector results; results.resize(numOTUs, 0.0); double minSumDist = MOTHURMAX; int minGroup = -1; for (int i = 0; i < d.size(); i++) { if (m->getControl_pressed()) { break; } double thisSum = 0.0; for (int j = 0; j < d[i].size(); j++) { thisSum += d[i][j]; } if (thisSum < minSumDist) { minSumDist = thisSum; minGroup = i; } } if (minGroup != -1) { for (int i = 0; i < numOTUs; i++) { results[i] = x[minGroup][i]; } //save minGroups relativeAbundance for each OTU }else { m->mothurOut("[ERROR]: unable to find rMedoid group.\n"); m->setControl_pressed(true); } double allMeanDist = 0.0; for (int i = 0; i < x.size(); i++) { //numSamples for (int j = 0; j < x[i].size(); j++) { //numOTus if (m->getControl_pressed()) { break; } allMeanDist += ((x[i][j]-results[j])*(x[i][j]-results[j])); //(otuX sampleY - otuX bestMedoid)^2 } } return allMeanDist; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "rMedoid"); exit(1); } } /**************************************************************************************************/ /*To assess the optimal number of clusters our dataset was most robustly partitioned into, we used the Calinski-Harabasz (CH) Index that has shown good performance in recovering the number of clusters. It is defined as: CHk=Bk/(k−1)/Wk/(n−k) where Bk is the between-cluster sum of squares (i.e. the squared distances between all points i and j, for which i and j are not in the same cluster) and Wk is the within-clusters sum of squares (i.e. the squared distances between all points i and j, for which i and j are in the same cluster). This measure implements the idea that the clustering is more robust when between-cluster distances are substantially larger than within-cluster distances. Consequently, we chose the number of clusters k such that CHk was maximal.*/ double CommunityTypeFinder::calcCHIndex(vector< vector< double> > dists){ try { double CH = 0.0; if (numPartitions < 2) { return CH; } map clusterMap; //map sample to partition for (int j = 0; j < numSamples; j++) { double maxValue = -MOTHURMAX; for (int i = 0; i < numPartitions; i++) { if (m->getControl_pressed()) { return 0.0; } if (zMatrix[i][j] > maxValue) { //for kmeans zmatrix contains values for each sample in each partition. 
partition with highest value for that sample is the partition where the sample should be clusterMap[j] = i; maxValue = zMatrix[i][j]; } } } //make countMatrix a relabund vector > relativeAbundance(numSamples); //[numSamples][numOTUs] //get relative abundance for(int i=0;igetControl_pressed()) { return 0; } int groupTotal = 0; relativeAbundance[i].assign(numOTUs, 0.0); for(int j=0;j > centers = calcCenters(dists, clusterMap, relativeAbundance); if (m->getControl_pressed()) { return 0.0; } double allMeanDist = rMedoid(relativeAbundance, dists); if (m->getDebug()) { m->mothurOut("[DEBUG]: allMeandDist = " + toString(allMeanDist) + "\n"); } for (int i = 0; i < relativeAbundance.size(); i++) {//numSamples for (int j = 0; j < relativeAbundance[i].size(); j++) { //numOtus if (m->getControl_pressed()) { return 0; } //x <- (x - centers[cl, ])^2 relativeAbundance[i][j] = ((relativeAbundance[i][j] - centers[clusterMap[i]][j])*(relativeAbundance[i][j] - centers[clusterMap[i]][j])); } } double wgss = 0.0; for (int j = 0; j < numOTUs; j++) { for(int i=0;igetControl_pressed()) { return 0.0; } wgss += relativeAbundance[i][j]; } } double bgss = allMeanDist - wgss; CH = (bgss / (double)(numPartitions - 1)) / (wgss / (double) (numSamples - numPartitions)); return CH; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "calcCHIndex"); exit(1); } } /**************************************************************************************************/ vector > CommunityTypeFinder::calcCenters(vector >& dists, map clusterMap, vector >& relativeAbundance) { //[numsamples][numsamples] try { //for each partition //choose sample with smallest sum of squared dists vector > centers; centers.resize(numPartitions); vector sums; sums.resize(numSamples, 0.0); map > partition2Samples; //maps partitions to samples in the partition map >::iterator it; for (int i = 0; i < numSamples; i++) { int partitionI = clusterMap[i]; //add this sample to list of samples in this partition for access later it = partition2Samples.find(partitionI); if (it == partition2Samples.end()) { vector temp; temp.push_back(i); partition2Samples[partitionI] = temp; }else { partition2Samples[partitionI].push_back(i); } for (int j = 0; j < numSamples; j++) { int partitionJ = clusterMap[j]; if (partitionI == partitionJ) { //if you are a distance between samples in the same cluster sums[i] += dists[i][j]; sums[j] += dists[i][j]; }else{}//we dont' care about distance between clusters } } vector medoidsVector; medoidsVector.resize(numPartitions, -1); for (it = partition2Samples.begin(); it != partition2Samples.end(); it++) { //for each partition look for sample with smallest squared //sum dist to all other samples in cluster vector members = it->second; double minSumDist = MOTHURMAX; for (int i = 0; i < members.size(); i++) { if (m->getControl_pressed()) { return centers; } if (sums[members[i]] < minSumDist) { minSumDist = sums[members[i]]; medoidsVector[it->first] = members[i]; } } } set medoids; for (int i = 0; i < medoidsVector.size(); i++) { medoids.insert(medoidsVector[i]); } int countPartitions = 0; for (set::iterator it = medoids.begin(); it != medoids.end(); it++) { for (int j = 0; j < numOTUs; j++) { centers[countPartitions].push_back(relativeAbundance[*it][j]); //save the relative abundance of the medoid for this partition for this OTU } countPartitions++; } return centers; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "calcCenters"); exit(1); } } 
/**************************************************************************************************/ //The silhouette width S(i)of individual data points i is calculated using the following formula: /* s(i) = b(i) - a(i) ----------- max(b(i),a(i)) where a(i) is the average dissimilarity (or distance) of sample i to all other samples in the same cluster, while b(i) is the average dissimilarity (or distance) to all objects in the closest other cluster. The formula implies -1 =< S(i) =< 1 . A sample which is much closer to its own cluster than to any other cluster has a high S(i) value, while S(i) close to 0 implies that the given sample lies somewhere between two clusters. Large negative S(i) values indicate that the sample was assigned to the wrong cluster. */ //based on silouette.r which calls sildist.c written by Francois Romain vector CommunityTypeFinder::calcSilhouettes(vector > dists) { try { vector silhouettes; silhouettes.resize(numSamples, 0.0); if (numPartitions < 2) { return silhouettes; } map clusterMap; //map sample to partition for (int j = 0; j < numSamples; j++) { double maxValue = 0.0; for (int i = 0; i < numPartitions; i++) { if (m->getControl_pressed()) { return silhouettes; } if (zMatrix[i][j] > maxValue) { //for kmeans zmatrix contains values for each sample in each partition. partition with highest value for that sample is the partition where the sample should be clusterMap[j] = i; maxValue = zMatrix[i][j]; } } } //count number of samples in each partition vector counts; counts.resize(numPartitions, 0); vector DiC; DiC.resize((numPartitions*numSamples), 0.0); bool computeSi = true; for (int i = 0; i < numSamples; i++) { int partitionI = clusterMap[i]; counts[partitionI]++; for (int j = i+1; j < numSamples; j++) { if (m->getControl_pressed()) { return silhouettes; } int partitionJ = clusterMap[j]; DiC[numPartitions*i+partitionJ] += dists[i][j]; DiC[numPartitions*j+partitionI] += dists[i][j]; } } vector neighbor; neighbor.resize(numSamples, -1); for (int i = 0; i < numSamples; i++) { if (m->getControl_pressed()) { return silhouettes; } int ki = numPartitions*i; int partitionI = clusterMap[i]; computeSi = true; for (int j = 0; j < numPartitions; j++) { if (j == partitionI) { if (counts[j] == 1) { //only one sample in cluster computeSi = false; }else { DiC[ki+j] /= (counts[j]-1); } }else{ DiC[ki+j] /= counts[j]; } } double ai = DiC[ki+partitionI]; double bi = 0.0; if (partitionI == 0) { bi = DiC[ki+1]; neighbor[i] = 2; } else { bi = DiC[ki]; neighbor[i] = 1; } for (int j = 1; j < numPartitions; j++) { if (j != partitionI) { if (bi > DiC[ki+j]) { bi = DiC[ki + j]; neighbor[i] = j+1; } } } silhouettes[i] = 0.0; if (computeSi && !util.isEqual(bi, ai)) { silhouettes[i] = (bi-ai) / (max(ai, bi)); } } return silhouettes; } catch(exception& e) { m->errorOut(e, "CommunityTypeFinder", "calcSilhouettes"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/communitytype/communitytype.h000077500000000000000000000042621424121717000224550ustar00rootroot00000000000000// // communitytype.h // Mothur // // Created by SarahsWork on 12/3/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
// #ifndef Mothur_communitytype_h #define Mothur_communitytype_h #define EPSILON numeric_limits::epsilon() #include "mothurout.h" #include "linearalgebra.h" #include "utils.hpp" /**************************************************************************************************/ class CommunityTypeFinder { public: CommunityTypeFinder(){ m = MothurOut::getInstance(); } virtual ~CommunityTypeFinder(){}; virtual void printZMatrix(string, vector); virtual void printRelAbund(string, vector); virtual void printFitData(ofstream&) {} virtual void printFitData(ostream&, double) {} virtual void printSilData(ofstream&, double, vector); virtual void printSilData(ostream&, double, vector); virtual double getNLL() { return currNLL; } virtual double getAIC() { return aic; } virtual double getBIC() { return bic; } virtual double getLogDet() { return logDeterminant; } virtual double getLaplace() { return laplace; } virtual double calcCHIndex(vector< vector< double> >); //Calinski-Harabasz virtual vector calcSilhouettes(vector< vector< double> >); protected: int findkMeans(); vector > getHessian(); double psi1(double); double psi(double); double cheb_eval(const double[], int, double); double rMedoid(vector< vector > x, vector< vector > d); vector > calcCenters(vector >&, map, vector >&); MothurOut* m; vector > zMatrix; vector > lambdaMatrix; vector > error; vector > countMatrix; vector weights; Utils util; int numPartitions; int numSamples; int numOTUs; int currentPartition; double currNLL, aic, bic, logDeterminant, laplace; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/communitytype/kmeans.cpp000077500000000000000000000012751424121717000213410ustar00rootroot00000000000000// // kmeans.cpp // Mothur // // Created by SarahsWork on 12/4/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "kmeans.h" /**************************************************************************************************/ KMeans::KMeans(vector > cm, int p) : CommunityTypeFinder() { try { countMatrix = cm; numSamples = (int)countMatrix.size(); numOTUs = (int)countMatrix[0].size(); numPartitions = p; findkMeans(); } catch(exception& e) { m->errorOut(e, "KMeans", "KMeans"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/communitytype/kmeans.h000077500000000000000000000010251424121717000207770ustar00rootroot00000000000000// // kmeans.h // Mothur // // Created by SarahsWork on 12/4/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_kmeans_h #define Mothur_kmeans_h #include "communitytype.h" /**************************************************************************************************/ class KMeans : public CommunityTypeFinder { public: KMeans(vector >, int); private: }; /**************************************************************************************************/ #endif mothur-1.48.0/source/communitytype/pam.cpp000077500000000000000000000312641424121717000206410ustar00rootroot00000000000000// // pam.cpp // Mothur // // Created by SarahsWork on 12/10/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
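//
// Informal sketch of the partitioning-around-medoids (PAM) steps implemented below; see
// buildPhase() and swapPhase() for the exact computations:
//   build phase : repeatedly add the non-medoid sample i that maximizes the total gain
//                 sum_j max(Dp[j] - d(i,j), 0), where Dp[j] is sample j's distance to its
//                 current closest medoid, until the requested number of medoids is chosen.
//   swap phase  : for every (medoid, non-medoid) pair compute the change in total cost
//                 caused by swapping them, perform the most cost-reducing swap, and repeat
//                 until no swap meaningfully lowers the cost; finally assign every sample
//                 to its closest medoid.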
// #include "pam.h" #ifndef DBL_EPSILON #define DBL_EPSILON 1e-9 #endif /**************************************************************************************************/ Pam::Pam(vector > c, vector > d, int p) : CommunityTypeFinder() { try { countMatrix = c; numSamples = (int)d.size(); numOTUs = (int)c[0].size(); numPartitions = p; dists = d; largestDist = 0; for (int i = 0; i < dists.size(); i++) { for (int j = i; j < dists.size(); j++) { if (m->getControl_pressed()) { break; } if (dists[i][j] > largestDist) { largestDist = dists[i][j]; } } } buildPhase(); //choosing the medoids swapPhase(); //optimize clusters } catch(exception& e) { m->errorOut(e, "Pam", "Pam"); exit(1); } } /**************************************************************************************************/ //build and swap functions based on pam.c by maechler from R cluster package //sets Dp[0] does not set Dp[1]. chooses intial medoids. int Pam::buildPhase() { try { if (m->getDebug()) { m->mothurOut("[DEBUG]: building medoids\n"); } vector gains; gains.resize(numSamples); largestDist *= 1.1 + 1; //make this distance larger than any distance in the matrix Dp.resize(numSamples); for (int i = 0; i < numSamples; i++) { Dp[i].push_back(largestDist); Dp[i].push_back(largestDist); } //2 smallest dists for this sample in this partition zMatrix.resize(numPartitions); for(int i=0;igetControl_pressed()) { break; } if (medoids.count(i) == 0) { //is this sample is NOT a medoid? gains[i] = 0.0; for (int j = 0; j < numSamples; j++) { totalGain = Dp[j][0] - dists[i][j]; if (totalGain > 0.0) { gains[i] += totalGain; } } if (m->getDebug()) { m->mothurOut("[DEBUG]: " + toString(i) + " totalGain = " + toString(totalGain) + "\n"); } if (clusterGain <= gains[i]) { clusterGain = gains[i]; medoid = i; } } } //save medoid value medoids.insert(medoid); if (m->getDebug()) { m->mothurOut("[DEBUG]: new medoid " + toString(medoid) + "\n"); } //update dp values for (int i = 0; i < numSamples; i++) { if (Dp[i][0] > dists[i][medoid]) { Dp[i][0] = dists[i][medoid]; } } } if (m->getDebug()) { m->mothurOut("[DEBUG]: done building medoids\n"); } return 0; } catch(exception& e) { m->errorOut(e, "Pam", "buildPhase"); exit(1); } } /**************************************************************************************************/ //goal to swap medoids with non-medoids to see if we can reduce the overall cost int Pam::swapPhase() { try { if (m->getDebug()) { m->mothurOut("[DEBUG]: swapping medoids\n"); } //calculate cost of initial choice - average distance of samples to their closest medoid double sky = 0.0; double dzsky = 1.0; for (int i = 0; i < numSamples; i++) { sky += Dp[i][0]; } //sky /= (double) numSamples; bool done = false; int hbest, nbest; hbest = -1; nbest = -1; while (!done) { if (m->getControl_pressed()) { break; } updateDp(); dzsky = 1; for (int h = 0; h < numSamples; h++) { if (m->getControl_pressed()) { break; } if (medoids.count(h) == 0) { //this is NOT a medoid for (int i = 0; i < numSamples; i++) { if (medoids.count(i) != 0) { //this is a medoid double dz = 0.0; //Tih sum of distances between objects and closest medoid caused by swapping i and h. Basically the change in cost. If this < 0 its a "good" swap. When all Tih are > 0, then we stop the algo, because we have the optimal medoids. 
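                        // The loop below accumulates the per-sample contribution to this cost change.
                        // Writing Dj = Dp[j][0] (distance to the closest medoid) and Ej = Dp[j][1]
                        // (distance to the second closest medoid), it is equivalent to:
                        //   if d(i,j) == Dj (i is j's closest medoid) : dz += min(d(h,j), Ej) - Dj
                        //   otherwise                                 : dz += min(d(h,j) - Dj, 0)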
for (int j = 0; j < numSamples; j++) { if (m->getControl_pressed()) { break; } if (util.isEqual(dists[i][j], Dp[j][0])) { double smallValue; smallValue = 0.0; if (Dp[j][1] > dists[h][j]) { smallValue = dists[h][j]; } else { smallValue = Dp[j][1]; } dz += (- Dp[j][0]+ smallValue); }else if (dists[h][j] < Dp[j][0]) { dz += (- Dp[j][0] + dists[h][j]); } } if (dzsky > dz) { dzsky = dz; hbest = h; nbest = i; } }//end if medoid }//end for i }//end if NOT medoid }//end if h if (dzsky < -16 *DBL_EPSILON * fabs(sky)) { medoids.insert(hbest); medoids.erase(nbest); if (m->getDebug()) { m->mothurOut("[DEBUG]: swapping " + toString(hbest) + " " + toString(nbest) + "\n"); } sky += dzsky; }else { done = true; } //stop algo. } //fill zmatrix int count = 0; vector tempMedoids; for (set::iterator it = medoids.begin(); it != medoids.end(); it++) { medoid2Partition[*it] = count; zMatrix[count][*it] = 1; count++; //set medoid in this partition. tempMedoids.push_back(*it); } //which partition do you belong to? laplace = 0; for (int i = 0; i < numSamples; i++) { int partition = 0; double dist = dists[i][tempMedoids[0]]; //assign to first medoid for (int j = 1; j < tempMedoids.size(); j++) { if (dists[i][tempMedoids[j]] < dist) { //is this medoid closer? dist = dists[i][tempMedoids[j]]; partition = j; } } zMatrix[partition][i] = 1; laplace += dist; } laplace /= (double) numSamples; if (m->getDebug()) { for(int i=0;imothurOut("[DEBUG]: partition 1: "); for (int j = 0; j < numSamples; j++) { m->mothurOut(toString(zMatrix[i][j]) + " "); } m->mothurOut("\n"); } m->mothurOut("[DEBUG]: medoids : "); for (set::iterator it = medoids.begin(); it != medoids.end(); it++) { m->mothurOut(toString(*it) + " "); } m->mothurOut("\n"); m->mothurOut("[DEBUG]: laplace : " + toString(laplace)); m->mothurOut("\n"); } if (m->getDebug()) { m->mothurOut("[DEBUG]: done swapping medoids\n"); } return 0; } catch(exception& e) { m->errorOut(e, "Pam", "swapPhase"); exit(1); } } /**************************************************************************************************/ int Pam::updateDp() { try { for (int j = 0; j < numSamples; j++) { if (m->getControl_pressed()) { break; } //initialize dp and ep Dp[j][0] = largestDist; Dp[j][1] = largestDist; for (int i = 0; i < numSamples; i++) { if (medoids.count(i) != 0) { //is this a medoid? if (Dp[j][0] > dists[j][i]) { Dp[j][0] = Dp[j][1]; Dp[j][0] = dists[j][i]; }else if (Dp[j][1] > dists[j][i]) { Dp[j][1] = dists[j][i]; } } } } return 0; } catch(exception& e) { m->errorOut(e, "Pam", "updateDp"); exit(1); } } /**************************************************************************************************/ /*To assess the optimal number of clusters our dataset was most robustly partitioned into, we used the Calinski-Harabasz (CH) Index that has shown good performance in recovering the number of clusters. It is defined as: CHk=Bk/(k−1)/Wk/(n−k) where Bk is the between-cluster sum of squares (i.e. the squared distances between all points i and j, for which i and j are not in the same cluster) and Wk is the within-clusters sum of squares (i.e. the squared distances between all points i and j, for which i and j are in the same cluster). This measure implements the idea that the clustering is more robust when between-cluster distances are substantially larger than within-cluster distances. 
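Written out explicitly, with k clusters and n samples:
    CH_k = [ B_k / (k - 1) ] / [ W_k / (n - k) ]
In the code below W_k (wgss) is accumulated directly, and B_k is obtained as
allMeanDist - wgss, where allMeanDist is the total squared distance of all samples to the
overall medoid returned by rMedoid().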
Consequently, we chose the number of clusters k such that CHk was maximal.*/ //based on R index.G1.r function double Pam::calcCHIndex(vector< vector > dists){ //countMatrix = [numSamples][numOtus] try { double CH = 0.0; if (numPartitions < 2) { return CH; } map clusterMap; //map sample to partition for (int i = 0; i < numPartitions; i++) { for (int j = 0; j < numSamples; j++) { if (m->getControl_pressed()) { return 0.0; } if (!util.isEqual(zMatrix[i][j], 0)) { clusterMap[j] = i; } } } //make countMatrix a relabund vector > relativeAbundance(numSamples); //[numSamples][numOTUs] //get relative abundance for(int i=0;igetControl_pressed()) { return 0; } int groupTotal = 0; relativeAbundance[i].assign(numOTUs, 0.0); for(int j=0;j > centers; centers.resize(numPartitions); int countPartitions = 0; for (set::iterator it = medoids.begin(); it != medoids.end(); it++) { for (int j = 0; j < numOTUs; j++) { centers[countPartitions].push_back(relativeAbundance[*it][j]); //save the relative abundance of the medoid for this partition for this OTU } countPartitions++; } //centers.clear(); //centers = calcCenters(dists, clusterMap, relativeAbundance); double allMeanDist = rMedoid(relativeAbundance, dists); if (m->getDebug()) { m->mothurOut("[DEBUG]: allMeandDist = " + toString(allMeanDist) + "\n"); } for (int i = 0; i < relativeAbundance.size(); i++) {//numSamples for (int j = 0; j < relativeAbundance[i].size(); j++) { //numOtus if (m->getControl_pressed()) { return 0; } //x <- (x - centers[cl, ])^2 relativeAbundance[i][j] = ((relativeAbundance[i][j] - centers[clusterMap[i]][j])*(relativeAbundance[i][j] - centers[clusterMap[i]][j])); } } double wgss = 0.0; for (int j = 0; j < numOTUs; j++) { for(int i=0;igetControl_pressed()) { return 0.0; } wgss += relativeAbundance[i][j]; } } double bgss = allMeanDist - wgss; CH = (bgss / (double)(numPartitions - 1)) / (wgss / (double) (numSamples - numPartitions)); return CH; } catch(exception& e){ m->errorOut(e, "Pam", "calcCHIndex"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/communitytype/pam.h000077500000000000000000000021451424121717000203020ustar00rootroot00000000000000// // pam.h // Mothur // // Created by SarahsWork on 12/10/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef Mothur_pam_h #define Mothur_pam_h #include "communitytype.h" //Partitioning Around Medoids /**************************************************************************************************/ class Pam : public CommunityTypeFinder { public: Pam(vector >, vector >, int); double calcCHIndex(vector< vector< double> >); private: set medoids; map medoid2Partition; double largestDist; vector > dists; vector > Dp; // [numSamples][2] - It contains Dp and Ep. Dp is in [numSamples][0] and Ep is in [numSamples][1]. Dp is the distance between p and the closest sample in S and Ep is the distance between p and the second closest object in S. Both are used in the build and swap phases. int buildPhase(); int swapPhase(); int updateDp(); /**************************************************************************************************/ }; #endif mothur-1.48.0/source/communitytype/qFinderDMM.cpp000077500000000000000000000630451424121717000220140ustar00rootroot00000000000000// // qFinderDMM.cpp // pds_dmm // // Created by Patrick Schloss on 11/8/12. // Copyright (c) 2012 University of Michigan. All rights reserved. 
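//
// Rough outline of the Dirichlet multinomial mixture fit performed in the constructor below
// (informal summary; see the individual functions for the exact computations):
//   1. seed the partition assignments with k-means (findkMeans)
//   2. optimize each partition's lambda = log(alpha) vector with the BFGS line-search solver
//      (optimizeLambda / bfgs2_Solver), minimizing negativeLogEvidenceLambdaPi
//   3. recompute the sample-to-partition responsibilities (calculatePiK) and repeat step 2
//      until the negative log-likelihood changes by less than 1e-6, or 100 iterations
//   4. accumulate the log-determinant of the per-partition Hessians for the Laplace
//      approximation of the model evidence, reported by printFitData along with BIC and AIC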
// #include "qFinderDMM.h" /**************************************************************************************************/ qFinderDMM::qFinderDMM(vector > cm, int p) : CommunityTypeFinder() { try { numPartitions = p; countMatrix = cm; numSamples = (int)countMatrix.size(); numOTUs = (int)countMatrix[0].size(); findkMeans(); optimizeLambda(); double change = 1.0000; currNLL = 0.0000; int iter = 0; while(change > 1.0e-6 && iter < 100){ calculatePiK(); optimizeLambda(); for(int i=0;igetDebug()) { m->mothurOut("current partition = " + toString(currentPartition) + "\n"); } if(currentPartition > 0){ logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition])); } vector > hessian = getHessian(); vector > invHessian = l.getInverse(hessian); for(int i=0;ierrorOut(e, "qFinderDMM", "qFinderDMM"); exit(1); } } /**************************************************************************************************/ void qFinderDMM::printFitData(ofstream& out){ try { out << setprecision (2) << numPartitions << '\t' << getNLL() << '\t' << getLogDet() << '\t' << getBIC() << '\t' << getAIC() << '\t' << laplace << endl; return; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "printFitData"); exit(1); } } /**************************************************************************************************/ void qFinderDMM::printFitData(ostream& out, double minLaplace){ try { if(laplace < minLaplace){ out << setprecision (2) << numPartitions << '\t' << getNLL() << '\t' << getLogDet() << '\t' << getBIC() << '\t' << getAIC() << '\t' << laplace << "***" << endl; }else { out << setprecision (2) << numPartitions << '\t' << getNLL() << '\t' << getLogDet() << '\t' << getBIC() << '\t' << getAIC() << '\t' << laplace << endl; } m->mothurOutJustToLog(toString(numPartitions) + '\t' + toString(getNLL()) + '\t' + toString(getLogDet()) + '\t'); m->mothurOutJustToLog(toString(getBIC()) + '\t' + toString(getAIC()) + '\t' + toString(laplace)); return; } catch(exception& e){ m->errorOut(e, "CommunityTypeFinder", "printFitData"); exit(1); } } /**************************************************************************************************/ // these functions for bfgs2 solver were lifted from the gnu_gsl source code... /* Find a minimum in x=[0,1] of the interpolating quadratic through * (0,f0) (1,f1) with derivative fp0 at x=0. The interpolating * polynomial is q(x) = f0 + fp0 * z + (f1-f0-fp0) * z^2 */ static double interp_quad (double f0, double fp0, double f1, double zl, double zh) { double fl = f0 + zl*(fp0 + zl*(f1 - f0 -fp0)); double fh = f0 + zh*(fp0 + zh*(f1 - f0 -fp0)); double c = 2 * (f1 - f0 - fp0); /* curvature */ double zmin = zl, fmin = fl; if (fh < fmin) { zmin = zh; fmin = fh; } if (c > 0) /* positive curvature required for a minimum */ { double z = -fp0 / c; /* location of minimum */ if (z > zl && z < zh) { double f = f0 + z*(fp0 + z*(f1 - f0 -fp0)); if (f < fmin) { zmin = z; fmin = f; }; } } return zmin; } /**************************************************************************************************/ /* Find a minimum in x=[0,1] of the interpolating cubic through * (0,f0) (1,f1) with derivatives fp0 at x=0 and fp1 at x=1. * * The interpolating polynomial is: * * c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3 * * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0). 
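 *
 * (One can verify that with eta and xi chosen this way, c(0) = f0, c(1) = f1, c'(0) = fp0
 *  and c'(1) = fp1, so the cubic matches both endpoint values and endpoint derivatives.)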
*/ double cubic (double c0, double c1, double c2, double c3, double z){ return c0 + z * (c1 + z * (c2 + z * c3)); } /**************************************************************************************************/ void check_extremum (double c0, double c1, double c2, double c3, double z, double *zmin, double *fmin){ /* could make an early return by testing curvature >0 for minimum */ double y = cubic (c0, c1, c2, c3, z); if (y < *fmin) { *zmin = z; /* accepted new point*/ *fmin = y; } } /**************************************************************************************************/ int gsl_poly_solve_quadratic (double a, double b, double c, double *x0, double *x1) { Utils util; double disc = b * b - 4 * a * c; if (util.isEqual(a, 0))/* Handle linear case */ { if (util.isEqual(b, 0)) { return 0; } else { *x0 = -c / b; return 1; }; } if (disc > 0) { if (util.isEqual(b, 0)) { double r = fabs (0.5 * sqrt (disc) / a); *x0 = -r; *x1 = r; } else { double sgnb = (b > 0 ? 1 : -1); double temp = -0.5 * (b + sgnb * sqrt (disc)); double r1 = temp / a ; double r2 = c / temp ; if (r1 < r2) { *x0 = r1 ; *x1 = r2 ; } else { *x0 = r2 ; *x1 = r1 ; } } return 2; } else if (util.isEqual(disc, 0)) { *x0 = -0.5 * b / a ; *x1 = -0.5 * b / a ; return 2 ; } else { return 0; } } /**************************************************************************************************/ double interp_cubic (double f0, double fp0, double f1, double fp1, double zl, double zh){ double eta = 3 * (f1 - f0) - 2 * fp0 - fp1; double xi = fp0 + fp1 - 2 * (f1 - f0); double c0 = f0, c1 = fp0, c2 = eta, c3 = xi; double zmin, fmin; double z0, z1; zmin = zl; fmin = cubic(c0, c1, c2, c3, zl); check_extremum (c0, c1, c2, c3, zh, &zmin, &fmin); { int n = gsl_poly_solve_quadratic (3 * c3, 2 * c2, c1, &z0, &z1); if (n == 2) /* found 2 roots */ { if (z0 > zl && z0 < zh) check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin); if (z1 > zl && z1 < zh) check_extremum (c0, c1, c2, c3, z1, &zmin, &fmin); } else if (n == 1) /* found 1 root */ { if (z0 > zl && z0 < zh) check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin); } } return zmin; } /**************************************************************************************************/ double interpolate (double a, double fa, double fpa, double b, double fb, double fpb, double xmin, double xmax){ /* Map [a,b] to [0,1] */ double z, alpha, zmin, zmax; zmin = (xmin - a) / (b - a); zmax = (xmax - a) / (b - a); if (zmin > zmax) { double tmp = zmin; zmin = zmax; zmax = tmp; }; if(!isnan(fpb) ){ z = interp_cubic (fa, fpa * (b - a), fb, fpb * (b - a), zmin, zmax); } else{ z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax); } alpha = a + z * (b - a); return alpha; } /**************************************************************************************************/ int qFinderDMM::lineMinimizeFletcher(vector& x, vector& p, double f0, double df0, double alpha1, double& alphaNew, double& fAlpha, vector& xalpha, vector& gradient ){ try { double rho = 0.01; double sigma = 0.10; double tau1 = 9.00; double tau2 = 0.05; double tau3 = 0.50; double alpha = alpha1; double alpha_prev = 0.0000; xalpha.resize(numOTUs, 0.0000); double falpha_prev = f0; double dfalpha_prev = df0; double a = 0.0000; double b = alpha; double fa = f0; double fb = 0.0000; double dfa = df0; double dfb = 0.0/0.0; int iter = 0; int maxIters = 100; while(iter++ < maxIters){ if (m->getControl_pressed()) { break; } for(int i=0;i f0 + alpha * rho * df0 || fAlpha >= falpha_prev){ a = alpha_prev; b = alpha; fa = falpha_prev; fb = fAlpha; dfa = 
dfalpha_prev; dfb = 0.0/0.0; break; } negativeLogDerivEvidenceLambdaPi(xalpha, gradient); double dfalpha = 0.0000; for(int i=0;i= 0){ a = alpha; b = alpha_prev; fa = fAlpha; fb = falpha_prev; dfa = dfalpha; dfb = dfalpha_prev; break; } double delta = alpha - alpha_prev; double lower = alpha + delta; double upper = alpha + tau1 * delta; double alphaNext = interpolate(alpha_prev, falpha_prev, dfalpha_prev, alpha, fAlpha, dfalpha, lower, upper); alpha_prev = alpha; falpha_prev = fAlpha; dfalpha_prev = dfalpha; alpha = alphaNext; } iter = 0; while(iter++ < maxIters){ if (m->getControl_pressed()) { break; } double delta = b - a; double lower = a + tau2 * delta; double upper = b - tau3 * delta; alpha = interpolate(a, fa, dfa, b, fb, dfb, lower, upper); for(int i=0;i f0 + rho * alpha * df0 || fAlpha >= fa){ b = alpha; fb = fAlpha; dfb = 0.0/0.0; } else{ double dfalpha = 0.0000; negativeLogDerivEvidenceLambdaPi(xalpha, gradient); dfalpha = 0.0000; for(int i=0;i= 0 && dfalpha >= 0) || ((b-a) <= 0.000 && dfalpha <= 0))){ b = a; fb = fa; dfb = dfa; a = alpha; fa = fAlpha; dfa = dfalpha; } else{ a = alpha; fa = fAlpha; dfa = dfalpha; } } } return 1; } catch(exception& e) { m->errorOut(e, "qFinderDMM", "lineMinimizeFletcher"); exit(1); } } /**************************************************************************************************/ int qFinderDMM::bfgs2_Solver(vector& x){ try{ int bfgsIter = 0; double step = 1.0e-6; double delta_f = 0.0000;//f-f0; vector gradient; double f = negativeLogEvidenceLambdaPi(x); negativeLogDerivEvidenceLambdaPi(x, gradient); vector x0 = x; vector g0 = gradient; double g0norm = 0; for(int i=0;i p = gradient; double pNorm = 0; for(int i=0;i 0.001 && bfgsIter++ < maxIter){ if (m->getControl_pressed()) { return 0; } double f0 = f; vector dx(numOTUs, 0.0000); double alphaOld, alphaNew; if(util.isEqual(pNorm, 0) || util.isEqual(g0norm, 0) || util.isEqual(df0, 0)){ dx.assign(numOTUs, 0.0000); break; } if(delta_f < 0){ double delta = max(-delta_f, 10 * EPSILON * abs(f0)); alphaOld = min(1.0, 2.0 * delta / (-df0)); } else{ alphaOld = step; } int success = lineMinimizeFletcher(x0, p, f0, df0, alphaOld, alphaNew, f, x, gradient); if(!success){ x = x0; break; } delta_f = f - f0; vector dx0(numOTUs); vector dg0(numOTUs); for(int i=0;i= 0.0) ? 
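/* What follows is the quasi-Newton update: dx0 = x - x0 and dg0 = gradient - g0 are
   combined to rebuild the search direction p without ever forming the Hessian
   explicitly, in the style of GSL's vector_bfgs2 minimizer (the helper routines above
   are noted as lifted from the GNU GSL source). x0, g0, and f0 are then carried into
   the next pass of the outer while loop. */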
-1.0 : +1.0; for(int i=0;ierrorOut(e, "qFinderDMM", "bfgs2_Solver"); exit(1); } } /**************************************************************************************************/ double qFinderDMM::negativeLogEvidenceLambdaPi(vector& x){ try{ vector sumAlphaX(numSamples, 0.0000); double logEAlpha = 0.0000; double sumLambda = 0.0000; double sumAlpha = 0.0000; double logE = 0.0000; double nu = 0.10000; double eta = 0.10000; double weight = 0.00000; for(int i=0;igetControl_pressed()) { return 0; } double lambda = x[i]; double alpha = exp(x[i]); logEAlpha += lgamma(alpha); sumLambda += lambda; sumAlpha += alpha; for(int j=0;jerrorOut(e, "qFinderDMM", "negativeLogEvidenceLambdaPi"); exit(1); } } /**************************************************************************************************/ void qFinderDMM::negativeLogDerivEvidenceLambdaPi(vector& x, vector& df){ try{ vector storeVector(numSamples, 0.0000); vector derivative(numOTUs, 0.0000); vector alpha(numOTUs, 0.0000); double store = 0.0000; double nu = 0.1000; double eta = 0.1000; double weight = 0.0000; for(int i=0;igetControl_pressed()) { return; } alpha[i] = exp(x[i]); store += alpha[i]; derivative[i] = weight * psi(alpha[i]); for(int j=0;jerrorOut(e, "qFinderDMM", "negativeLogDerivEvidenceLambdaPi"); exit(1); } } /**************************************************************************************************/ double qFinderDMM::getNegativeLogEvidence(vector& lambda, int group){ try { double sumAlpha = 0.0000; double sumAlphaX = 0.0000; double sumLnGamAlpha = 0.0000; double logEvidence = 0.0000; for(int i=0;igetControl_pressed()) { return 0; } double alpha = exp(lambda[i]); double X = countMatrix[group][i]; double alphaX = alpha + X; sumLnGamAlpha += lgamma(alpha); sumAlpha += alpha; sumAlphaX += alphaX; logEvidence -= lgamma(alphaX); } sumLnGamAlpha -= lgamma(sumAlpha); logEvidence += lgamma(sumAlphaX); return logEvidence + sumLnGamAlpha; } catch(exception& e){ m->errorOut(e, "qFinderDMM", "getNegativeLogEvidence"); exit(1); } } /**************************************************************************************************/ void qFinderDMM::optimizeLambda(){ try { for(currentPartition=0;currentPartitiongetControl_pressed()) { return; } bfgs2_Solver(lambdaMatrix[currentPartition]); } } catch(exception& e){ m->errorOut(e, "qFinderDMM", "optimizeLambda"); exit(1); } } /**************************************************************************************************/ void qFinderDMM::calculatePiK(){ try { vector store(numPartitions); for(int i=0;igetControl_pressed()) { return; } double sum = 0.0000; double minNegLogEvidence =numeric_limits::max(); for(int j=0;jgetControl_pressed()) { return; } zMatrix[j][i] = weights[j] * exp(-(store[j] - minNegLogEvidence)); sum += zMatrix[j][i]; } for(int j=0;jerrorOut(e, "qFinderDMM", "calculatePiK"); exit(1); } } /**************************************************************************************************/ double qFinderDMM::getNegativeLogLikelihood(){ try { double eta = 0.10000; double nu = 0.10000; vector pi(numPartitions, 0.0000); vector logBAlpha(numPartitions, 0.0000); double doubleSum = 0.0000; for(int i=0;igetControl_pressed()) { return 0; } double sumAlphaK = 0.0000; pi[i] = weights[i] / (double)numSamples; for(int j=0;jgetControl_pressed()) { return 0; } double probability = 0.0000; double factor = 0.0000; double sum = 0.0000; vector logStore(numPartitions, 0.0000); double offset = -numeric_limits::max(); for(int j=0;j offset){ offset = logStore[k]; } } for(int 
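/* Log-sum-exp guard: logStore[j] holds the log of partition j's weighted likelihood
   for the current sample, offset tracks the largest of those values, and the loop
   below accumulates exp(logStore[k] - offset) so the per-sample mixture likelihood
   can be recovered as log(sum) + offset without floating-point underflow. */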
k=0;kgetControl_pressed()) { return 0; } alphaSum += exp(lambdaMatrix[i][j]); lambdaSum += lambdaMatrix[i][j]; } } alphaSum *= -nu; lambdaSum *= eta; return (-doubleSum - L5 - L6 - alphaSum - lambdaSum); } catch(exception& e){ m->errorOut(e, "qFinderDMM", "getNegativeLogLikelihood"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/communitytype/qFinderDMM.h000077500000000000000000000022061424121717000214510ustar00rootroot00000000000000// // qFinderDMM.h // pds_dmm // // Created by Patrick Schloss on 11/8/12. // Copyright (c) 2012 University of Michigan. All rights reserved. // #ifndef pds_dmm_qFinderDMM_h #define pds_dmm_qFinderDMM_h #include "communitytype.h" /**************************************************************************************************/ class qFinderDMM : public CommunityTypeFinder { public: qFinderDMM(vector >, int); void printFitData(ofstream&); void printFitData(ostream&, double); private: void optimizeLambda(); void calculatePiK(); double negativeLogEvidenceLambdaPi(vector&); void negativeLogDerivEvidenceLambdaPi(vector&, vector&); double getNegativeLogEvidence(vector&, int); double getNegativeLogLikelihood(); int lineMinimizeFletcher(vector&, vector&, double, double, double, double&, double&, vector&, vector&); int bfgs2_Solver(vector&);//, double, double); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/completelinkage.cpp000077500000000000000000000017221424121717000202750ustar00rootroot00000000000000 #include "cluster.hpp" /***********************************************************************/ CompleteLinkage::CompleteLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) : Cluster(rav, lv, dm, c, s, a) {} /***********************************************************************/ //This function returns the tag of the method. string CompleteLinkage::getTag() { return("fn"); } /***********************************************************************/ //This function updates the distance based on the furthest neighbor method. bool CompleteLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) { try { bool changed = false; if (colCell.dist < rowCell.dist) { colCell.dist = rowCell.dist; changed = true; } return(changed); } catch(exception& e) { m->errorOut(e, "CompleteLinkage", "updateDistance"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/consensus.cpp000077500000000000000000000300011424121717000171420ustar00rootroot00000000000000/* * consensuscommand.cpp * Mothur * * Created by Sarah Westcott on 4/29/09. * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved. 
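 *
 * Overview of the algorithm in this file: getSets() walks every input tree and, for
 * each internal node, records the sorted set of leaf names beneath it in nodePairs,
 * counting how many trees contain that clade. getTree() then builds the consensus
 * top-down, repeatedly splitting the current leaf set into its best-rated pair of
 * subsets (getNextAvailableSet/getSubgroupRating), and buildConsensusTree() labels
 * each internal node with the fraction of input trees containing its clade
 * (the nodePairsInTree count divided by numTrees).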
* */ #include "consensus.h" //********************************************************************************************************************** Tree* Consensus::getTree(vector& t){ try { numNodes = t[0]->getNumNodes(); numLeaves = t[0]->getNumLeaves(); numTrees = t.size(); //get the possible pairings getSets(t); if (m->getControl_pressed()) { return 0; } vector Treenames = t[0]->getTreeNames(); consensusTree = new Tree(t[0]->getCountTable(), Treenames); it2 = nodePairs.find(treeSet); nodePairsInTree[treeSet] = it2->second; //erase treeset because you are adding it nodePairs.erase(treeSet); //set count to numLeaves; count = numLeaves; buildConsensusTree(treeSet); if (m->getControl_pressed()) { delete consensusTree; return 0; } consensusTree->assembleTree(); if (m->getControl_pressed()) { delete consensusTree; return 0; } return consensusTree; return 0; } catch(exception& e) { m->errorOut(e, "Consensus", "execute"); exit(1); } } //********************************************************************************************************************** int Consensus::printSetsInfo() { try { //open file for pairing not included in the tree string notIncluded = "cons.pairs"; ofstream out2; Utils util; util.openOutputFile(notIncluded, out2); //output species in order out2 << "Species in Order: " << endl << endl; for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; } //output sets included out2 << endl << "Sets included in the consensus tree:" << endl << endl; if (m->getControl_pressed()) { return 0; } vector temp; for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) { if (m->getControl_pressed()) { return 0; } //only output pairs not leaves if (it2->first.size() > 1) { temp.clear(); //initialize temp to all "." temp.resize(treeSet.size(), "."); //set the spot in temp that represents it2->first[i] to a "*" for (int i = 0; i < it2->first.size(); i++) { //find spot int index = findSpot(it2->first[i]); temp[index] = "*"; //temp[index] = it2->first[i] + " "; } //output temp for (int j = 0; j < temp.size(); j++) { out2 << temp[j]; } out2 << '\t' << it2->second << endl; } } //output sets not included out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl; for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) { if (m->getControl_pressed()) { return 0; } temp.clear(); //initialize temp to all "." 
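// Each clade is written as one row of the star matrix that follows: positions listed
// under "Species in Order" are marked with '*' when the species belongs to the set
// and '.' otherwise, and the trailing column is the number of input trees in which
// the set was observed.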
temp.resize(treeSet.size(), "."); //set the spot in temp that represents it2->first[i] to a "*" for (int i = 0; i < it2->first.size(); i++) { //find spot int index = findSpot(it2->first[i]); temp[index] = "*"; } //output temp for (int j = 0; j < temp.size(); j++) { out2 << temp[j]; } out2 << '\t' << it2->second << endl; } return 0; } catch(exception& e) { m->errorOut(e, "Consensus", "printSetsInfo"); exit(1); } } //********************************************************************************************************************** int Consensus::buildConsensusTree(vector nodeSet) { try { vector leftChildSet; vector rightChildSet; if (m->getControl_pressed()) { return 1; } //if you are at a leaf if (nodeSet.size() == 1) { //return the vector index of the leaf you are at return consensusTree->getIndex(nodeSet[0]); //terminate recursion }else if (count == numNodes) { return 0; } else { //finds best child pair leftChildSet = getNextAvailableSet(nodeSet, rightChildSet); int left = buildConsensusTree(leftChildSet); int right = buildConsensusTree(rightChildSet); consensusTree->tree[count].setChildren(left, right); consensusTree->tree[count].setLabel(toString(nodePairsInTree[nodeSet]/(float)numTrees)); consensusTree->tree[left].setParent(count); consensusTree->tree[right].setParent(count); count++; return (count-1); } } catch(exception& e) { m->errorOut(e, "Consensus", "buildConcensusTree"); exit(1); } } //********************************************************************************************************************** int Consensus::getSets(vector& t) { try { vector temp; treeSet.clear(); //for each tree add the possible pairs you find for (int i = 0; i < t.size(); i++) { //for each non-leaf node get descendant info. for (int j = numLeaves; j < numNodes; j++) { if (m->getControl_pressed()) { return 1; } temp.clear(); //go through pcounts and pull out descendants for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) { temp.push_back(it->first); } //sort temp sort(temp.begin(), temp.end()); it2 = nodePairs.find(temp); if (it2 != nodePairs.end()) { nodePairs[temp]++; }else{ nodePairs[temp] = 1; } } } //add each leaf to terminate recursion in consensus //you want the leaves in there but with insignifigant sightings value so it is added last //for each leaf node get descendant info. 
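// Leaves are entered into nodePairs with a sighting count of zero so they rank below
// every real clade yet still terminate the recursion in buildConsensusTree(). After
// that, each set's initial rating is its own sighting count plus the rating of its
// best split; getSmallest() ensures sets are processed from smallest to largest so the
// subgroup ratings a set depends on have already been computed.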
for (int j = 0; j < numLeaves; j++) { if (m->getControl_pressed()) { return 1; } //only need the first one since leaves have no descendants but themselves it = t[0]->tree[j].pcount.begin(); temp.clear(); temp.push_back(it->first); //fill treeSet treeSet.push_back(it->first); //add leaf to list but with sighting value less then all non leaf pairs nodePairs[temp] = 0; } sort(treeSet.begin(), treeSet.end()); map< vector, int> nodePairsCopy = nodePairs; //set initial rating on pairs to sightings + subgroup sightings while (nodePairsCopy.size() != 0) { if (m->getControl_pressed()) { return 1; } vector smallOne = getSmallest(nodePairsCopy); int subgrouprate = getSubgroupRating(smallOne); nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate; nodePairsCopy.erase(smallOne); } return 0; } catch(exception& e) { m->errorOut(e, "Consensus", "getSets"); exit(1); } } //********************************************************************************************************************** vector Consensus::getSmallest(map< vector, int> nodes) { try{ vector smallest = nodes.begin()->first; int smallsize = smallest.size(); for(it2 = nodes.begin(); it2 != nodes.end(); it2++) { if(it2->first.size() < smallsize) { smallsize = it2->first.size(); smallest = it2->first; } } return smallest; } catch(exception& e) { m->errorOut(e, "Consensus", "getSmallest"); exit(1); } } //********************************************************************************************************************** vector Consensus::getNextAvailableSet(vector bigset, vector& rest) { try { vector largest; largest.clear(); rest.clear(); //if you are just 2 groups if (bigset.size() == 2) { rest.push_back(bigset[0]); largest.push_back(bigset[1]); }else{ rest = bestSplit[bigset][0]; largest = bestSplit[bigset][1]; } //save for printing out later and for branch lengths nodePairsInTree[rest] = nodePairs[rest]; //delete whatever set you return because it is no longer available nodePairs.erase(rest); //save for printing out later and for branch lengths nodePairsInTree[largest] = nodePairs[largest]; //delete whatever set you return because it is no longer available nodePairs.erase(largest); return largest; } catch(exception& e) { m->errorOut(e, "Consensus", "getNextAvailableSet"); exit(1); } } /**********************************************************************************************************************/ int Consensus::getSubgroupRating(vector group) { try { map< vector, int>::iterator ittemp; map< vector< vector > , int >::iterator it3; int rate = 0; // ***********************************************************************************// //1. this function must be called passing it littlest sets to biggest // since it the rating is made from your sighting plus you best splits rating //2. 
it saves the top pair to use later // ***********************************************************************************// if (group.size() < 3) { return rate; } map< vector, int> possiblePairing; //this is all the subsets of group //go through the sets for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) { //are you a subset of bigset, then save in possiblePairings if (isSubset(group, it2->first) ) { possiblePairing[it2->first] = it2->second; } } map< vector< vector > , int > rating; while (possiblePairing.size() != 0) { it2 = possiblePairing.begin(); vector temprest = getRestSet(group, it2->first); //is the rest a set available in possiblePairings ittemp = possiblePairing.find(temprest); if (ittemp != possiblePairing.end()) { //if the rest is in the possible pairings then add this pair to rating map vector< vector > temprate; temprate.push_back(it2->first); temprate.push_back(temprest); rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]); //erase so you dont add 1,2 and 2,1. possiblePairing.erase(temprest); } possiblePairing.erase(it2); } it3 = rating.begin(); rate = it3->second; vector< vector > topPair = it3->first; //choose the split with the best rating for (it3 = rating.begin(); it3 != rating.end(); it3++) { if (it3->second > rate) { rate = it3->second; topPair = it3->first; } } bestSplit[group] = topPair; return rate; } catch(exception& e) { m->errorOut(e, "Consensus", "getSubgroupRating"); exit(1); } } //********************************************************************************************************************** vector Consensus::getRestSet(vector bigset, vector subset) { try { vector rest; for (int i = 0; i < bigset.size(); i++) { bool inSubset = false; for (int j = 0; j < subset.size(); j++) { if (bigset[i] == subset[j]) { inSubset = true; break; } } //its not in the subset so put it in the rest if (inSubset == false) { rest.push_back(bigset[i]); } } return rest; } catch(exception& e) { m->errorOut(e, "Consensus", "getRestSet"); exit(1); } } //********************************************************************************************************************** bool Consensus::isSubset(vector bigset, vector subset) { try { if (subset.size() > bigset.size()) { return false; } //check if each guy in suset is also in bigset for (int i = 0; i < subset.size(); i++) { bool match = false; for (int j = 0; j < bigset.size(); j++) { if (subset[i] == bigset[j]) { match = true; break; } } //you have a guy in subset that had no match in bigset if (match == false) { return false; } } return true; } catch(exception& e) { m->errorOut(e, "Consensus", "isSubset"); exit(1); } } //********************************************************************************************************************** int Consensus::findSpot(string node) { try { int spot = 0; //check if each guy in suset is also in bigset for (int i = 0; i < treeSet.size(); i++) { if (treeSet[i] == node) { spot = i; break; } } return spot; } catch(exception& e) { m->errorOut(e, "Consensus", "findSpot"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/consensus.h000077500000000000000000000043211424121717000166150ustar00rootroot00000000000000#ifndef CONCENSUS_H #define CONCENSUS_H /* * consensus.h * Mothur * * Created by Sarah Westcott on 4/29/09. * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved. 
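 *
 * Key data members declared below: treeSet lists every leaf name; nodePairs maps a
 * sorted vector of leaf names (a clade) to the number of input trees containing it;
 * nodePairsInitialRate caches each clade's rating; bestSplit remembers the
 * highest-rated bipartition of each clade; and nodePairsInTree records the clades
 * that were actually placed in the consensus tree.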
* */ #include "tree.h" #include "treemap.h" //NOTE: This class assumes all leaf nodes have 1 member. // Mothur does allow for names files with trees which would make a tree with multiple members at one leaf. // This class is currently only called internally by commands that have leaf node containing only 1 member. // But if in the future, this changes things will need to be reworked in getSets and buildConsensus. class Consensus { public: Consensus() { m = MothurOut::getInstance(); } ~Consensus() = default; Tree* getTree(vector&); private: MothurOut* m; Tree* consensusTree; vector treeSet; //set containing all members of the tree to start recursion. filled in getSets(). map< vector, int > nodePairs; //, vector< vector > > bestSplit; //maps a group to its best split map< vector, int > nodePairsInitialRate; map< vector, int > nodePairsInTree; map::iterator it; map< vector, int>::iterator it2; string outputFile, notIncluded, filename; int numNodes, numLeaves, count, numTrees; //count is the next available spot in the tree vector vector outputNames; int getSets(vector&); int getSubgroupRating(vector); vector getSmallest(map< vector, int>); vector getNextAvailableSet(vector, vector&); vector getRestSet(vector, vector); bool isSubset(vector, vector); int findSpot(string); int buildConsensusTree(vector); int printSetsInfo(); }; #endif mothur-1.48.0/source/core.h000077500000000000000000000247761424121717000155450ustar00rootroot00000000000000// Copyright 2006 Nemanja Trifunovic /* Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 #include namespace utf8 { // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers // You may need to change them to match your system. // These typedefs have the same names as ones from cstdint, or boost/cstdint typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; // Helper code - not intended to be directly called by the library users. 
May be changed at any time namespace internal { // Unicode constants // Leading (high) surrogates: 0xd800 - 0xdbff // Trailing (low) surrogates: 0xdc00 - 0xdfff const uint16_t LEAD_SURROGATE_MIN = 0xd800u; const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; // Maximum valid value for a Unicode code point const uint32_t CODE_POINT_MAX = 0x0010ffffu; template inline uint8_t mask8(octet_type oc) { return static_cast(0xff & oc); } template inline uint16_t mask16(u16_type oc) { return static_cast(0xffff & oc); } template inline bool is_trail(octet_type oc) { return ((utf8::internal::mask8(oc) >> 6) == 0x2); } template inline bool is_lead_surrogate(u16 cp) { return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); } template inline bool is_trail_surrogate(u16 cp) { return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); } template inline bool is_surrogate(u16 cp) { return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); } template inline bool is_code_point_valid(u32 cp) { return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp)); } template inline typename std::iterator_traits::difference_type sequence_length(octet_iterator lead_it) { uint8_t lead = utf8::internal::mask8(*lead_it); if (lead < 0x80) return 1; else if ((lead >> 5) == 0x6) return 2; else if ((lead >> 4) == 0xe) return 3; else if ((lead >> 3) == 0x1e) return 4; else return 0; } template inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) { if (cp < 0x80) { if (length != 1) return true; } else if (cp < 0x800) { if (length != 2) return true; } else if (cp < 0x10000) { if (length != 3) return true; } return false; } enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; /// Helper for get_sequence_x template utf_error increase_safely(octet_iterator& it, octet_iterator end) { if (++it == end) return NOT_ENOUGH_ROOM; if (!utf8::internal::is_trail(*it)) return INCOMPLETE_SEQUENCE; return UTF8_OK; } #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} /// get_sequence_x functions decode utf-8 sequences of the length x template utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point) { if (it == end) return NOT_ENOUGH_ROOM; code_point = utf8::internal::mask8(*it); return UTF8_OK; } template utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point) { if (it == end) return NOT_ENOUGH_ROOM; code_point = utf8::internal::mask8(*it); UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f); return UTF8_OK; } template utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point) { if (it == end) return NOT_ENOUGH_ROOM; code_point = utf8::internal::mask8(*it); UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) code_point += (*it) & 0x3f; return UTF8_OK; } template utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point) { if (it == end) return NOT_ENOUGH_ROOM; code_point = utf8::internal::mask8(*it); 
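/* UTF-8 decoding in these helpers: sequence_length() reads the lead byte's high bits
   to decide how many octets follow (1-4), each continuation byte contributes its low
   six bits to the code point, and validate_next() afterwards rejects invalid leads,
   incomplete sequences, overlong encodings, surrogate values, and anything above
   CODE_POINT_MAX (0x10FFFF). */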
UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) code_point += (utf8::internal::mask8(*it) << 6) & 0xfff; UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) code_point += (*it) & 0x3f; return UTF8_OK; } #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR template utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) { if (it == end) return NOT_ENOUGH_ROOM; // Save the original value of it so we can go back in case of failure // Of course, it does not make much sense with i.e. stream iterators octet_iterator original_it = it; uint32_t cp = 0; // Determine the sequence length based on the lead octet typedef typename std::iterator_traits::difference_type octet_difference_type; const octet_difference_type length = utf8::internal::sequence_length(it); // Get trail octets and calculate the code point utf_error err = UTF8_OK; switch (length) { case 0: return INVALID_LEAD; case 1: err = utf8::internal::get_sequence_1(it, end, cp); break; case 2: err = utf8::internal::get_sequence_2(it, end, cp); break; case 3: err = utf8::internal::get_sequence_3(it, end, cp); break; case 4: err = utf8::internal::get_sequence_4(it, end, cp); break; } if (err == UTF8_OK) { // Decoding succeeded. Now, security checks... if (utf8::internal::is_code_point_valid(cp)) { if (!utf8::internal::is_overlong_sequence(cp, length)){ // Passed! Return here. code_point = cp; ++it; return UTF8_OK; } else err = OVERLONG_SEQUENCE; } else err = INVALID_CODE_POINT; } // Failure branch - restore the original value of the iterator it = original_it; return err; } template inline utf_error validate_next(octet_iterator& it, octet_iterator end) { uint32_t ignored; return utf8::internal::validate_next(it, end, ignored); } } // namespace internal /// The library API - functions intended to be called by the users // Byte order mark const uint8_t bom[] = {0xef, 0xbb, 0xbf}; template octet_iterator find_invalid(octet_iterator start, octet_iterator end) { octet_iterator result = start; while (result != end) { utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end); if (err_code != internal::UTF8_OK) return result; } return result; } template inline bool is_valid(octet_iterator start, octet_iterator end) { return (utf8::find_invalid(start, end) == end); } template inline bool starts_with_bom (octet_iterator it, octet_iterator end) { return ( ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) && ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) && ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) ); } //Deprecated in release 2.3 template inline bool is_bom (octet_iterator it) { return ( (utf8::internal::mask8(*it++)) == bom[0] && (utf8::internal::mask8(*it++)) == bom[1] && (utf8::internal::mask8(*it)) == bom[2] ); } } // namespace utf8 #endif // header guard mothur-1.48.0/source/currentfile.cpp000077500000000000000000000412651424121717000174620ustar00rootroot00000000000000// // currentfile.cpp // Mothur // // Created by Sarah Westcott on 11/9/17. // Copyright © 2017 Schloss Lab. All rights reserved. 
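// CurrentFile is the singleton registry of the most recently used file of each type
// (fasta, count, shared, taxonomy, ...); the get.current and set.current commands
// read and update it so later commands can reuse those files. Every accessor in this
// file serializes access through a lock_guard on the currentProtector mutex.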
// #include "currentfile.h" /*********************************************************************************************/ set CurrentFile::getCurrentTypes() { try { set types; types.insert("fasta"); types.insert("summary"); types.insert("file"); types.insert("accnos"); types.insert("column"); types.insert("design"); types.insert("group"); types.insert("list"); types.insert("name"); types.insert("oligos"); types.insert("order"); types.insert("ordergroup"); types.insert("phylip"); types.insert("qfile"); types.insert("relabund"); types.insert("clr"); types.insert("sabund"); types.insert("rabund"); types.insert("sff"); types.insert("shared"); types.insert("taxonomy"); types.insert("constaxonomy"); types.insert("contigsreport"); types.insert("tree"); types.insert("flow"); types.insert("biom"); types.insert("count"); types.insert("processors"); types.insert("sample"); return types; } catch(exception& e) { m->errorOut(e, "CurrentFile", "getCurrentTypes"); exit(1); } } /*********************************************************************************************/ void CurrentFile::printCurrentFiles(string filename) { try { lock_guard guard(currentProtector); if (filename != "") { ofstream out; util.openOutputFile(filename, out); if (accnosfile != "") { out << "accnos=" + accnosfile + "\n"; } if (columnfile != "") { out << "column=" + columnfile + "\n"; } if (designfile != "") { out << "design=" + designfile + "\n"; } if (fastafile != "") { out << "fasta=" + fastafile + "\n"; } if (groupfile != "") { out << "group=" + groupfile + "\n"; } if (listfile != "") { out << "list=" + listfile + "\n"; } if (namefile != "") { out << "name=" + namefile + "\n"; } if (oligosfile != "") { out << "oligos=" + oligosfile + "\n"; } if (orderfile != "") { out << "order=" + orderfile + "\n"; } if (ordergroupfile != "") { out << "ordergroup=" + ordergroupfile + "\n"; } if (phylipfile != "") { out << "phylip=" + phylipfile + "\n"; } if (qualfile != "") { out << "qfile=" + qualfile + "\n"; } if (rabundfile != "") { out << "rabund=" + rabundfile + "\n"; } if (relabundfile != "") { out << "relabund=" + relabundfile + "\n"; } if (clrfile != "") { out << "clr=" + clrfile + "\n"; } if (sabundfile != "") { out << "sabund=" + sabundfile + "\n"; } if (sfffile != "") { out << "sff=" + sfffile + "\n"; } if (sharedfile != "") { out << "shared=" + sharedfile + "\n"; } if (taxonomyfile != "") { out << "taxonomy=" + taxonomyfile + "\n"; } if (constaxonomyfile != "") { out << "constaxonomy=" + constaxonomyfile + "\n"; } if (contigsreportfile != ""){ out << "contigsreport=" + contigsreportfile + "\n";} if (treefile != "") { out << "tree=" + treefile + "\n"; } if (flowfile != "") { out << "flow=" + flowfile + "\n"; } if (biomfile != "") { out << "biom=" + biomfile + "\n"; } if (countfile != "") { out << "count=" + countfile + "\n"; } if (processors != "1") { out << "processors=" + processors + "\n"; } if (summaryfile != "") { out << "summary=" + summaryfile + "\n"; } if (filefile != "") { out << "file=" + filefile + "\n"; } if (samplefile != "") { out << "sample=" + samplefile + "\n"; } out.close(); } if (accnosfile != "") { m->mothurOut("accnos=" + accnosfile); m->mothurOutEndLine(); } if (columnfile != "") { m->mothurOut("column=" + columnfile); m->mothurOutEndLine(); } if (designfile != "") { m->mothurOut("design=" + designfile); m->mothurOutEndLine(); } if (fastafile != "") { m->mothurOut("fasta=" + fastafile); m->mothurOutEndLine(); } if (groupfile != "") { m->mothurOut("group=" + groupfile); m->mothurOutEndLine(); } if (listfile 
!= "") { m->mothurOut("list=" + listfile); m->mothurOutEndLine(); } if (namefile != "") { m->mothurOut("name=" + namefile); m->mothurOutEndLine(); } if (oligosfile != "") { m->mothurOut("oligos=" + oligosfile); m->mothurOutEndLine(); } if (orderfile != "") { m->mothurOut("order=" + orderfile); m->mothurOutEndLine(); } if (ordergroupfile != "") { m->mothurOut("ordergroup=" + ordergroupfile); m->mothurOutEndLine(); } if (phylipfile != "") { m->mothurOut("phylip=" + phylipfile); m->mothurOutEndLine(); } if (qualfile != "") { m->mothurOut("qfile=" + qualfile); m->mothurOutEndLine(); } if (rabundfile != "") { m->mothurOut("rabund=" + rabundfile); m->mothurOutEndLine(); } if (relabundfile != "") { m->mothurOut("relabund=" + relabundfile); m->mothurOutEndLine(); } if (clrfile != "") { m->mothurOut("clr=" + clrfile); m->mothurOutEndLine(); } if (sabundfile != "") { m->mothurOut("sabund=" + sabundfile); m->mothurOutEndLine(); } if (sfffile != "") { m->mothurOut("sff=" + sfffile); m->mothurOutEndLine(); } if (sharedfile != "") { m->mothurOut("shared=" + sharedfile); m->mothurOutEndLine(); } if (taxonomyfile != "") { m->mothurOut("taxonomy=" + taxonomyfile); m->mothurOutEndLine(); } if (constaxonomyfile != "") { m->mothurOut("constaxonomy=" + constaxonomyfile); m->mothurOutEndLine();} if (contigsreportfile != ""){ m->mothurOut("contigsreport=" + contigsreportfile); m->mothurOutEndLine();} if (treefile != "") { m->mothurOut("tree=" + treefile); m->mothurOutEndLine(); } if (flowfile != "") { m->mothurOut("flow=" + flowfile); m->mothurOutEndLine(); } if (biomfile != "") { m->mothurOut("biom=" + biomfile); m->mothurOutEndLine(); } if (countfile != "") { m->mothurOut("count=" + countfile); m->mothurOutEndLine(); } if (processors != "1") { m->mothurOut("processors=" + processors); m->mothurOutEndLine(); } if (summaryfile != "") { m->mothurOut("summary=" + summaryfile); m->mothurOutEndLine(); } if (filefile != "") { m->mothurOut("file=" + filefile); m->mothurOutEndLine(); } if (samplefile != "") { m->mothurOut("sample=" + samplefile); m->mothurOutEndLine(); } } catch(exception& e) { m->errorOut(e, "CurrentFile", "printCurrentFiles"); exit(1); } } /*********************************************************************************************/ bool CurrentFile::hasCurrentFiles() { try { lock_guard guard(currentProtector); bool hasCurrent = false; if (accnosfile != "") { return true; } if (columnfile != "") { return true; } if (designfile != "") { return true; } if (fastafile != "") { return true; } if (groupfile != "") { return true; } if (listfile != "") { return true; } if (namefile != "") { return true; } if (oligosfile != "") { return true; } if (orderfile != "") { return true; } if (ordergroupfile != "") { return true; } if (phylipfile != "") { return true; } if (qualfile != "") { return true; } if (rabundfile != "") { return true; } if (relabundfile != "") { return true; } if (clrfile != "") { return true; } if (sabundfile != "") { return true; } if (sfffile != "") { return true; } if (sharedfile != "") { return true; } if (taxonomyfile != "") { return true; } if (constaxonomyfile != "") { return true; } if (contigsreportfile != ""){ return true; } if (treefile != "") { return true; } if (flowfile != "") { return true; } if (biomfile != "") { return true; } if (countfile != "") { return true; } if (summaryfile != "") { return true; } if (filefile != "") { return true; } if (samplefile != "") { return true; } if (processors != "1") { return true; } return hasCurrent; } catch(exception& e) { m->errorOut(e, 
"CurrentFile", "hasCurrentFiles"); exit(1); } } /*********************************************************************************************/ void CurrentFile::clearCurrentFiles() { try { lock_guard guard(currentProtector); phylipfile = ""; filefile = ""; columnfile = ""; listfile = ""; rabundfile = ""; sabundfile = ""; namefile = ""; groupfile = ""; designfile = ""; orderfile = ""; treefile = ""; sharedfile = ""; ordergroupfile = ""; contigsreportfile = ""; constaxonomyfile = ""; relabundfile = ""; clrfile = ""; fastafile = ""; qualfile = ""; sfffile = ""; oligosfile = ""; accnosfile = ""; taxonomyfile = ""; flowfile = ""; biomfile = ""; countfile = ""; summaryfile = ""; samplefile = ""; unsigned concurentThreadsSupported = std::thread::hardware_concurrency(); if (concurentThreadsSupported < 1) { concurentThreadsSupported = 1; } //in case thread errors processors = toString(concurentThreadsSupported); } catch(exception& e) { m->errorOut(e, "CurrentFile", "clearCurrentFiles"); exit(1); } } /*********************************************************************************************/ int CurrentFile::setProcessors(string p) { try { lock_guard guard(currentProtector); if (!util.isInteger(p)) { unsigned concurentThreadsSupported = std::thread::hardware_concurrency(); if (concurentThreadsSupported < 1) { concurentThreadsSupported = 1; } //in case thread errors processors = toString(concurentThreadsSupported); m->mothurOut("[ERROR]: " + p + " is not an integer. Setting processors to " + toString(processors) + "\n"); }else { processors = p; m->mothurOut("\nUsing " + toString(processors) + " processors.\n"); } int numProcessors = 1; util.mothurConvert(p, numProcessors); return numProcessors; } catch(exception& e) { m->errorOut(e, "CurrentFile", "clearCurrentFiles"); exit(1); } } /*********************************************************************************************/ void CurrentFile::setDefaultPath(vector pathnames) { try { lock_guard guard(currentProtector); defaultPath.clear(); for (int i = 0; i < pathnames.size(); i++) { string pathname = pathnames[i]; if (pathname != "") { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); if (lastChar != PATH_SEPARATOR) { pathname += PATH_SEPARATOR; } } defaultPath.push_back(util.getFullPathName(pathname)); } } catch(exception& e) { m->errorOut(e, "CurrentFile", "setDefaultPath"); exit(1); } } /*********************************************************************************************/ void CurrentFile::setTestFilePath(string pathname) { try { lock_guard guard(currentProtector); if (pathname != "") { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); if (lastChar != PATH_SEPARATOR) { pathname += PATH_SEPARATOR; } } testFilePath = util.getFullPathName(pathname); } catch(exception& e) { m->errorOut(e, "CurrentFile", "setTestFilePath"); exit(1); } } /*********************************************************************************************/ void CurrentFile::setHomePath(string pathname) { try { lock_guard guard(currentProtector); if (pathname != "") { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); if (lastChar != PATH_SEPARATOR) { pathname += PATH_SEPARATOR; } } homePath = util.getFullPathName(pathname); m->setHomePath(homePath); } catch(exception& e) { m->errorOut(e, "CurrentFile", "setHomePath"); exit(1); } } /*********************************************************************************************/ void CurrentFile::setPaths(vector 
pathVariables) { try { lock_guard guard(currentProtector); for (int i = 0; i < pathVariables.size(); i++) { string pathname = pathVariables[i]; if (pathname != "") { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); if (lastChar != PATH_SEPARATOR) { pathname += PATH_SEPARATOR; } } pathVariables[i] = util.getFullPathName(pathname); } paths = pathVariables; m->setPaths(paths); } catch(exception& e) { m->errorOut(e, "CurrentFile", "setPaths"); exit(1); } } /*********************************************************************************************/ void CurrentFile::setToolsPath(vector pathnames) { try { lock_guard guard(currentProtector); toolsPath.clear(); for (int i = 0; i < pathnames.size(); i++) { string pathname = pathnames[i]; if (pathname != "") { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); if (lastChar != PATH_SEPARATOR) { pathname += PATH_SEPARATOR; } } toolsPath.push_back(util.getFullPathName(pathname)); } } catch(exception& e) { m->errorOut(e, "CurrentFile", "setToolsPath"); exit(1); } } /*********************************************************************************************/ void CurrentFile::setInputDir(vector pathnames) { try { lock_guard guard(currentProtector); inputDir.clear(); for (int i = 0; i < pathnames.size(); i++) { string pathname = pathnames[i]; if (pathname != "") { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); if (lastChar != PATH_SEPARATOR) { pathname += PATH_SEPARATOR; } } inputDir.push_back(util.getFullPathName(pathname)); } } catch(exception& e) { m->errorOut(e, "CurrentFile", "setToolsPath"); exit(1); } } /*********************************************************************************************/ //locations[0] = inputdir paths, locations[1] = outputdirPaths, locations[2] = mothur's exe path, locations[3] = mothur tools paths, locations[4] = mothur_files paths vector< vector > CurrentFile::getLocations() { try { lock_guard guard(currentProtector); vector< vector > locations; //allows for multiple locations, order matters locations.push_back(inputDir); vector outputDirs; outputDirs.push_back(outputDir); locations.push_back(outputDirs); vector mothurHome; mothurHome.push_back(mothurProgramPath); locations.push_back(mothurHome); //MOTHUR_TOOLS locations.push_back(toolsPath); //MOTHUR_FILES locations.push_back(defaultPath); return locations; } catch(exception& e) { m->errorOut(e, "CurrentFile", "setToolsPath"); exit(1); } } //{ } /*********************************************************************************************/ mothur-1.48.0/source/currentfile.h000077500000000000000000000316761424121717000171340ustar00rootroot00000000000000#ifndef CURRENTFILE_H #define CURRENTFILE_H /* * currentfile.h * Mothur * * Created by westcott on 3/15/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ //NOT ThreadSafe - but designed to be read only from threads and read write from main thread. 
#include "mothurout.h" #include "utils.hpp" /***********************************************/ class CurrentFile { public: static CurrentFile* getInstance() { if(instance == 0) { instance = new CurrentFile(); } return instance; } unsigned long long getRAMUsed(); unsigned long long getTotalRAM(); string getPhylipFile() { lock_guard guard(currentProtector); return phylipfile; } string getColumnFile() { lock_guard guard(currentProtector); return columnfile; } string getListFile() { lock_guard guard(currentProtector); return listfile; } string getRabundFile() { lock_guard guard(currentProtector); return rabundfile; } string getSabundFile() { lock_guard guard(currentProtector); return sabundfile; } string getNameFile() { lock_guard guard(currentProtector); return namefile; } string getGroupFile() { lock_guard guard(currentProtector); return groupfile; } string getOrderFile() { lock_guard guard(currentProtector); return orderfile; } string getOrderGroupFile() { lock_guard guard(currentProtector); return ordergroupfile; } string getTreeFile() { lock_guard guard(currentProtector); return treefile; } string getSharedFile() { lock_guard guard(currentProtector); return sharedfile; } string getRelAbundFile() { lock_guard guard(currentProtector); return relabundfile; } string getCLRFile() { lock_guard guard(currentProtector); return clrfile; } string getDesignFile() { lock_guard guard(currentProtector); return designfile; } string getFastaFile() { lock_guard guard(currentProtector); return fastafile; } string getSFFFile() { lock_guard guard(currentProtector); return sfffile; } string getQualFile() { lock_guard guard(currentProtector); return qualfile; } string getOligosFile() { lock_guard guard(currentProtector); return oligosfile; } string getSampleFile() { lock_guard guard(currentProtector); return samplefile; } string getAccnosFile() { lock_guard guard(currentProtector); return accnosfile; } string getTaxonomyFile() { lock_guard guard(currentProtector); return taxonomyfile; } string getFlowFile() { lock_guard guard(currentProtector); return flowfile; } string getContigsReportFile(){ lock_guard guard(currentProtector); return contigsreportfile; } string getBiomFile() { lock_guard guard(currentProtector); return biomfile; } string getCountFile() { lock_guard guard(currentProtector); return countfile; } string getSummaryFile() { lock_guard guard(currentProtector); return summaryfile; } string getFileFile() { lock_guard guard(currentProtector); return filefile; } string getConsTaxonomyFile(){ lock_guard guard(currentProtector); return constaxonomyfile; } void setListFile(string f) { lock_guard guard(currentProtector); listfile = util.getFullPathName(f); } void setBiomFile(string f) { lock_guard guard(currentProtector); biomfile = util.getFullPathName(f); } void setFlowFile(string f) { lock_guard guard(currentProtector); flowfile = util.getFullPathName(f); } void setContigsReportFile(string f) { lock_guard guard(currentProtector); contigsreportfile = util.getFullPathName(f); } void setSummaryFile(string f) { lock_guard guard(currentProtector); summaryfile = util.getFullPathName(f); } void setTreeFile(string f) { lock_guard guard(currentProtector); treefile = util.getFullPathName(f); } void setGroupFile(string f) { lock_guard guard(currentProtector); groupfile = util.getFullPathName(f); setGroupMode("group"); } void setCountFile(string f) { lock_guard guard(currentProtector); countfile = util.getFullPathName(f); setGroupMode("count"); } void setPhylipFile(string f) { lock_guard guard(currentProtector); 
phylipfile = util.getFullPathName(f); } void setColumnFile(string f) { lock_guard guard(currentProtector); columnfile = util.getFullPathName(f); } void setNameFile(string f) { lock_guard guard(currentProtector); namefile = util.getFullPathName(f); } void setRabundFile(string f) { lock_guard guard(currentProtector); rabundfile = util.getFullPathName(f); } void setSabundFile(string f) { lock_guard guard(currentProtector); sabundfile = util.getFullPathName(f); } void setSharedFile(string f) { lock_guard guard(currentProtector); sharedfile = util.getFullPathName(f); } void setRelAbundFile(string f) { lock_guard guard(currentProtector); relabundfile = util.getFullPathName(f); } void setCLRFile(string f) { lock_guard guard(currentProtector); clrfile = util.getFullPathName(f); } void setOrderFile(string f) { lock_guard guard(currentProtector); orderfile = util.getFullPathName(f); } void setOrderGroupFile(string f) { lock_guard guard(currentProtector); ordergroupfile = util.getFullPathName(f); } void setDesignFile(string f) { lock_guard guard(currentProtector); designfile = util.getFullPathName(f); } void setFastaFile(string f) { lock_guard guard(currentProtector); fastafile = util.getFullPathName(f); } void setSFFFile(string f) { lock_guard guard(currentProtector); sfffile = util.getFullPathName(f); } void setQualFile(string f) { lock_guard guard(currentProtector); qualfile = util.getFullPathName(f); } void setOligosFile(string f) { lock_guard guard(currentProtector); oligosfile = util.getFullPathName(f); } void setAccnosFile(string f) { lock_guard guard(currentProtector); accnosfile = util.getFullPathName(f); } void setTaxonomyFile(string f) { lock_guard guard(currentProtector); taxonomyfile = util.getFullPathName(f); } void setConsTaxonomyFile(string f) { lock_guard guard(currentProtector); constaxonomyfile = util.getFullPathName(f); } void setProgramPath(string f) { lock_guard guard(currentProtector); mothurProgramPath = util.getFullPathName(f); } void setFileFile(string f) { lock_guard guard(currentProtector); filefile = util.getFullPathName(f); } void setSampleFile(string f) { lock_guard guard(currentProtector); samplefile = util.getFullPathName(f); } //current files - if you add a new type you must edit optionParser->getParameters, get.current and set.current commands and mothurOut->printCurrentFiles/clearCurrentFiles/getCurrentTypes/hasCurrentFiles. add a get and set function. 
string getProcessors() { lock_guard guard(currentProtector); return processors; } int setProcessors(string p); string getProgramPath() { lock_guard guard(currentProtector); return mothurProgramPath; } //default paths = MOTHUR_FILES vector getDefaultPath() { lock_guard guard(currentProtector); return defaultPath; } void setDefaultPath(vector); vector getToolsPath() { lock_guard guard(currentProtector); return toolsPath; } void setToolsPath(vector); string getTestFilePath() { lock_guard guard(currentProtector); return testFilePath; } void setTestFilePath(string); string getHomePath() { lock_guard guard(currentProtector); return homePath; } void setHomePath(string); vector getPaths() { lock_guard guard(currentProtector); return paths; } //environment variable 'PATH' values void setPaths(vector); string getOutputDir() { lock_guard guard(currentProtector); return outputDir; } void setOutputDir(string f) { lock_guard guard(currentProtector); outputDir = util.getFullPathName(f); } vector getInputDir() { lock_guard guard(currentProtector); return inputDir; } void setInputDir(vector f); void setFileName(string); string getReleaseDate() { lock_guard guard(currentProtector); return releaseDate; } void setReleaseDate(string r) { lock_guard guard(currentProtector); releaseDate = r; } string getVersion() { lock_guard guard(currentProtector); return version; } void setVersion(string r) { lock_guard guard(currentProtector); version = r; } vector< vector > getLocations(); bool getMothurCalling() { lock_guard guard(currentProtector); return mothurCalling; } void setMothurCalling(bool t) { lock_guard guard(currentProtector); mothurCalling = t; } void printCurrentFiles(string); //string="" for just to logfile. void clearCurrentFiles(); set getCurrentTypes(); bool hasCurrentFiles(); string getGroupMode() { lock_guard guard(currentProtector); return groupMode; } string getTestDirectory() { lock_guard guard(currentProtector); return testDirectory; } void setTestDirectory(string t) { lock_guard guard(currentProtector); testDirectory = t; } private: MothurOut* m; Utils util; vector paths, defaultPath, toolsPath, inputDir; //paths stored in PATH environment variables, defaultPaths = MOTHUR_FILES, toolsPath = MOTHUR_TOOLS string logFileName, mothurProgramPath, homePath; string outputDir, releaseDate, version; string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile, filefile, testFilePath, contigsreportfile, clrfile; string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile, countfile, summaryfile, constaxonomyfile, groupMode, testDirectory, sharedHeaderMode, samplefile; bool mothurCalling; void setGroupMode(string t) { groupMode = t; } static CurrentFile* instance; CurrentFile( const CurrentFile& ); // Disable copy constructor void operator=( const CurrentFile& ); // Disable assignment operator std::mutex currentProtector; CurrentFile() { m = MothurOut::getInstance(); testFilePath = ""; outputDir= ""; accnosfile = ""; filefile = ""; phylipfile = ""; columnfile = ""; listfile = ""; rabundfile = ""; sabundfile = ""; namefile = ""; phylipfile = ""; columnfile = ""; listfile = ""; rabundfile = ""; sabundfile = ""; namefile = ""; groupfile = ""; designfile = ""; orderfile = ""; treefile = ""; sharedfile = ""; ordergroupfile = ""; relabundfile = ""; clrfile = ""; fastafile = ""; qualfile = ""; sfffile = ""; oligosfile = ""; groupfile = ""; designfile = ""; orderfile = ""; 
treefile = ""; sharedfile = ""; ordergroupfile = ""; relabundfile = ""; fastafile = ""; qualfile = ""; sfffile = ""; oligosfile = ""; accnosfile = ""; taxonomyfile = ""; constaxonomyfile = ""; samplefile = ""; unsigned concurentThreadsSupported = std::thread::hardware_concurrency(); if (concurentThreadsSupported < 1) { concurentThreadsSupported = 1; } //in case thread errors processors = toString(concurentThreadsSupported); flowfile = ""; biomfile = ""; countfile = ""; summaryfile = ""; contigsreportfile = ""; groupMode = "group"; sharedHeaderMode = "otu"; mothurCalling = false; } ~CurrentFile() { instance = 0; } }; /***********************************************/ #endif mothur-1.48.0/source/datastructures/000077500000000000000000000000001424121717000174765ustar00rootroot00000000000000mothur-1.48.0/source/datastructures/alignment.cpp000077500000000000000000000251651424121717000221740ustar00rootroot00000000000000/* * alignment.cpp * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This is a class for an abstract datatype for classes that implement various types of alignment algorithms. * As of 12/01/21 these included alignments based on needleman-wunsch, and the Gotoh algorithms * */ #include "alignmentcell.hpp" #include "alignment.hpp" /**************************************************************************************************/ Alignment::Alignment() { m = MothurOut::getInstance(); /* do nothing */ } /**************************************************************************************************/ Alignment::Alignment(int A) : nCols(A), nRows(A) { try { m = MothurOut::getInstance(); alignment.resize(nRows); // For the Gotoh and Needleman-Wunsch we initialize the dynamic programming for(int i=0;ierrorOut(e, "Alignment", "Alignment"); exit(1); } } /**************************************************************************************************/ Alignment::Alignment(int A, int nk) : nCols(A), nRows(A) { try { m = MothurOut::getInstance(); alignment.resize(nRows); // For the Gotoh and Needleman-Wunsch we initialize the dynamic programming for(int i=0;ierrorOut(e, "Alignment", "Alignment"); exit(1); } } /**************************************************************************************************/ //only gets bigger void Alignment::resize(int A) { try { nCols = A; nRows = A; alignment.resize(nRows); for(int i=0;ierrorOut(e, "Alignment", "resize"); exit(1); } } /**************************************************************************************************/ void Alignment::traceBack(bool createBaseMap){ // This traceback routine is used by the dynamic programming algorithms try { BBaseMap.clear(); ABaseMap.clear(); // to fill the values of seqAaln and seqBaln seqAaln = ""; seqBaln = ""; int row = lB-1; int column = lA-1; // seqAstart = 1; // seqAend = column; AlignmentCell currentCell = alignment[row][column]; // Start the traceback from the bottom-right corner of the // matrix if(currentCell.prevCell == 'x'){ seqAaln = seqBaln = "NOALIGNMENT"; }//If there's an 'x' in the bottom- else{ // right corner bail out because it means nothing got aligned int count = 0; while(currentCell.prevCell != 'x'){ // while the previous cell isn't an 'x', keep going... if(currentCell.prevCell == 'u'){ // if the pointer to the previous cell is 'u', go up in the seqAaln = '-' + seqAaln; // matrix. 
this indicates that we need to insert a gap in seqBaln = seqB[row] + seqBaln; // seqA and a base in seqB if (createBaseMap) { BBaseMap[row] = count; } //currentCell = alignment[--row][column]; --row; } else if(currentCell.prevCell == 'l'){ // if the pointer to the previous cell is 'l', go to the left seqBaln = '-' + seqBaln; // in the matrix. this indicates that we need to insert a gap seqAaln = seqA[column] + seqAaln; // in seqB and a base in seqA if (createBaseMap) { ABaseMap[column] = count; } //currentCell = alignment[row][--column]; --column; } else{ seqAaln = seqA[column] + seqAaln; // otherwise we need to go diagonally up and to the left, seqBaln = seqB[row] + seqBaln; // here we add a base to both alignments if (createBaseMap) { BBaseMap[row] = count; ABaseMap[column] = count; } //currentCell = alignment[--row][--column]; --row; --column; } if ((row >= 0) && (column >= 0)) { currentCell = alignment[row][column]; } else { break; } count++; } } pairwiseLength = seqAaln.length(); seqAstart = 1; seqAend = 0; seqBstart = 1; seqBend = 0; if (createBaseMap) { //flip maps since we now know the total length map newAMap; for (map::iterator it = ABaseMap.begin(); it != ABaseMap.end(); it++) { int spot = it->second; newAMap[pairwiseLength-spot-1] = it->first-1; } ABaseMap = newAMap; map newBMap; for (map::iterator it = BBaseMap.begin(); it != BBaseMap.end(); it++) { int spot = it->second; newBMap[pairwiseLength-spot-1] = it->first-1; } BBaseMap = newBMap; } for(int i=0;i=0;i--){ if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAend++; } else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBend++; } else { break; } } pairwiseLength -= (seqAend + seqBend); seqAend = seqA.length() - seqAend - 1; seqBend = seqB.length() - seqBend - 1; } catch(exception& e) { m->errorOut(e, "Alignment", "traceBack"); exit(1); } } /**************************************************************************************************/ //disables start and end postions and pairwise length void Alignment::proteinTraceBack(vector seqA, vector seqB){ // This traceback routine is used by the dynamic programming algorithms try { BBaseMap.clear(); ABaseMap.clear(); // to fill the values of seqAaln and seqBaln seqAaln = ""; seqBaln = ""; int row = lB-1; int column = lA-1; AlignmentCell currentCell = alignment[row][column]; // Start the traceback from the bottom-right corner of the // matrix if(currentCell.prevCell == 'x'){ seqAaln = seqBaln = "NOALIGNMENT"; }//If there's an 'x' in the bottom- else{ // right corner bail out because it means nothing got aligned int count = 0; while(currentCell.prevCell != 'x'){ // while the previous cell isn't an 'x', keep going... if(currentCell.prevCell == 'u'){ // if the pointer to the previous cell is 'u', go up in the seqAaln = "---" + seqAaln; // matrix. this indicates that we need to insert a gap in seqBaln = seqB[row].getAmino() + seqBaln; // seqA and a base in seqB --row; } else if(currentCell.prevCell == 'l'){ // if the pointer to the previous cell is 'l', go to the left seqBaln = '-' + seqBaln; // in the matrix. 
this indicates that we need to insert a gap seqAaln = seqA[column] + seqAaln; // in seqB and a base in seqA --column; } else{ seqAaln = seqA[column] + seqAaln; // otherwise we need to go diagonally up and to the left, seqBaln = seqB[row].getAmino() + seqBaln; // here we add a base to both alignments --row; --column; } if ((row >= 0) && (column >= 0)) { currentCell = alignment[row][column]; } else { break; } count++; } } pairwiseLength = 0; seqAstart = 0; seqBstart = 0; seqAend = 0; seqBend = 0; } catch(exception& e) { m->errorOut(e, "Alignment", "proteinTraceBack"); exit(1); } } /**************************************************************************************************/ Alignment::~Alignment(){ try { for (int i = 0; i < alignment.size(); i++) { for (int j = (alignment[i].size()-1); j >= 0; j--) { alignment[i].pop_back(); } } alignment.clear(); } catch(exception& e) { m->errorOut(e, "Alignment", "~Alignment"); exit(1); } } /**************************************************************************************************/ string Alignment::getSeqAAln(){ return seqAaln; // this is called to get the alignment of seqA } /**************************************************************************************************/ string Alignment::getSeqBAln(){ return seqBaln; // this is called to get the alignment of seqB } /**************************************************************************************************/ int Alignment::getCandidateStartPos(){ return seqAstart; // this is called to report the quality of the alignment } /**************************************************************************************************/ int Alignment::getCandidateEndPos(){ return seqAend; // this is called to report the quality of the alignment } /**************************************************************************************************/ int Alignment::getTemplateStartPos(){ return seqBstart; // this is called to report the quality of the alignment } /**************************************************************************************************/ map Alignment::getSeqAAlnBaseMap(){ return ABaseMap; } /**************************************************************************************************/ map Alignment::getSeqBAlnBaseMap(){ return BBaseMap; } /**************************************************************************************************/ int Alignment::getTemplateEndPos(){ return seqBend; // this is called to report the quality of the alignment } /**************************************************************************************************/ int Alignment::getPairwiseLength(){ return pairwiseLength; // this is the pairwise alignment length } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/alignment.hpp000077500000000000000000000032431424121717000221720ustar00rootroot00000000000000#ifndef DPALIGNMENT_H #define DPALIGNMENT_H /* * dpalignment.h * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This is a class for an abstract datatype for classes that implement various types of alignment algorithms. 
* As of 12/01/21 these included alignments based on needleman-wunsch, and the Gotoh algorithms * */ #include "mothur.h" #include "alignmentcell.hpp" #include "currentfile.h" #include "protein.hpp" #include "sequence.hpp" /**************************************************************************************************/ class Alignment { public: Alignment(int); Alignment(int, int); Alignment(); virtual ~Alignment(); virtual void align(string, string, bool createBaseMap=false) = 0; virtual void alignPrimer(string, string) {} virtual void align(Sequence, Protein) {} string getSeqAAln(); string getSeqBAln(); map getSeqAAlnBaseMap(); map getSeqBAlnBaseMap(); int getCandidateStartPos(); int getCandidateEndPos(); int getTemplateStartPos(); int getTemplateEndPos(); int getPairwiseLength(); virtual void resize(int); int getnRows() { return nRows; } protected: void traceBack(bool createBaseMap); void proteinTraceBack(vector, vector); string seqA, seqAaln; string seqB, seqBaln; int seqAstart, seqAend; int seqBstart, seqBend; int pairwiseLength; int nRows, nCols, lA, lB; vector > alignment; map ABaseMap; map BBaseMap; MothurOut* m; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/datastructures/alignmentcell.cpp000077500000000000000000000013351424121717000230250ustar00rootroot00000000000000/* * alignmentcell.cpp * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This class is pretty basic. Each AlignmentCell object contains a pointer to the previous cell and different values * used to calculate the alignment. Initially everything is set to zero and all pointers are set to 'x' * */ #include "alignmentcell.hpp" //******************************************************************************************************************** AlignmentCell::AlignmentCell() : prevCell('x'), cValue(0.0000), dValue(0.0000), iValue(0.0000) {} //******************************************************************************************************************** mothur-1.48.0/source/datastructures/alignmentcell.hpp000077500000000000000000000015031424121717000230270ustar00rootroot00000000000000#ifndef ALIGNMENTCELL_H #define ALIGNMENTCELL_H /* * alignmentcell.hpp * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This class is pretty basic. Each AlignmentCell object contains a pointer to the previous cell and different values * used to calculate the alignment. Initially everything is set to zero and all pointers are set to 'x' * */ #include "mothurout.h" //******************************************************************************************************************** class AlignmentCell { public: AlignmentCell(); ~AlignmentCell() = default; char prevCell; float cValue; float dValue; float iValue; }; //******************************************************************************************************************** #endif mothur-1.48.0/source/datastructures/alignmentdb.cpp000077500000000000000000000133271424121717000224770ustar00rootroot00000000000000/* * alignmentdb.cpp * Mothur * * Created by westcott on 11/4/09. * Copyright 2009 Schloss Lab. All rights reserved.
* */ #include "alignmentdb.h" #include "kmerdb.hpp" #include "suffixdb.hpp" /**************************************************************************************************/ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid, bool writeShortcut){ // This assumes that the template database is in fasta format, may try { // need to alter this in the future? m = MothurOut::getInstance(); current = CurrentFile::getInstance(); longest = 0; method = s; bool needToGenerate = true; threadID = tid; Utils util; long start = time(nullptr); m->mothurOut("\nReading in the " + fastaFileName + " template sequences...\t"); cout.flush(); //bool aligned = false; int tempLength = 0; ifstream fastaFile; util.openInputFile(fastaFileName, fastaFile); while (!fastaFile.eof()) { Sequence temp(fastaFile); gobble(fastaFile); if (m->getControl_pressed()) { templateSequences.clear(); break; } if (temp.getName() != "") { templateSequences.push_back(temp); //save longest base if (temp.getUnaligned().length() >= longest) { longest = ((int)temp.getUnaligned().length()+1); } if (tempLength != 0) { if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->setControl_pressed(true); } }else { tempLength = (int)temp.getAligned().length(); } } } fastaFile.close(); numSeqs = (int)templateSequences.size(); //all of this is elsewhere already! m->mothurOut("DONE.\n"); cout.flush(); m->mothurOut("It took " + toString(time(nullptr) - start) + " to read " + toString(templateSequences.size()) + " sequences.\n"); //in case you delete the seqs and then ask for them emptySequence = Sequence(); emptySequence.setName("no_match"); emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); string kmerDBName; if(method == "kmer") { search = new KmerDB(fastaFileName, kmerSize); kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; ifstream kmerFileTest(kmerDBName.c_str()); if(kmerFileTest){ string line = util.getline(kmerFileTest); bool GoodFile = util.checkReleaseVersion(line, current->getVersion()); kmerFileTest.close(); int shortcutTimeStamp = util.getTimeStamp(kmerDBName); int referenceTimeStamp = util.getTimeStamp(fastaFileName); //if the shortcut file is older then the reference file, remake shortcut file if (shortcutTimeStamp < referenceTimeStamp) { GoodFile = false; } if (GoodFile) { needToGenerate = false; } } } else if(method == "suffix") { search = new SuffixDB(numSeqs); } else { method = "kmer"; m->mothurOut(method + " is not a valid search option. 
I will run the command using kmer, ksize=8.\n"); search = new KmerDB(fastaFileName, 8); } if (!m->getControl_pressed()) { if (needToGenerate) { //add sequences to search for (int i = 0; i < templateSequences.size(); i++) { search->addSequence(templateSequences[i]); if (m->getControl_pressed()) { templateSequences.clear(); break; } } if (m->getControl_pressed()) { templateSequences.clear(); } if ((method != "kmer") || ((method == "kmer") && (writeShortcut))) { search->generateDB(); } }else if ((method == "kmer") && (!needToGenerate)) { ifstream kmerFileTest(kmerDBName.c_str()); search->readDB(kmerFileTest); } search->setNumSeqs(numSeqs); } } catch(exception& e) { m->errorOut(e, "AlignmentDB", "AlignmentDB"); exit(1); } } /**************************************************************************************************/ AlignmentDB::AlignmentDB(string s){ try { m = MothurOut::getInstance(); method = s; if(method == "suffix") { search = new SuffixDB(); } else { search = new KmerDB(); } //in case you delete the seqs and then ask for them emptySequence = Sequence(); emptySequence.setName("no_match"); emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); } catch(exception& e) { m->errorOut(e, "AlignmentDB", "AlignmentDB"); exit(1); } } /**************************************************************************************************/ AlignmentDB::~AlignmentDB() { delete search; } /**************************************************************************************************/ Sequence AlignmentDB::findClosestSequence(Sequence* seq, float& searchScore) const { try{ vector scores; vector spot = search->findClosestSequences(seq, 1, scores); if (spot.size() != 0) { searchScore = scores[0]; return templateSequences[spot[0]]; } else { searchScore = 0; return emptySequence; } } catch(exception& e) { m->errorOut(e, "AlignmentDB", "findClosestSequence"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/alignmentdb.h000077500000000000000000000020201424121717000221300ustar00rootroot00000000000000#ifndef ALIGNMENTDB_H #define ALIGNMENTDB_H /* * alignmentdb.h * Mothur * * Created by westcott on 11/4/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "sequence.hpp" #include "searchdatabase.hpp" #include "utils.hpp" #include "currentfile.h" /**************************************************************************************************/ class AlignmentDB { public: AlignmentDB(string, string, int, float, float, float, float, int, bool); //reads fastafile passed in and stores sequences AlignmentDB(string); ~AlignmentDB(); Sequence findClosestSequence(Sequence*, float&) const; //sequence to align, searchScore int getLongestBase() { return longest; } private: int numSeqs, longest, threadID; string method; SearchDatabase* search; vector templateSequences; Sequence emptySequence; MothurOut* m; CurrentFile* current; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/datastructures/aminoacid.cpp000066400000000000000000000211171424121717000221300ustar00rootroot00000000000000// // codon.cpp // Mothur // // Created by Sarah Westcott on 5/24/21. // Copyright © 2021 Schloss Lab. All rights reserved. 
// #include "aminoacid.hpp" /******************************************************************************************************************/ AminoAcid::AminoAcid() { try { m = MothurOut::getInstance(); indexes['A'] = 0; indexes['T'] = 1; indexes['G'] = 2; indexes['C'] = 3; indexes['N'] = 4; indexes['-'] = 5; indexes['.'] = 5; setAmino('?'); } catch(exception& e) { m->errorOut(e, "AminoAcid", "AminoAcid"); exit(1); } } /******************************************************************************************************************/ AminoAcid::AminoAcid(char c) { try { m = MothurOut::getInstance(); indexes['A'] = 0; indexes['T'] = 1; indexes['G'] = 2; indexes['C'] = 3; indexes['N'] = 4; indexes['-'] = 5; indexes['.'] = 5; setAmino(c); } catch(exception& e) { m->errorOut(e, "AminoAcid", "AminoAcid"); exit(1); } } /******************************************************************************************************************/ //requires the codon to be 3 characters long. Only valid characters are a,t,g,c,n. AminoAcid::AminoAcid(string codon) { try { m = MothurOut::getInstance(); indexes['A'] = 0; indexes['T'] = 1; indexes['G'] = 2; indexes['C'] = 3; indexes['N'] = 4; indexes['-'] = 5; indexes['.'] = 5; char amino = findAmino(codon); setAmino(amino); } catch(exception& e) { m->errorOut(e, "AminoAcid", "AminoAcid"); exit(1); } } /******************************************************************************************************************/ void AminoAcid::setAmino(char c) { try { c = toupper(c); if (m->validAminoAcids.count(c) != 0) { aminoBase = c; getName(); //sets name, number and compressed dna }else { m->mothurOut("[ERROR]: " + toString(c) + " is an invalid amino acid, please correct.\n"); m->setControl_pressed(true); } } catch(exception& e) { m->errorOut(e, "AminoAcid", "setAmino"); exit(1); } } /******************************************************************************************************************/ char AminoAcid::findAmino(string codon) { try { char amino = '?'; if (codon.length() != 3) { m->mothurOut("[ERROR]: " + codon + " is not the correct length. Codons must be 3 characters long, quitting.\n"); m->setControl_pressed(true); return amino; } int index1 = -1; int index2 = -1; int index3 = -1; it = indexes.find(codon[0]); if (it != indexes.end()) { index1 = it->second; } else { m->mothurOut("[ERROR]: " + toString(codon[0]) + " is not A, T, G, C, or N, quitting.\n"); m->setControl_pressed(true); return amino; } it = indexes.find(codon[1]); if (it != indexes.end()) { index2 = it->second; } else { m->mothurOut("[ERROR]: " + toString(codon[1]) + " is not A, T, G, C, or N, quitting.\n"); m->setControl_pressed(true); return amino; } it = indexes.find(codon[2]); if (it != indexes.end()) { index3 = it->second; } else { m->mothurOut("[ERROR]: " + toString(codon[2]) + " is not A, T, G, C, or N, quitting.\n"); m->setControl_pressed(true); return amino; } if ((index1 == 5) && (index2 == 5) && (index3 == 5)) { amino = '-'; return amino; } //if no N's then the set should contain one amino acid. if N's, then try all possible values for N in that position. 
//for example:ACN -> Threonine (T) because ACA,ACT,ACG,ACC all map to Threonine // but GAN -> could be Glutamate (E) (for N=A or G) or Aspartate (D) (for N=T or C) if ((index1 > 3) || (index2 > 3) || (index3 > 3)) { //any position of the codon is an N or gap set possibleAminoAcids; if (((index1 > 3) && (index2 > 3)) || ((index1 > 3) && (index3 > 3)) || ((index3 > 3) && (index2 > 3))) { //2 N's or gaps in codon }else{ //only 1 N if (index1 > 3) { possibleAminoAcids.insert(m->codons[0][index2][index3]); possibleAminoAcids.insert(m->codons[1][index2][index3]); possibleAminoAcids.insert(m->codons[2][index2][index3]); possibleAminoAcids.insert(m->codons[3][index2][index3]); }else if (index2 > 3) { possibleAminoAcids.insert(m->codons[index1][0][index3]); possibleAminoAcids.insert(m->codons[index1][1][index3]); possibleAminoAcids.insert(m->codons[index1][2][index3]); possibleAminoAcids.insert(m->codons[index1][3][index3]); }else { possibleAminoAcids.insert(m->codons[index1][index2][0]); possibleAminoAcids.insert(m->codons[index1][index2][1]); possibleAminoAcids.insert(m->codons[index1][index2][2]); possibleAminoAcids.insert(m->codons[index1][index2][3]); } if (possibleAminoAcids.size() == 1) { amino = (*possibleAminoAcids.begin()); } } }else { amino = m->codons[index1][index2][index3]; } return amino; } catch(exception& e) { m->errorOut(e, "AminoAcid", "findAmino"); exit(1); } } /******************************************************************************************************************/ //ala(0), arg(1), asn(2), asp(3), cys(4), gln(5), glu(6), gly(7), his(8), ileu(9), leu(10), lys(11), met(12), phe(13), pro(14), //ser1(15), ser2(16), thr(17), trp(18), tyr(19), val(20), del(21), stop(22), asx(23), glx(24), ser(25), unk(26), quest(27) string AminoAcid::getName() { try { string aminoName = "unknown"; aminoNum = unk; if (aminoBase == 'A') { aminoName = "Alanine"; aminoNum = ala; } //0 else if (aminoBase == 'R') { aminoName = "Arginine"; aminoNum = arg; } //1 else if (aminoBase == 'N') { aminoName = "Asparagine"; aminoNum = asn; } //2 else if (aminoBase == 'D') { aminoName = "Aspartic"; aminoNum = asp; } //3 else if (aminoBase == 'B') { aminoName = "Asparagine or Aspartic"; aminoNum = asx; } //23 else if (aminoBase == 'C') { aminoName = "Cysteine"; aminoNum = cys; } //4 else if (aminoBase == 'Q') { aminoName = "Glutamine"; aminoNum = gln; } //5 else if (aminoBase == 'E') { aminoName = "Glutamic"; aminoNum = glu; } //6 else if (aminoBase == 'Z') { aminoName = "Glutamine or Glutamic_Acid"; aminoNum = glx; } //24 else if (aminoBase == 'G') { aminoName = "Glycine"; aminoNum = gly; } //7 else if (aminoBase == 'H') { aminoName = "Histidine"; aminoNum = his; } //8 else if (aminoBase == 'I') { aminoName = "Isoleucine"; aminoNum = ileu; } //9 else if (aminoBase == 'L') { aminoName = "Leucine"; aminoNum = leu; } //10 else if (aminoBase == 'K') { aminoName = "Lysine"; aminoNum = lys; } //11 else if (aminoBase == 'M') { aminoName = "Methionine"; aminoNum = met; } //12 else if (aminoBase == 'F') { aminoName = "Phenylalanine"; aminoNum = phe; } //13 else if (aminoBase == 'P') { aminoName = "Proline"; aminoNum = pro; } //14 else if (aminoBase == 'S') { aminoName = "Serine"; aminoNum = ser1; } //15 else if (aminoBase == 'T') { aminoName = "Threonine"; aminoNum = thr; } //17 else if (aminoBase == 'W') { aminoName = "Tryptophan"; aminoNum = trp; } //18 else if (aminoBase == 'Y') { aminoName = "Tyrosine"; aminoNum = tyr; } //19 else if (aminoBase == 'V') { aminoName = "Valine"; aminoNum = val; } //20 else if 
((aminoBase == '.') || (aminoBase == '-')) { aminoName = "Gap"; aminoNum = del; } //21 else if ((aminoBase == '*') || (aminoBase == 'X')) { aminoName = "STOP"; aminoNum = stop; } //22 else if (aminoBase == '?') { aminoName = "QUESTION"; aminoNum = quest; } //27 return aminoName; } catch(exception& e) { m->errorOut(e, "AminoAcid", "getName"); exit(1); } } /******************************************************************************************************************/ mothur-1.48.0/source/datastructures/aminoacid.hpp000066400000000000000000000050211424121717000221310ustar00rootroot00000000000000// // aminoacid.hpp // Mothur // // Created by Sarah Westcott on 5/24/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #ifndef aminoacid_hpp #define aminoacid_hpp #include "mothurout.h" #include "utils.hpp" /* https://en.wikipedia.org/wiki/DNA_and_RNA_codon_tables AminoAcids : A,R,N,D,B,C,Q,E,Z,G,H,I,L,K,M,F,P,S,T,W,Y,V AminoAcid DNA codons Compressed Ala, A GCT,GCC,GCA,GCG GCN Arg, R CGT,CGC,CGA,CGG; AGA,AGG CGN,AGR; or CGY,MGR Asn, N AAT,AAC AAY Asp, D GAT,GAC GAY Asn or Asp, B AAT,AAC; GAT,GAC RAY Cys, C TGT,TGC TGY Gln, Q CAA,CAG CAR Glu, E GAA,GAG GAR Gln or Glu, Z CAA,CAG; GAA,GAG SAR Gly, G GGT,GGC,GGA,GGG GGN His, H CAT,CAC CAY Ile, I ATT,ATC,ATA ATH Leu, L CTT,CTC,CTA,CTG; TTA,TTG CTN,TTR; or CTY,YTR Lys, K AAA,AAG AAR Met, M ATG ATG Phe, F TTT,TTC TTY Pro, P CCT,CCC,CCA,CCG CCN Ser, S TCT,TCC,TCA,TCG; AGT,AGC TCN,AGY Thr, T ACT,ACC,ACA,ACG ACN Trp, W TGG TGG Tyr, Y TAT,TAC TAY Val, V GTT,GTC,GTA,GTG GTN START ATG STOP TAA,TGA,TAG TRA,TAR . and - ./- gap */ typedef enum { ala, arg, asn, asp, cys, gln, glu, gly, his, ileu, leu, lys, met, phe, pro, ser1, ser2, thr, trp, tyr, val, del, stop, asx, glx, ser, unk, quest } aas; /**************************************************************************************************/ class AminoAcid { public: AminoAcid(); AminoAcid(char); //AminoAcid character AminoAcid(string); //dna codon length of 3 ~AminoAcid() = default; string getName(); char getAmino() { return aminoBase; } int getNum() { return aminoNum; } void setAmino(char c); protected: MothurOut* m; Utils util; char aminoBase; int aminoNum; map indexes; //A -> 0, T -> 1, G -> 2, C -> 3, N -> 4, Gap -> 5 map::iterator it; char findAmino(string); char getAminoBase(string); //from name int getIndex(char x); }; #endif /* aminoacid_hpp */ mothur-1.48.0/source/datastructures/biom.cpp000066400000000000000000000051301424121717000211270ustar00rootroot00000000000000// // biom.cpp // Mothur // // Created by Sarah Westcott on 10/26/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #include "biom.hpp" /**************************************************************************************************/ Biom::Biom() { try { m = MothurOut::getInstance(); formatURL = "http://biom-format.org"; label = ""; version = ""; tableID = "No Table ID"; mothurVersion = ""; sharedFileName = ""; shared = nullptr; sharedFloat = nullptr; } catch(exception& e) { m->errorOut(e, "Biom", "Biom"); exit(1); } } /**************************************************************************************************/ Biom::Biom(string v) : version(v) { try { m = MothurOut::getInstance(); formatURL = "http://biom-format.org"; label = ""; tableID = "No Table ID"; mothurVersion = ""; sharedFileName = ""; shared = nullptr; sharedFloat = nullptr; } catch(exception& e) { m->errorOut(e, "Biom", "Biom"); exit(1); } } /**************************************************************************************************/ Biom::~Biom() { if (shared != nullptr) { delete shared; } if (sharedFloat != nullptr) { delete sharedFloat; } } /**************************************************************************************************/ void Biom::load(SharedRAbundVectors* s, vector c){ try { shared = new SharedRAbundVectors(*s); consTax = c; matrixElementType = "int"; label = s->getLabel(); } catch(exception& e) { m->errorOut(e, "Biom", "load-shared"); exit(1); } } /**************************************************************************************************/ void Biom::load(SharedRAbundFloatVectors* s, vector c){ try { sharedFloat = new SharedRAbundFloatVectors(*s); consTax = c; matrixElementType = "float"; label = s->getLabel(); vector sharedRabunds = s->getSharedRAbundVectors(); shared = new SharedRAbundVectors(); for (int i = 0; i < sharedRabunds.size(); i++) { if (m->getControl_pressed()) { break; } shared->push_back(sharedRabunds[i]); } } catch(exception& e) { m->errorOut(e, "Biom", "load-float"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/biom.hpp000066400000000000000000000044051424121717000211400ustar00rootroot00000000000000// // biom.hpp // Mothur // // Created by Sarah Westcott on 10/26/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #ifndef biom_hpp #define biom_hpp #include "utils.hpp" #include "mothurout.h" #include "sharedrabundfloatvectors.hpp" #include "sharedrabundvectors.hpp" #include "phylosummary.h" #include "taxonomy.hpp" #include "picrust.hpp" //http://biom-format.org //http://biom-format.org/documentation/format_versions/biom-1.0.html //http://biom-format.org/documentation/format_versions/biom-2.1.html /**************************************************************************************************/ class Biom { public: Biom(); Biom(string); //version virtual ~Biom(); virtual void read(string) = 0; virtual void load(SharedRAbundVectors* s, vector c); virtual void load(SharedRAbundFloatVectors* s, vector c); virtual void fillHeading(string mv, string sfn) { mothurVersion = mv; sharedFileName = sfn; } virtual void print(string, vector, Picrust*) { } //hdf5 print virtual string getVersion() { return version; } virtual string getMatrixElementType() { return matrixElementType; } virtual SharedRAbundVectors* getSharedRAbundVectors() { return shared; } virtual SharedRAbundFloatVectors* getSharedRAbundFloatVectors() { return sharedFloat; } //otu taxonomies virtual vector getConsTaxonomies() { return consTax; } //sample taxonomies virtual map getGroupTaxonomies() { return groupTaxonomies; } protected: MothurOut* m; Utils util; string matrixFormat, tableType; //examples: tableType = "OTU table", matrixFormat = "sparse" or "dense" string version, formatURL, label, matrixElementType; //version = simple or hdf5, set by child. matrixElementType = "int" or "float" string tableID, mothurVersion, sharedFileName; int maxLevel; SharedRAbundVectors* shared; //always created with read SharedRAbundFloatVectors* sharedFloat; //only created if the matrixElementType is float vector consTax; map groupTaxonomies; }; /**************************************************************************************************/ #endif /* biom_hpp */ mothur-1.48.0/source/datastructures/biomhdf5.cpp000066400000000000000000001365241424121717000217120ustar00rootroot00000000000000// // biomhdf5.cpp // Mothur // // Created by Sarah Westcott on 10/26/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #include "biomhdf5.hpp" /**************************************************************************************************/ BiomHDF5::BiomHDF5(string fname, string l) : Biom("Biological Observation Matrix 2.1.0"){ try { label = l; numOTUs = 0; numSamples = 0; read(fname); } catch(exception& e) { m->errorOut(e, "BiomHDF5", "BiomHDF5"); exit(1); } } /**************************************************************************************************/ BiomHDF5::BiomHDF5() : Biom("Biological Observation Matrix 2.1.0"){ try { numOTUs = 0; numSamples = 0; } catch(exception& e) { m->errorOut(e, "BiomHDF5", "BiomHDF5"); exit(1); } } //********************************************************************************************************************** #ifdef USE_HDF5 bool pathExists(hid_t id, const string& path) { return (H5Lexists( id, path.c_str(), H5P_DEFAULT ) > 0); } #endif /**************************************************************************************************/ void BiomHDF5::read(string fname){ try { nnz = 0; maxLevel = 0; otuNames.clear(); sampleNames.clear(); taxonomy.clear(); otuTaxonomies.clear(); #ifdef USE_HDF5 Picrust* picrust; vector metadata; H5::H5File file( fname.c_str(), H5F_ACC_RDONLY ); readAttributes(file); if (m->getControl_pressed()) { return; } try { //read otu names if (pathExists(file.getId(), "observation/")) { H5::Group group(file.openGroup("observation/")); otuNames = readNames(file, group, "ids"); if (m->getControl_pressed()) { return; } group.close(); }else { m->mothurOut("[ERROR]: Missing /""observation/ids/"" needed for OTU names.\n"); m->setControl_pressed(true); } }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: reading /""observation/ids/"" needed for OTU names.\n"); m->setControl_pressed(true); } try { //read group names if (pathExists(file.getId(), "sample/")) { H5::Group group(file.openGroup("sample/")); sampleNames = readNames(file, group, "ids"); if (m->getControl_pressed()) { return; } group.close(); }else { m->mothurOut("[ERROR]: Missing /""sample/ids/"" needed for group names.\n"); m->setControl_pressed(true); } }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: reading /""sample/ids/"" needed for group names.\n"); m->setControl_pressed(true); } bool hasConsTaxonomy = false; try { //read otu taxonomies if (pathExists(file.getId(), "observation/metadata/")) { H5::Group group(file.openGroup("observation/metadata/")); readTaxonomy(group, "taxonomy"); if (m->getControl_pressed()) { return; } group.close(); if (otuTaxonomies.size() == otuNames.size()) { hasConsTaxonomy = true; } } }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: reading /""observation/metadata/taxonomy"".\n"); hasConsTaxonomy = false; m->setControl_pressed(true); } try { //read otu abundances if (pathExists(file.getId(), "observation/matrix/")) { H5::Group group(file.openGroup("observation/matrix/")); vector datasets; datasets.push_back("data"); datasets.push_back("indices"); datasets.push_back("indptr"); datasets.push_back("label"); label = readOTUAbundances(group, datasets); if (m->getControl_pressed()) { return; } group.close(); }else { m->mothurOut("[ERROR]: Missing /""observation/matrix/"" needed for OTU abundances.\n"); m->setControl_pressed(true); } }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: reading /""observation/matrix/"" needed for OTU abundances.\n"); m->setControl_pressed(true); } bool error = 
false; if (nnz != otudata.size()) { error = true; } //create shared file sort(sampleNames.begin(), sampleNames.end()); if (shared != nullptr) { delete shared; } shared = new SharedRAbundVectors(); //create empty sharedrabundvectors so we can add otus below for (int i = 0; i < sampleNames.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(); temp->setGroup(sampleNames[i]); shared->push_back(temp); } shared->setLabels(label); if (matrixElementType == "float") { if (sharedFloat != nullptr) { delete sharedFloat; } sharedFloat = new SharedRAbundFloatVectors(); //creates new sharedRAbunds for (int i = 0; i < sampleNames.size(); i++) { SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(shared->getNumBins()); //sets all abunds to 0 temp->setLabel(label); temp->setGroup(sampleNames[i]); sharedFloat->push_back(temp); } sharedFloat->setLabels(label); } //for each otu int count = 0; for (int h = 0; h < indptr.size()-1; h++) { int otuStart = indptr[h]; int otuEnd = indptr[h+1]; vector otuAbunds; otuAbunds.resize(sampleNames.size(), 0); //initialze otus sample abundances to 0 - only non zero abunds are recorded vector otuFloatAbunds; otuFloatAbunds.resize(sampleNames.size(), 0); //initialze otus sample abundances to 0 - only non zero abunds are recorded for (int i = otuStart; i < otuEnd; i++) { otuAbunds[indices[i]] = (int)otudata[count]; otuFloatAbunds[indices[i]] = otudata[count]; count++; } shared->push_back(otuAbunds, otuNames[h]); if (matrixElementType == "float") { sharedFloat->push_back(otuFloatAbunds, otuNames[h]); } } if (hasConsTaxonomy) { if (shared->getNumBins() == otuTaxonomies.size()) { for (int i = 0; i < otuTaxonomies.size(); i++) { if (m->getControl_pressed()) { break; } string thisOTUsTax = otuTaxonomies[i]; string newTax = util.addUnclassifieds(thisOTUsTax, maxLevel, false); Taxonomy thisOTUsTaxonomy(otuNames[i], newTax, shared->getOTUTotal(i)); consTax.push_back(thisOTUsTaxonomy); } } } if (sampleNames.size() == taxonomy.size()) { for (int i = 0; sampleNames.size(); i++) { if (m->getControl_pressed()) { break; } groupTaxonomies[sampleNames[i]] = taxonomy[i]; } } file.close(); #endif } catch(exception& e) { m->errorOut(e, "BiomHDF5", "read"); exit(1); } } #ifdef USE_HDF5 //********************************************************************************************************************** //Group = "observation/" or "sample/", datasetName = "ids" vector BiomHDF5::readNames( H5::H5File& file, H5::Group& group, string datasetname) { try { vector items; hsize_t numObjects = group.getNumObjs(); if (numObjects != 0) { //we have this group H5::DataSet dataset = group.openDataSet(datasetname.c_str()); H5::DataType dataType(dataset.getDataType()); H5::DataSpace dataSpace = dataset.getSpace(); int rank = dataSpace.getSimpleExtentNdims(); //number of dimensions, should be 1 hsize_t dims[rank]; dataSpace.getSimpleExtentDims(dims); //size of each dimension char **data = new char*[dims[0]]; H5::StrType str_type(H5::PredType::C_S1, H5T_VARIABLE); dataset.read((void*)data, str_type); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + datasetname + " = "); } for (int i = 0; i < dims[0]; i++) { if (m->getDebug()) { m->mothurOut(toString(data[i]) + "\t"); } items.push_back(data[i]); delete[] data[i]; } if (m->getDebug()) { m->mothurOutEndLine(); } delete[] data; dataset.close(); } return items; } catch(exception& e) { m->errorOut(e, "BiomHDF5", "readNames"); exit(1); } } 
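// --------------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the mothur sources): BiomHDF5::read above rebuilds each OTU's
// per-sample abundances from the biom 2.1 compressed sparse row (CSR) triplet "data", "indices"
// and "indptr". The standalone helper below shows that decoding step in isolation, assuming the
// usual CSR layout where indptr holds one offset per OTU plus a final end marker. The function
// name expandOTURow and its parameters are hypothetical and used only for illustration here.
#include <vector>
std::vector<int> expandOTURow(int otu, int numSamples,
                              const std::vector<int>& data,     // non-zero abundance values
                              const std::vector<int>& indices,  // sample (column) index of each value
                              const std::vector<int>& indptr) { // row offsets; size = numOTUs + 1
    std::vector<int> row(numSamples, 0);                        // dense row, zero unless filled below
    for (int k = indptr[otu]; k < indptr[otu + 1]; k++) {       // walk this OTU's slice of data/indices
        row[indices[k]] = data[k];                              // drop each value into its sample column
    }
    return row;
}
// For example, with indptr = {0, 2, 3}, indices = {0, 2, 1} and data = {5, 1, 4}, OTU 0 has
// abundance 5 in sample 0 and 1 in sample 2, while OTU 1 has abundance 4 in sample 1.
// --------------------------------------------------------------------------------------------------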
//********************************************************************************************************************** //Group = "observation/metadata", datasetName = "taxonomy" int BiomHDF5::readTaxonomy( H5::Group& group, string datasetName) { try { hsize_t numObjects = group.getNumObjs(); if (numObjects != 0) { //we have this group H5::DataSet dataset = group.openDataSet(datasetName); H5::DataType dataType(dataset.getDataType()); H5::DataSpace dataSpace = dataset.getSpace(); H5::StrType str_type(H5::PredType::C_S1, H5T_VARIABLE); int rank = dataSpace.getSimpleExtentNdims(); //number of dimensions hsize_t dims[rank]; dataSpace.getSimpleExtentDims(dims); //size of each dimension if (dataset.getTypeClass() == H5T_STRING) { int numberOfTaxonomies = dims[0]; if (rank == 2) { numberOfTaxonomies *= dims[1]; } char **data = new char*[numberOfTaxonomies]; dataset.read((void*)data, dataType); if (rank == 1) { for (int i = 0; i < numberOfTaxonomies; i++) { if (m->getDebug()) { m->mothurOut(toString(data[i]) + "\t"); } taxonomy.push_back(data[i]); delete[] data[i]; } delete[] data; }else if (rank == 2) { string otuTaxonomy = ""; int count = 0; for (int i = 0; i < numberOfTaxonomies; i++) { otuTaxonomy += data[i]; otuTaxonomy += ";"; count++; if (count == dims[1]) { if (m->getDebug()) { m->mothurOut("[DEBUG]: " + toString(otuTaxonomy) + "\n"); } otuTaxonomies.push_back(otuTaxonomy); if (count > maxLevel) { maxLevel = count; } count = 0; otuTaxonomy = ""; } } } } dataset.close(); } return numObjects; } catch(exception& e) { m->errorOut(e, "BiomHDF5", "readTaxonomy"); exit(1); } } //********************************************************************************************************************** //Group = "observation/matrix" string BiomHDF5::readOTUAbundances( H5::Group& group, vector datasets) { try { string thisLabel = ""; hsize_t numObjects = group.getNumObjs(); if (numObjects == 0) { return thisLabel; } for (int h = 0; h < datasets.size(); h++) { H5std_string datasetName = datasets[h]; if (pathExists(group.getId(), datasetName)) { H5::DataSet dataset = group.openDataSet(datasetName); H5::DataSpace dataSpace = dataset.getSpace(); if (dataset.getTypeClass() == H5T_INTEGER) { int rank = dataSpace.getSimpleExtentNdims(); //number of dimensions, should be 1 hsize_t dims[rank]; dataSpace.getSimpleExtentDims(dims); //size of each dimension matrixElementType = "int"; if (rank == 1) { int* data = new int[dims[0]]; H5::DataSpace data_mspace(rank, dims); dataset.read(data, H5::PredType::NATIVE_INT, data_mspace, dataSpace); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + datasetName + " = "); } for (int i = 0; i < dims[0]; i++) { if (m->getDebug()) { m->mothurOut(toString(data[i]) + "\t"); } if (datasetName == "data") { otudata.push_back(data[i]); } else if (datasetName == "indices") { indices.push_back(data[i]); } else if (datasetName == "indptr") { indptr.push_back(data[i]); } } if (m->getDebug()) { m->mothurOutEndLine(); } delete[] data; } }else if (dataset.getTypeClass() == H5T_FLOAT) { int rank = dataSpace.getSimpleExtentNdims(); //number of dimensions, should be 1 hsize_t dims[rank]; dataSpace.getSimpleExtentDims(dims); //size of each dimension matrixElementType = "float"; if (rank == 1) { float* data = new float[dims[0]]; H5::DataSpace data_mspace(rank, dims); dataset.read(data, H5::PredType::NATIVE_FLOAT, data_mspace, dataSpace); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + datasetName + " = "); } for (int i = 0; i < dims[0]; i++) { if (m->getDebug()) { m->mothurOut(toString(data[i]) + 
"\t"); } if (datasetName == "data") { otudata.push_back(data[i]); } else if (datasetName == "indices") { indices.push_back((int)data[i]); } else if (datasetName == "indptr") { indptr.push_back((int)data[i]); } } if (m->getDebug()) { m->mothurOutEndLine(); } delete[] data; } }else if (dataset.getTypeClass() == H5T_STRING) { H5::StrType strdatatype(H5::PredType::C_S1, H5T_VARIABLE); H5std_string value; dataset.read(value, strdatatype); thisLabel = value; } dataset.close(); } } return thisLabel; } catch(exception& e) { m->errorOut(e, "BiomHDF5", "readOTUAbundances"); exit(1); } } //********************************************************************************************************************** //read attribute of group string BiomHDF5::readStringAttributes(H5::Group& fileAttributes, string name) { try { H5::Attribute attribute(fileAttributes.openAttribute(name)); H5std_string attributeName; attribute.getName(attributeName); H5::DataType attributeType(attribute.getDataType()); string attributeValue = ""; // Read the Attribute Data. Depends on the kind of data if (attributeType.getClass() == H5T_STRING) { H5std_string value; attribute.read(attributeType, value); attributeValue = value; if (m->getDebug()) { m->mothurOut("[DEBUG]: " + attributeName + " = " + value + "\n"); } } attribute.close(); return attributeValue; } catch(exception& e) { m->errorOut(e, "BiomHDF5", "readStringAttributes"); exit(1); } } //********************************************************************************************************************** //read attribute of group void BiomHDF5::readIntAttributes(H5::Group& fileAttributes, string name) { try { H5::Attribute attribute(fileAttributes.openAttribute(name)); H5std_string attributeName; attribute.getName(attributeName); H5::DataType attributeType(attribute.getDataType()); H5::DataSpace attDataSpace = attribute.getSpace(); int rank = attDataSpace.getSimpleExtentNdims(); //number of dimensions hsize_t dims[rank]; attDataSpace.getSimpleExtentDims(dims); //size of each dimension // Read the Attribute Data. 
Depends on the kind of data if (attributeType.getClass() == H5T_INTEGER) { if (attDataSpace.isSimple()) { if (rank == 0) { hsize_t data = 0; attribute.read(attributeType, &data); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + attributeName + " = " + toString(data) + "\n"); } if (attributeName == "nnz") { nnz = data; } }else if (rank == 1) { hsize_t data[dims[0]]; attribute.read(attributeType, data); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + attributeName + " = "); } for (int i = 0; i < dims[0]; i++) { if (m->getDebug()) { m->mothurOut(toString(data[i]) + "\t"); } if (attributeName == "nnz") { nnz = data[i]; } } if (m->getDebug()) { m->mothurOutEndLine(); } if (attributeName == "shape") { if (dims[0] == 2) { numOTUs = data[0]; numSamples = data[1]; } } } } } attribute.close(); } catch(exception& e) { m->errorOut(e, "BiomHDF5", "readIntAttributes"); exit(1); } } //********************************************************************************************************************** //Process attribute of group or dataset void BiomHDF5::readAttributes(H5::H5File& file) { try { H5::Group fileAttributes(file.openGroup( "/" )); //read table id tableID = readStringAttributes(fileAttributes, "id"); //read table type tableType = readStringAttributes(fileAttributes, "type"); //read format-url formatURL = readStringAttributes(fileAttributes, "format-url"); //read shape readIntAttributes(fileAttributes, "shape"); //read number non zero readIntAttributes(fileAttributes, "nnz"); fileAttributes.close(); } catch(exception& e) { m->errorOut(e, "BiomHDF5", "readAttributes"); exit(1); } } //********************************************************************************************************************** //print required dataset attributes void BiomHDF5::printRequiredFileAttributes(H5::Group& fileAttributes, int numBins, int numSamples) { try { H5::DataSpace attr_dataspace = H5::DataSpace(H5S_SCALAR); // Create new dataspace for attribute H5::StrType strdatatype(H5::PredType::C_S1, H5T_VARIABLE); H5std_string idValue(tableID); H5::Attribute idAttribute = fileAttributes.createAttribute("id", strdatatype, attr_dataspace); idAttribute.write(strdatatype, idValue); H5std_string typeValue(tableType); H5::Attribute typeAttribute = fileAttributes.createAttribute("type", strdatatype, attr_dataspace); typeAttribute.write(strdatatype, typeValue); H5std_string formatUrl(formatURL); H5::Attribute urlAttribute = fileAttributes.createAttribute("format-url", strdatatype, attr_dataspace); urlAttribute.write(strdatatype, formatUrl); H5std_string generatedByValue(mothurVersion); H5::Attribute generatedByAttribute = fileAttributes.createAttribute("generated-by", strdatatype, attr_dataspace); generatedByAttribute.write(strdatatype, generatedByValue); time_t rawtime; struct tm * timeinfo; time ( &rawtime ); timeinfo = localtime ( &rawtime ); string dateString = asctime (timeinfo); int pos = dateString.find('\n'); if (pos != string::npos) { dateString = dateString.substr(0, pos);} H5std_string dataValue(dateString); H5::Attribute dateAttribute = fileAttributes.createAttribute("creation-date", strdatatype, attr_dataspace); dateAttribute.write(strdatatype, dataValue); hsize_t dims[1]; dims[0] = 2; H5::DataSpace dataspace( 1, dims ); hsize_t data[2]; data[0] = 2; data[1] = 1; H5::Attribute formatVersionAttribute = fileAttributes.createAttribute("format-version", H5::PredType::NATIVE_INT, dataspace); formatVersionAttribute.write(H5::PredType::NATIVE_INT, data); data[0] = numBins; data[1] = numSamples; H5::Attribute 
shapeAttribute = fileAttributes.createAttribute("shape", H5::PredType::NATIVE_INT, dataspace); shapeAttribute.write(H5::PredType::NATIVE_INT, data); hsize_t nnzValue = nnz; H5::Attribute nnzAttribute = fileAttributes.createAttribute("nnz", H5::PredType::NATIVE_INT, attr_dataspace); nnzAttribute.write(H5::PredType::NATIVE_INT, &nnzValue); } catch(exception& e) { m->errorOut(e, "BiomHDF5", "printRequiredAttributes"); exit(1); } } //********************************************************************************************************************** //print otuNames //"observation/ids" -> otuLabels - "GG_OTU_1", "GG_OTU_2", "GG_OTU_3", "GG_OTU_4", "GG_OTU_5 //"sample/ids" -> group names - "Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6" void BiomHDF5::printNames(H5::Group& group, vector names, string datasetname) { try { hsize_t dimsf[1]; dimsf[0] = names.size(); H5::DataSpace dataspace( 1, dimsf ); H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE); //fill data with names char* data[dimsf[0]]; for (int i = 0; i < names.size(); i++) { data[i] = (char*) names[i].c_str(); } const H5std_string DATASET_NAME( datasetname.c_str() ); H5::DataSet dataset = group.createDataSet( DATASET_NAME, datatype, dataspace ); dataset.write( data, datatype ); dataset.close(); } catch(exception& e) { m->errorOut(e, "BiomHDF5", "printNames"); exit(1); } } //********************************************************************************************************************** //"observation/metadata/taxonomy" -> taxonomy info - otu classifications void BiomHDF5::printOTUTaxonomy(H5::Group& group, string datasetname) { try { int maxLevel = consTax[0].getNumLevels(); hsize_t dimsf[2]; dimsf[0] = consTax.size(); dimsf[1] = maxLevel; H5::DataSpace dataspace(2, dimsf); //2D array, (numOtus x vector) H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE); const H5std_string DATASET_NAME( datasetname.c_str() ); H5::DataSet dataset = group.createDataSet( DATASET_NAME, datatype, dataspace ); vector cPara; for (int i = 0; i < consTax.size(); i++) { if (m->getControl_pressed()) { break; } vector thisOtusTaxonomy = consTax[i].getSimpleTaxons(); for (int j = 0; j < maxLevel; j++) { cPara.push_back(util.mothurConvert(thisOtusTaxonomy[j])); } } char** data; data = new char*[cPara.size()]; for (int i = 0; i < cPara.size(); i++) { data[i] = cPara[i]; } dataset.write(data, datatype); dataset.close(); //free memory for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; } delete[] data; } catch(exception& e) { m->errorOut(e, "BiomHDF5", "printOTUTaxonomy"); exit(1); } } //************************************************************************************************************** //"observation/matrix/data" -> otu abundances for each non zero abundnace entry - 1, 5, 1, 2, 3, 1, 1, 4, 2, 2, 1, 1, 1, 1, 1 //"observation/matrix/indices" -> index of group - maps into samples/ids 2, 0, 1, 3, 4, 5, 2, 3, 5, 0, 1, 2, 5, 1, 2 //"observation/matrix/indptr" -> maps non zero abundance to OTU - 0, 1, 6, 9, 13, 15 - 0 start of OTU1s indexes, 1 start of OTU2s indexes, ... 
15 start of OTU5s indexes /* label group numOtus GG_OTU_1 GG_OTU_2 GG_OTU_3 GG_OTU_4 GG_OTU_5 userLabel Sample1 0 5 0 2 0 userLabel Sample2 0 1 0 2 1 userLabel Sample3 1 0 1 1 1 userLabel Sample4 0 2 4 0 0 userLabel Sample5 0 3 0 0 0 userLabel Sample6 0 1 2 1 0 */ //group = "observation/matrix/"; void BiomHDF5::printOTUAbundances(H5::Group& group, int numBins, int numSamples, string label, bool useRelabund=false) { try { int otusStartIndex = 0; vector indptr, indices, abunds; vector abundsFloat; //fill indices, indptr and data vectors for (int i = 0; i < numBins; i++) { if (m->getControl_pressed()) { return; } vector thisOtusAbundances; vector thisOtusFloatAbundances; float zero = 0.0; if (useRelabund) { thisOtusFloatAbundances = sharedFloat->getOTU(i); } else { thisOtusAbundances = shared->getOTU(i); } indptr.push_back(otusStartIndex); for (int j = 0; j < numSamples; j++) { if (useRelabund) { if (util.isEqual(thisOtusFloatAbundances[j], zero)) {} //skip zero values else { otusStartIndex++; //update number of non zero values for this OTU - use to create indptr values indices.push_back(j); //index to sample providing this abund abundsFloat.push_back(thisOtusFloatAbundances[j]); //save this samples OTU abundance } }else { if (thisOtusAbundances[j] == 0) {} //skip zero values else { otusStartIndex++; //update number of non zero values for this OTU - use to create indptr values indices.push_back(j); //index to sample providing this abund abunds.push_back(thisOtusAbundances[j]); //save this samples OTU abundance } } } } // dataset dimensions hsize_t dimsf[1]; dimsf[0] = nnz; H5::DataSpace dataspace( 1, dimsf ); //dataspace 1 x nnz int data[nnz]; for (int i = 0; i < nnz; i++) { data[i] = indices[i]; } //fill data with indices const H5std_string DATASET_NAME( "indices" ); H5::DataSet dataset = group.createDataSet( DATASET_NAME, H5::PredType::NATIVE_INT, dataspace ); dataset.write( data, H5::PredType::NATIVE_INT ); dataset.close(); //create data dataset - type depends on whether or not we are using the relabund values if (useRelabund) { //print float float dataFloat[nnz]; for (int i = 0; i < nnz; i++) { dataFloat[i] = abundsFloat[i]; } //fill data with abunds const H5std_string DATASET_NAME( "data" ); H5::DataSet dataset = group.createDataSet( DATASET_NAME, H5::PredType::NATIVE_FLOAT, dataspace ); dataset.write( dataFloat, H5::PredType::NATIVE_FLOAT ); dataset.close(); }else { //print shared for (int i = 0; i < nnz; i++) { data[i] = abunds[i]; } //fill data with abunds const H5std_string DATASET_NAME( "data" ); H5::DataSet dataset = group.createDataSet( DATASET_NAME, H5::PredType::NATIVE_INT, dataspace ); dataset.write( data, H5::PredType::NATIVE_INT ); dataset.close(); } //create indptr dataset dimsf[0] = numBins+1; H5::DataSpace dataspaceIndptr(1, dimsf); //dataspace 1 x numBins int dataIndptr[numBins+1]; for (int i = 0; i < numBins; i++) { dataIndptr[i] = indptr[i]; } //fill data with indptr dataIndptr[numBins] = nnz; const H5std_string DATASET_NAME_INDPTR( "indptr" ); H5::DataSet datasetIndptr = group.createDataSet( DATASET_NAME_INDPTR, H5::PredType::NATIVE_INT, dataspaceIndptr ); datasetIndptr.write(dataIndptr, H5::PredType::NATIVE_INT); datasetIndptr.close(); //create label dataset to store shared label H5::DataSpace attr_dataspace = H5::DataSpace(H5S_SCALAR); // Create new dataspace for attribute H5::StrType strdatatype(H5::PredType::C_S1, H5T_VARIABLE); const H5std_string DATASET_NAME_LABEL( "label" ); H5::DataSet labelDataset = group.createDataSet( DATASET_NAME_LABEL, strdatatype, 
attr_dataspace ); labelDataset.write(label, strdatatype); labelDataset.close(); } catch(exception& e) { m->errorOut(e, "BiomHDF5", "printOTUAbundances"); exit(1); } } #endif //********************************************************************************************************************** void BiomHDF5::printShared(string outputFileName, vector sampleMetadata, Picrust* picrust) { try { //set required datasets - groupname -> datasetname //"observation/ids" -> otuLabels - "GG_OTU_1", "GG_OTU_2", "GG_OTU_3", "GG_OTU_4", "GG_OTU_5 //"observation/matrix/data" -> otu abundances for each non zero abundnace entry - 1, 5, 1, 2, 3, 1, 1, 4, 2, 2, 1, 1, 1, 1, 1 //"observation/matrix/indices" -> index of group - maps into samples/ids 2, 0, 1, 3, 4, 5, 2, 3, 5, 0, 1, 2, 5, 1, 2 //"observation/matrix/indptr" -> maps non zero abundance to OTU - 0, 1, 6, 9, 13, 15 - 0 start of OTU1s indexes, 1 start of OTU2s indexes, ... 15 start of OTU5s indexes /* label group numOtus GG_OTU_1 GG_OTU_2 GG_OTU_3 GG_OTU_4 GG_OTU_5 userLabel Sample1 5 0 5 0 2 0 userLabel Sample2 5 0 1 0 2 1 userLabel Sample3 5 1 0 1 1 1 userLabel Sample4 5 0 2 4 0 0 userLabel Sample5 5 0 3 0 0 0 userLabel Sample6 5 0 1 2 1 0 */ //"observation/metadata/taxonomy" -> taxonomy info - otu classifications //"sample/ids" -> group names - "Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6" //"sample/metadata/" -> group metadata (optional) //run this first because if picrust alters the shared vector we will need to use the updated info //taxMetadata[0] = taxonomy for otu0 vector< vector > taxMetadata = getMetaData(picrust); //find number of non zero otus nnz = 0; for (int j = 0; j < shared->getNumBins(); j++) { vector thisOTU = shared->getOTU(j); for (int i = 0; i < thisOTU.size(); i++) { if (thisOTU[i] != 0) { nnz++; } } } #ifdef USE_HDF5 H5::H5File file(outputFileName.c_str(), H5F_ACC_TRUNC ); try { //print required file attributes H5::Group fileAttributes(file.openGroup( "/" )); printRequiredFileAttributes(fileAttributes, shared->getNumBins(), shared->size()); //id, type, format-url, format-version, generated-by, creation-date, shape, nnz fileAttributes.close(); }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable to print H5 required file attributes.\n"); m->setControl_pressed(true); } try { //print otuLabels called "observation/ids" in biom file H5::Group observationGroup( file.createGroup( "observation" )); printNames(observationGroup, shared->getOTUNames(), "ids"); observationGroup.close(); }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable to print otuLabels in 'observation/ids' group.\n"); m->setControl_pressed(true); } try { //print group names called "sample/ids" in biom file H5::Group sampleGroup( file.createGroup( "sample" )); printNames(sampleGroup, shared->getNamesGroups(), "ids"); if (sampleMetadata.size() != 0) { printNames(sampleGroup, sampleMetadata, "metadata"); } sampleGroup.close(); }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable to print sample names or sample metadata.\n"); m->setControl_pressed(true); } try { //print otuAbundances called "observation/matrix/" (data, indicies, indptr) in biom file H5::Group matrixGroup( file.createGroup( "observation/matrix/" )); printOTUAbundances(matrixGroup, shared->getNumBins(), shared->size(), shared->getLabel()); matrixGroup.close(); }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable 
to print otu abundances in 'observation/matrix/' group.\n"); m->setControl_pressed(true); } if (consTax.size() != 0) { try { //print otuTaxonomies called "observation/metadata/taxonomy" in the biom file H5::Group taxonomyGroup( file.createGroup( "observation/metadata/" )); printOTUTaxonomy(taxonomyGroup, "taxonomy"); taxonomyGroup.close(); }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable to print otu consensus taxonomies in 'observation/metadata/' group.\n"); m->setControl_pressed(true); } } file.close(); #endif } catch(exception& e) { m->errorOut(e, "BiomHDF5", "printShared"); exit(1); } } //********************************************************************************************************************** void BiomHDF5::printFloat(string outputFileName, vector sampleMetadata, Picrust* picrust) { try { //set required datasets - groupname -> datasetname //"observation/ids" -> otuLabels - "GG_OTU_1", "GG_OTU_2", "GG_OTU_3", "GG_OTU_4", "GG_OTU_5 //"observation/matrix/data" -> otu abundances for each non zero abundnace entry - 1, 5, 1, 2, 3, 1, 1, 4, 2, 2, 1, 1, 1, 1, 1 //"observation/matrix/indices" -> index of group - maps into samples/ids 2, 0, 1, 3, 4, 5, 2, 3, 5, 0, 1, 2, 5, 1, 2 //"observation/matrix/indptr" -> maps non zero abundance to OTU - 0, 1, 6, 9, 13, 15 - 0 start of OTU1s indexes, 1 start of OTU2s indexes, ... 15 start of OTU5s indexes /* label group numOtus GG_OTU_1 GG_OTU_2 GG_OTU_3 GG_OTU_4 GG_OTU_5 userLabel Sample1 5 0 5 0 2 0 userLabel Sample2 5 0 1 0 2 1 userLabel Sample3 5 1 0 1 1 1 userLabel Sample4 5 0 2 4 0 0 userLabel Sample5 5 0 3 0 0 0 userLabel Sample6 5 0 1 2 1 0 */ //"observation/metadata/taxonomy" -> taxonomy info - otu classifications //"sample/ids" -> group names - "Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6" //"sample/metadata/" -> group metadata (optional) //run this first because if picrust alters the shared vector we will need to use the updated info //taxMetadata[0] = taxonomy for otu0 vector< vector > taxMetadata = getMetaData(picrust); //find number of non zero otus nnz = 0; float zero = 0.0; for (int j = 0; j < sharedFloat->getNumBins(); j++) { vector thisOTU = sharedFloat->getOTU(j); for (int i = 0; i < thisOTU.size(); i++) { if (util.isEqual(thisOTU[i], zero)) { nnz++; } } } #ifdef USE_HDF5 H5::H5File file(outputFileName.c_str(), H5F_ACC_TRUNC ); try { //print required file attributes H5::Group fileAttributes(file.openGroup( "/" )); printRequiredFileAttributes(fileAttributes, sharedFloat->getNumBins(), sharedFloat->size()); //id, type, format-url, format-version, generated-by, creation-date, shape, nnz fileAttributes.close(); }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable to print H5 required file attributes.\n"); m->setControl_pressed(true); } try { //print otuLabels called "observation/ids" in biom file H5::Group observationGroup( file.createGroup( "observation" )); printNames(observationGroup, sharedFloat->getOTUNames(), "ids"); observationGroup.close(); }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable to print otuLabels in 'observation/ids' group.\n"); m->setControl_pressed(true); } try { //print group names called "sample/ids" in biom file H5::Group sampleGroup( file.createGroup( "sample" )); printNames(sampleGroup, sharedFloat->getNamesGroups(), "ids"); if (sampleMetadata.size() != 0) { printNames(sampleGroup, sampleMetadata, "metadata"); } sampleGroup.close(); 
}catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable to print sample names or sample metadata.\n"); m->setControl_pressed(true); } try { //print otuAbundances called "observation/matrix/" (data, indicies, indptr) in biom file H5::Group matrixGroup( file.createGroup( "observation/matrix/" )); printOTUAbundances(matrixGroup, sharedFloat->getNumBins(), sharedFloat->size(), sharedFloat->getLabel()); matrixGroup.close(); }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable to print otu abundances in 'observation/matrix/' group.\n"); m->setControl_pressed(true); } if (consTax.size() != 0) { try { //print otuTaxonomies called "observation/metadata/taxonomy" in the biom file H5::Group taxonomyGroup( file.createGroup( "observation/metadata/" )); printOTUTaxonomy(taxonomyGroup, "taxonomy"); taxonomyGroup.close(); }catch(H5::Exception& e){ //do nothing taxonomy info does not exist m->mothurOut("[ERROR]: Unable to print otu consensus taxonomies in 'observation/metadata/' group.\n"); m->setControl_pressed(true); } } file.close(); #endif } catch(exception& e) { m->errorOut(e, "BiomHDF5", "printFloat"); exit(1); } } //********************************************************************************************************************** void BiomHDF5::print(string outputFileName, vector sampleMetadata, Picrust* picrust) { try { if (matrixElementType == "int") { printShared(outputFileName, sampleMetadata, picrust); }else { printFloat(outputFileName, sampleMetadata, picrust); } } catch(exception& e) { m->errorOut(e, "BiomHDF5", "print"); exit(1); } } //********************************************************************************************************************** vector< vector > BiomHDF5::getMetaData(Picrust* picrust, bool useRelabund){ try { vector< vector > metadata; if (consTax.size() == 0) { if (!useRelabund) { for (int i = 0; i < shared->getNumBins(); i++) { vector temp; temp.push_back("null"); metadata.push_back(temp); } } else { for (int i = 0; i < sharedFloat->getNumBins(); i++) { vector temp; temp.push_back("null"); metadata.push_back(temp); } } } else { if (!useRelabund) { if (shared == nullptr) { m->setControl_pressed(true); return metadata; } }else { if (sharedFloat == nullptr) { m->setControl_pressed(true); return metadata; } } //should the labels be Otu001 or PhyloType001 vector otuNames; if (!useRelabund) { otuNames = shared->getOTUNames(); } else { otuNames = sharedFloat->getOTUNames(); } string firstBin = otuNames[0]; string binTag = "Otu"; if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType"; } map labelTaxMap; string snumBins = toString(otuNames.size()); for (int i = 0; i < consTax.size(); i++) { if (m->getControl_pressed()) { return metadata; } string thisOtuLabel = consTax[i].getName(); //if there is a bin label use it otherwise make one if (util.isContainingOnlyDigits(thisOtuLabel)) { string binLabel = binTag; string sbinNumber = thisOtuLabel; if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; binLabel = util.getSimpleLabel(binLabel); labelTaxMap[binLabel] = consTax[i].getConsTaxString(); }else { map::iterator it = labelTaxMap.find(util.getSimpleLabel(thisOtuLabel)); if (it == labelTaxMap.end()) { labelTaxMap[util.getSimpleLabel(thisOtuLabel)] = consTax[i].getConsTaxString(); }else { m->mothurOut("[ERROR]: Cannot add OTULabel " + thisOtuLabel + " because 
it's simple label " + util.getSimpleLabel(consTax[i].getName()) + " has already been added and will result in downstream errors. Have you mixed mothur labels and non mothur labels? To make the files work well together and backwards compatible mothur treats 1, OTU01, OTU001, OTU0001 all the same. We do this by removing any non numeric characters and leading zeros. For eaxample: Otu000018 and OtuMY18 both map to 18.\n"); m->setControl_pressed(true); } } } //sanity check for file issues - do you have the same number of bins in the shared and constaxonomy file if (!useRelabund) { if (shared->getNumBins() != labelTaxMap.size()) { m->mothurOut("[ERROR]: Your constaxonomy file contains " + toString(labelTaxMap.size()) + " otus and your shared file contain " + toString(shared->getNumBins()) + " otus, cannot continue.\n"); m->setControl_pressed(true); return metadata; } }else { if (sharedFloat->getNumBins() != labelTaxMap.size()) { m->mothurOut("[ERROR]: Your constaxonomy file contains " + toString(labelTaxMap.size()) + " otus and your shared file contain " + toString(sharedFloat->getNumBins()) + " otus, cannot continue.\n"); m->setControl_pressed(true); return metadata; } } //merges OTUs classified to same gg otuid, sets otulabels to gg otuids, averages confidence scores of merged otus. overwritting of otulabels is fine because constaxonomy only allows for one label to be processed. If this assumption changes, could cause bug. if (picrust != nullptr) { if (!useRelabund) { picrust->setGGOTUIDs(labelTaxMap, shared); } else { picrust->setGGOTUIDs(labelTaxMap, sharedFloat); } } //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]} //traverse the binLabels forming the metadata strings and saving them //make sure to sanity check map::iterator it; vector currentLabels; int numBins = 0; if (!useRelabund) { currentLabels = shared->getOTUNames(); numBins = shared->getNumBins(); } else { currentLabels = sharedFloat->getOTUNames(); numBins = sharedFloat->getNumBins(); } for (int i = 0; i < numBins; i++) { if (m->getControl_pressed()) { return metadata; } it = labelTaxMap.find(util.getSimpleLabel(currentLabels[i])); if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + currentLabels[i] + ".\n"); m->setControl_pressed(true); } else { vector scores; vector taxonomies = util.parseTax(it->second, scores); metadata.push_back(taxonomies); } } } return metadata; } catch(exception& e) { m->errorOut(e, "BiomHDF5", "getMetadata"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/biomhdf5.hpp000066400000000000000000000106671424121717000217160ustar00rootroot00000000000000// // biomhdf5.hpp // Mothur // // Created by Sarah Westcott on 10/26/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
//

//#ifdef USE_HDF5
//#endif

#ifndef biomhdf5_hpp
#define biomhdf5_hpp

#include "biom.hpp"

//http://biom-format.org/documentation/format_versions/biom-2.1.html

/* Required Groups

 observation/               : The HDF5 group that contains observation specific information and an observation oriented view of the data
 observation/matrix         : The HDF5 group that contains matrix data oriented for observation-wise operations (e.g., in compressed sparse row format)
 observation/metadata       : The HDF5 group that contains observation specific metadata information
 observation/group-metadata : The HDF5 group that contains observation specific group metadata information (e.g., phylogenetic tree)
 sample/                    : The HDF5 group that contains sample specific information and a sample oriented data oriented view of the data
 sample/matrix              : The HDF5 group that contains matrix data oriented for sample-wise operations (e.g., in compressed sparse column format)
 sample/metadata            : The HDF5 group that contains sample specific metadata information
 sample/group-metadata      : The HDF5 group that contains sample specific group metadata information (e.g., relationships between samples)
 */

/* Required Datasets

 observation/ids            : or A (N,) dataset of the observation IDs, where N is the total number of IDs
 observation/matrix/data    : A (nnz,) dataset containing the actual matrix data
 observation/matrix/indices : A (nnz,) dataset containing the column indices (e.g., maps into samples/ids)
 observation/matrix/indptr  : A (M+1,) dataset containing the compressed row offsets
 sample/ids                 : or A (M,) dataset of the sample IDs, where M is the total number of IDs
 sample/matrix/data         : A (nnz,) dataset containing the actual matrix data
 sample/matrix/indices      : A (nnz,) dataset containing the row indices (e.g., maps into observation/ids)
 sample/matrix/indptr       : A (N+1,) dataset containing the compressed column offsets
 */

/* Required Attributes

 id             : a field that can be used to id a table (or null)
 type           : Table type (a controlled vocabulary)
                  Acceptable values:
                   "OTU table"
                   "Pathway table"
                   "Function table"
                   "Ortholog table"
                   "Gene table"
                   "Metabolite table"
                   "Taxon table"
 format-url     : A string with a static URL providing format details
 format-version : The version of the current biom format, major and minor
 generated-by   : Package and revision that built the table
 creation-date  : Date the table was built (ISO 8601 format)
 shape          : , the number of OTUs (rows) and number of Samples (cols) in data
 nnz            : The number of non-zero elements in the table
 */

class BiomHDF5 : public Biom {

public:

    BiomHDF5();
    BiomHDF5(string, string);
    ~BiomHDF5() { }

    void read(string);
    void print(string, vector<string>, Picrust*);

private:

    int nnz, numOTUs, numSamples;
    vector<string> otuNames, sampleNames, taxonomy, otuTaxonomies;
    vector<int> indices, indptr;
    vector<float> otudata;

    void printShared(string, vector<string>, Picrust*);
    void printFloat(string, vector<string>, Picrust*);
    vector< vector<string> > getMetaData(Picrust*, bool useRelabund=false);

#ifdef USE_HDF5
    void readAttributes(H5::H5File& file);
    string readStringAttributes(H5::Group& fileAttributes, string);
    void readIntAttributes(H5::Group& fileAttributes, string);
    vector<string> readNames( H5::H5File& file, H5::Group& group, string);
    int readTaxonomy( H5::Group& group, string);
    string readOTUAbundances( H5::Group& group, vector<string>);
    void printRequiredFileAttributes(H5::Group& fileAttributes, int, int);
    void printNames(H5::Group& group, vector<string>, string);
    void printOTUAbundances(H5::Group& group, int, int, string, bool);
    void printOTUTaxonomy(H5::Group& group, string);
#endif
};

#endif /* biomhdf5_hpp */
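/*
 Illustrative sketch, not part of the biom 2.1 specification text above: the
 numbers below are made up and simply show how the read/print routines in
 biomhdf5.cpp can interpret the observation/matrix datasets as a compressed
 sparse row (CSR) encoding of a small OTU x sample table.

              sampleA  sampleB  sampleC
     Otu1        0        5        0
     Otu2        3        0        1

     observation/matrix/data    = [5, 3, 1]    nnz = 3 non-zero abundances
     observation/matrix/indices = [1, 0, 2]    column (sample) index of each value
     observation/matrix/indptr  = [0, 1, 3]    row i owns entries indptr[i] through indptr[i+1]-1

 The sample/matrix group holds the same values ordered by sample (compressed
 sparse column) instead of by observation.
 */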
mothur-1.48.0/source/datastructures/biomsimple.cpp000066400000000000000000001244171424121717000223530ustar00rootroot00000000000000// // biomsimple.cpp // Mothur // // Created by Sarah Westcott on 10/26/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "biomsimple.hpp" #include "picrust.hpp" /**************************************************************************************************/ BiomSimple::BiomSimple() : Biom("Biological Observation Matrix 1.0.0"){ try { matrixFormat = "sparse"; } catch(exception& e) { m->errorOut(e, "BiomSimple", "BiomSimple"); exit(1); } } /**************************************************************************************************/ BiomSimple::BiomSimple(string fname, string l) : Biom("Biological Observation Matrix 1.0.0"){ try { label = l; matrixFormat = "sparse"; read(fname); } catch(exception& e) { m->errorOut(e, "BiomSimple", "BiomSimple"); exit(1); } } /**************************************************************************************************/ void BiomSimple::read(string fname){ try { /*{ "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique", "format": "Biological Observation Matrix 0.9.1", "format_url": "http://biom-format.org", "type": "OTU table", "generated_by": "mothur1.44.0", "date": "Tue Apr 17 13:12:07 2020", rows represent OTUS columns represent samples */ ifstream in; util.openInputFile(fname, in); matrixFormat = ""; matrixElementType = ""; vector otuNames; vector groupNames; map fileLines; //vector names; int numOTUs, numCols; bool hasTaxonomy; numOTUs = 0; numCols = 0; maxLevel = 0; int shapeNumRows = 0; int shapeNumCols = 0; int countOpenBrace = 0; int countClosedBrace = 0; int closeParen = 0; int openParen = -1; //account for opening brace bool ignoreCommas = false; bool atComma = false; string line = ""; bool printHeaders = true; while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1" if (m->getControl_pressed()) { break; } char c = in.get(); gobble(in); if (c == '[') { countOpenBrace++; } else if (c == ']') { countClosedBrace++; } else if (c == '{') { openParen++; } else if (c == '}') { closeParen++; } else if ((!ignoreCommas) && (c == ',')) { atComma = true; } if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; } else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; } if (atComma && !ignoreCommas) { if (fileLines.size() == 0) { //clip first { line = line.substr(1); } string tag = getTag(line); fileLines[tag] = line; line = ""; atComma = false; ignoreCommas = false; }else { line += c; } } if (line != "") { line = line.substr(0, line.length()-1); string tag = getTag(line); fileLines[tag] = line; } in.close(); //check for required fields map::iterator it; it = fileLines.find("type"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); } else { string thisLine = it->second; tableType = getTag(thisLine); } if (m->getControl_pressed()) { return; } it = fileLines.find("matrix_type"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); } else { string thisLine = it->second; matrixFormat = getTag(thisLine); if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. 
Types allowed are sparse and dense.\n"); m->setControl_pressed(true); } } if (m->getControl_pressed()) { return; } it = fileLines.find("matrix_element_type"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); } else { string thisLine = it->second; matrixElementType = getTag(thisLine); if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->setControl_pressed(true); } } if (m->getControl_pressed()) { return; } map otuTaxonomies; it = fileLines.find("rows"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); } else { maxLevel = 0; string thisLine = it->second; bool hasTaxonomy = false; vector< vector > results = extractTaxonomyData(thisLine, numOTUs, hasTaxonomy); if ((tableType == "Taxon table") || (tableType == "Taxontable")) { vector taxonomies = results[0]; //create OTU names string snumBins = toString(numOTUs); for (int i = 0; i < numOTUs; i++) { //if there is a bin label use it otherwise make one string binLabel = "OTU"; string sbinNumber = toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; otuNames.push_back(binLabel); otuTaxonomies[otuNames[i]] = taxonomies[i]; } }else{ otuNames = results[0]; if (hasTaxonomy) { for (int i = 0; i < otuNames.size(); i++) { otuTaxonomies[otuNames[i]] = results[1][i]; } } } } if (m->getControl_pressed()) { return; } it = fileLines.find("columns"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); } else { string thisLine = it->second; //read sample names maxLevel = 0; bool hasTaxonomy = false; vector< vector > results = extractTaxonomyData(thisLine, numCols, hasTaxonomy); groupNames = results[0]; if (hasTaxonomy) { for (int i = 0; i < results[1].size(); i++) { if (m->getControl_pressed()) { break; } string completeTax = util.addUnclassifieds(results[1][i], maxLevel, false); groupTaxonomies[results[0][i]] = completeTax; } } } if (m->getControl_pressed()) { return; } it = fileLines.find("shape"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); } else { string thisLine = it->second; getDims(thisLine, shapeNumRows, shapeNumCols); //check shape if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->setControl_pressed(true); } if (shapeNumRows != numOTUs) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numOTUs) + " rows.\n"); m->setControl_pressed(true); } } if (m->getControl_pressed()) { return; } it = fileLines.find("data"); if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); } else { string thisLine = it->second; if (shared != nullptr) { delete shared; } shared = extractOTUData(thisLine, groupNames, numOTUs); shared->setOTUNames(otuNames); m->mothurOut("\n"+shared->getLabel()+"\n"); if (otuTaxonomies.size() != 0) { //sanity check if ((shared->getNumBins() == otuTaxonomies.size()) && (shared->getNumBins() == numOTUs)) { for (int i = 0; i < shared->getNumBins(); i++) { if (m->getControl_pressed()) { break; } string thisOTUsTax = 
otuTaxonomies[otuNames[i]]; string newTax = util.addUnclassifieds(thisOTUsTax, maxLevel, false); Taxonomy thisOTUsTaxonomy(otuNames[i], newTax, shared->getOTUTotal(i)); consTax.push_back(thisOTUsTaxonomy); } } } } } catch(exception& e) { m->errorOut(e, "BiomSimple", "read"); exit(1); } } //********************************************************************************************************************** //designed for things like "type": "OTU table", returns type string BiomSimple::getTag(string& line) { try { bool inQuotes = false; string tag = ""; char c = '\"'; for (int i = 0; i < line.length(); i++) { //you want to ignore any ; until you reach the next ' if ((line[i] == c) && (!inQuotes)) { inQuotes = true; } else if ((line[i] == c) && (inQuotes)) { inQuotes= false; line = line.substr(i+1); return tag; } if (inQuotes) { if (line[i] != c) { tag += line[i]; } } } return tag; } catch(exception& e) { m->errorOut(e, "BiomSimple", "getTag"); exit(1); } } //********************************************************************************************************************** //readRows vector< vector > BiomSimple::extractTaxonomyData(string line, int& numOTUs, bool& hasTaxonomy) { try { /*"rows":[ {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}}, {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}}, ... "rows":[{"id": "k__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae", "metadata": null}, {"id": "k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae", "metadata": null} .... 
make look like above ],*/ vector< vector > results; results.resize(2); int countOpenBrace = 0; int countClosedBrace = 0; int openParen = 0; int closeParen = 0; string nextRow = ""; bool end = false; bool allBlank = true; for (int i = 0; i < line.length(); i++) { if (m->getControl_pressed()) { return results; } if (line[i] == '[') { countOpenBrace++; } else if (line[i] == ']') { countClosedBrace++; } else if (line[i] == '{') { openParen++; } else if (line[i] == '}') { closeParen++; } else if (openParen != 0) { nextRow += line[i]; } //you are reading the row info //you have reached the end of the rows info if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; } if ((openParen == closeParen) && (closeParen != 0)) { //process row numOTUs++; vector result = getNamesAndTaxonomies(nextRow); if (result.size() != 0) { results[0].push_back(result[0]); results[1].push_back(result[1]); if (result[1] != "") { allBlank = false; } } nextRow = ""; openParen = 0; closeParen = 0; } } if (allBlank) { hasTaxonomy = false; } else { hasTaxonomy = true; } return results; } catch(exception& e) { m->errorOut(e, "BiomSimple", "extractTaxonomyData"); exit(1); } } //********************************************************************************************************************** //items[0] = id, items[1] = taxonomy, if items[2] then thats the taxonomy bootstrap values vector BiomSimple::getNamesAndTaxonomies(string line) { try { /*"rows":[ {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}}, {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}}, ... "rows":[{"id": "k__Archaea;p__Euryarchaeota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae", "metadata": null}, {"id": "k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae", "metadata": null} .... make look like above ],*/ vector results; if (line == "") { return results; } int pos = line.find_first_of(','); if (pos == string::npos) { //some kind of error?? 
we expect at least metadata : null, just grab name results.push_back(getName(line)); results.push_back(""); }else { string value; util.splitAtComma(value, line); //value hold name portion ("id":"Otu01") line holds rest results.push_back(getName(value)); string taxonomy = ""; string bootstrap = ""; int pos = line.find("taxonomy"); if (pos != string::npos) { //no taxonomy info given int pos2 = line.find("bootstrap"); if (pos2 != string::npos) { //no taxonomy info given taxonomy = line.substr(pos, (pos2-pos)); taxonomy = taxonomy.substr(0, taxonomy.find_last_of(',')); bootstrap = line.substr(pos2); }else { taxonomy = line.substr(pos); } } results.push_back(getTaxonomy(taxonomy, bootstrap)); } return results; } catch(exception& e) { m->errorOut(e, "BiomSimple", "getNamesAndTaxonomies"); exit(1); } } //********************************************************************************************************************** string BiomSimple::getName(string line) { try { vector nameItems; util.splitAtChar(line, nameItems, ':'); //split part we want containing the ids string name = nameItems[1]; //remove "" if needed int pos = name.find("\""); if (pos != string::npos) { string newName = ""; for (int k = 0; k < name.length(); k++) { if (name[k] != '\"') { newName += name[k]; } } name = newName; } return name; } catch(exception& e) { m->errorOut(e, "BiomSimple", "getName"); exit(1); } } //********************************************************************************************************************** //"taxonomy":"Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified", //"bootstrap":100, 100, 100, 100, 100, 100 string BiomSimple::getTaxonomy(string taxonomy, string bootstrap) { try { vector results; if (taxonomy != "") { vector taxItems; util.splitAtChar(taxonomy, taxItems, ':'); //split part we want containing the ids string taxons = taxItems[1]; string taxon; while((taxons.find_first_of(',') != -1)) { if (m->getControl_pressed()) {break;} util.splitAtComma(taxon, taxons); results.push_back(taxon); } if (!util.stringBlank(taxons)) { results.push_back(taxons); } } if (bootstrap != "") { vector bootItems; util.splitAtChar(bootstrap, bootItems, ':'); //split part we want containing the ids string bootValues = bootItems[1]; string bootValue; int i = 0; while((bootValues.find_first_of(',') != -1)) { if (m->getControl_pressed()) {break;} util.splitAtComma(bootValue, bootValues); results[i]+="("+bootValue+")"; i++; } if (!util.stringBlank(bootValues)) { results[i]+="("+bootValues+")"; } } string result = ""; for (int i = 0; i < results.size(); i++) { if (m->getControl_pressed()) {result = ""; break;} result += results[i] + ";"; } if (results.size() > maxLevel) { maxLevel = results.size(); } return result; } catch(exception& e) { m->errorOut(e, "BiomSimple", "getTaxonomy"); exit(1); } } //********************************************************************************************************************** void BiomSimple::getDims(string line, int& shapeNumRows, int& shapeNumCols) { try { //get shape bool inBar = false; string num = ""; for (int i = 0; i < line.length(); i++) { //you want to ignore any ; until you reach the next ' if ((line[i] == '[') && (!inBar)) { inBar = true; i++; if (!(i < line.length())) { break; } } else if ((line[i] == ']') && (inBar)) { inBar= false; util.mothurConvert(num, shapeNumCols); break; } if (inBar) { if (line[i] == ',') { util.mothurConvert(num, shapeNumRows); num = ""; }else { if (!isspace(line[i])) { num += line[i]; } } } } } 
catch(exception& e) { m->errorOut(e, "BiomSimple", "getDims"); exit(1); } } //********************************************************************************************************************** //readData SharedRAbundVectors* BiomSimple::extractOTUData(string line, vector& groupNames, int numOTUs) { try { SharedRAbundVectors* lookup = new SharedRAbundVectors(); //creates new sharedRAbunds for (int i = 0; i < groupNames.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0 temp->setLabel(label); temp->setGroup(groupNames[i]); lookup->push_back(temp); } if (matrixElementType == "float") { if (sharedFloat != nullptr) { delete sharedFloat; } sharedFloat = new SharedRAbundFloatVectors(); //creates new sharedRAbunds for (int i = 0; i < groupNames.size(); i++) { SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(numOTUs); //sets all abunds to 0 temp->setLabel(label); temp->setGroup(groupNames[i]); sharedFloat->push_back(temp); } } bool dataStart = false; bool inBrackets = false; string num = ""; vector nums; vector numsFloat; int otuCount = 0; for (int i = 0; i < line.length(); i++) { if (m->getControl_pressed()) { return lookup; } //look for opening [ to indicate data is starting if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } } else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data if (dataStart) { if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } } else if ((line[i] == ']') && (inBrackets)) { inBrackets = false; int temp; float temp2; if (matrixElementType == "float") { util.mothurConvert(num, temp2); numsFloat.push_back(temp2); temp = (int)temp2; }else { util.mothurConvert(num, temp); } nums.push_back(temp); num = ""; //save info to vectors if (matrixFormat == "dense") { //sanity check if (nums.size() != lookup->size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->setControl_pressed(true); } //set abundances for this otu //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU for (int j = 0; j < groupNames.size(); j++) { lookup->set(otuCount, nums[j], groupNames[j]); } if (matrixElementType == "float") { //sanity check if (numsFloat.size() != sharedFloat->size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->setControl_pressed(true); } //set abundances for this otu //nums contains [abundSample0, abundSample1, abundSample2, ...] 
for current OTU for (int j = 0; j < groupNames.size(); j++) { sharedFloat->set(otuCount, numsFloat[j], groupNames[j]); } } otuCount++; }else { //sanity check if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->setControl_pressed(true); } //nums contains [otuNum, sampleNum, abundance] lookup->set(nums[0], nums[2], groupNames[nums[1]]); if (matrixElementType == "float") { //nums contains [otuNum, sampleNum, abundance] sharedFloat->set(nums[0], numsFloat[2], groupNames[nums[1]]); } } nums.clear(); numsFloat.clear(); } if (inBrackets) { if (line[i] == ',') { float temp2; util.mothurConvert(num, temp2); numsFloat.push_back(temp2); nums.push_back((int)temp2); num = ""; }else { if (!isspace(line[i])) { num += line[i]; } } } } } return lookup; } catch(exception& e) { m->errorOut(e, "BiomSimple", "extractOTUData"); exit(1); } } //********************************************************************************************************************** void BiomSimple::print(string filename, vector sampleMetadata, Picrust* picrust) { try { vector metadata = getMetaDataShared(picrust); int numBins = shared->getNumBins(); int numSamples = shared->size(); vector currentLabels = shared->getOTUNames(); vector namesOfGroups = shared->getNamesGroups(); if (m->getControl_pressed()) { return; } time_t rawtime; struct tm * timeinfo; time ( &rawtime ); timeinfo = localtime ( &rawtime ); string dateString = asctime (timeinfo); int pos = dateString.find('\n'); if (pos != string::npos) { dateString = dateString.substr(0, pos);} string spaces = " "; ofstream out; util.openOutputFile(filename, out); out << "{\n" + spaces + "\"id\":\"" + util.getSimpleName(sharedFileName) + "-" + label + "\",\n" + spaces + "\"format\": \"" + version + "\",\n" + spaces + "\"format_url\": \"" + formatURL + "\",\n"; out << spaces + "\"type\": \"" + tableType + " \",\n" + spaces + "\"generated_by\": \"" << mothurVersion << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n"; //get row info /*"rows":[ {"id":"GG_OTU_1", "metadata":null}, {"id":"GG_OTU_2", "metadata":null}, {"id":"GG_OTU_3", "metadata":null}, {"id":"GG_OTU_4", "metadata":null}, {"id":"GG_OTU_5", "metadata":null} ],*/ out << spaces + "\"rows\":[\n"; string rowFront = spaces + spaces + "{\"id\":\""; string rowBack = "\", \"metadata\":"; for (int i = 0; i < numBins-1; i++) { if (m->getControl_pressed()) { out.close(); return; } out << rowFront << currentLabels[i] << rowBack << metadata[i] << "},\n"; } out << rowFront << currentLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; //get column info /*"columns": [ {"id":"Sample1", "metadata":null}, {"id":"Sample2", "metadata":null}, {"id":"Sample3", "metadata":null}, {"id":"Sample4", "metadata":null}, {"id":"Sample5", "metadata":null}, {"id":"Sample6", "metadata":null} ],*/ string colBack = "\", \"metadata\":"; out << spaces + "\"columns\":[\n"; for (int i = 0; i < namesOfGroups.size()-1; i++) { if (m->getControl_pressed()) { out.close(); return; } out << rowFront << namesOfGroups[i] << colBack << sampleMetadata[i] << "},\n"; } out << rowFront << namesOfGroups[(namesOfGroups.size()-1)] << colBack << sampleMetadata[numSamples-1] << "}\n" + spaces + "],\n"; out << spaces + "\"matrix_type\": \"" << matrixFormat << "\",\n" + spaces + "\"matrix_element_type\": \"" + matrixElementType + "\",\n"; out << spaces + "\"shape\": [" << numBins << "," << numSamples << "],\n"; out << spaces + "\"data\": ["; vector dataRows; if (matrixFormat == "sparse") { /*"data":[[0,2,1], [1,0,5], 
[1,1,1], [1,3,2], [1,4,3], [1,5,1], [2,2,1], [2,3,4], [2,4,2], [3,0,2], [3,1,1], [3,2,1], [3,5,1], [4,1,1], [4,2,1] ]*/ if (matrixElementType == "int") { for (int i = 0; i < shared->getNumBins(); i++) { if (m->getControl_pressed()) { out.close(); return; } vector binAbunds = shared->getOTU(i); for (int j = 0; j < binAbunds.size(); j++) { int abund = binAbunds[j]; string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(abund) + "]"; //only print non zero values if (abund != 0) { dataRows.push_back(binInfo); } } } }else { float zero = 0.0; for (int i = 0; i < sharedFloat->getNumBins(); i++) { if (m->getControl_pressed()) { out.close(); return; } vector binAbunds = sharedFloat->getOTU(i); for (int j = 0; j < binAbunds.size(); j++) { float abund = binAbunds[j]; string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(abund) + "]"; //only print non zero values if (!util.isEqual(abund,zero)) { dataRows.push_back(binInfo); } } } } }else { /* "matrix_type": "dense", "matrix_element_type": "int", "shape": [5,6], "data": [[0,0,1,0,0,0], [5,1,0,2,3,1], [0,0,1,4,2,0], [2,1,1,0,0,1], [0,1,1,0,0,0]]*/ if (matrixElementType == "int") { for (int i = 0; i < shared->getNumBins(); i++) { if (m->getControl_pressed()) { out.close(); return; } string binInfo = "["; vector binAbund = shared->getOTU(i); for (int j = 0; j < binAbund.size()-1; j++) { binInfo += toString(binAbund[j]) + ","; } binInfo += toString(binAbund[binAbund.size()-1]) + "]"; dataRows.push_back(binInfo); } }else { for (int i = 0; i < sharedFloat->getNumBins(); i++) { if (m->getControl_pressed()) { out.close(); return; } string binInfo = "["; vector binAbund = sharedFloat->getOTU(i); for (int j = 0; j < binAbund.size()-1; j++) { binInfo += toString(binAbund[j]) + ","; } binInfo += toString(binAbund[binAbund.size()-1]) + "]"; dataRows.push_back(binInfo); } } } for (int i = 0; i < dataRows.size()-1; i++) { out << dataRows[i] << ",\n" + spaces + spaces; } out << dataRows[dataRows.size()-1] << "]\n"; out << "}\n"; } catch(exception& e) { m->errorOut(e, "BiomSimple", "print"); exit(1); } } //********************************************************************************************************************** vector BiomSimple::getMetaDataShared(Picrust* picrust){ try { vector metadata; if (consTax.size() == 0) { for (int i = 0; i < shared->getNumBins(); i++) { metadata.push_back("null"); } } else { if (shared == nullptr) { m->setControl_pressed(true); return metadata; } //should the labels be Otu001 or PhyloType001 vector otuNames = shared->getOTUNames(); string firstBin = otuNames[0]; string binTag = "Otu"; if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType"; } map labelTaxMap; string snumBins = toString(otuNames.size()); for (int i = 0; i < consTax.size(); i++) { if (m->getControl_pressed()) { return metadata; } string thisOtuLabel = consTax[i].getName(); //if there is a bin label use it otherwise make one if (util.isContainingOnlyDigits(thisOtuLabel)) { string binLabel = binTag; string sbinNumber = thisOtuLabel; if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; binLabel = util.getSimpleLabel(binLabel); labelTaxMap[binLabel] = consTax[i].getConsTaxString(); }else { map::iterator it = labelTaxMap.find(util.getSimpleLabel(thisOtuLabel)); if (it == labelTaxMap.end()) { labelTaxMap[util.getSimpleLabel(thisOtuLabel)] = consTax[i].getConsTaxString(); }else { m->mothurOut("[ERROR]: 
Cannot add OTULabel " + thisOtuLabel + " because it's simple label " + util.getSimpleLabel(consTax[i].getName()) + " has already been added and will result in downstream errors. Have you mixed mothur labels and non mothur labels? To make the files work well together and backwards compatible mothur treats 1, OTU01, OTU001, OTU0001 all the same. We do this by removing any non numeric characters and leading zeros. For eaxample: Otu000018 and OtuMY18 both map to 18.\n"); m->setControl_pressed(true); } } } //sanity check for file issues - do you have the same number of bins in the shared and constaxonomy file if (shared->getNumBins() != labelTaxMap.size()) { m->mothurOut("[ERROR]: Your constaxonomy file contains " + toString(labelTaxMap.size()) + " otus and your shared file contain " + toString(shared->getNumBins()) + " otus, cannot continue.\n"); m->setControl_pressed(true); return metadata; } //merges OTUs classified to same gg otuid, sets otulabels to gg otuids, averages confidence scores of merged otus. overwritting of otulabels is fine because constaxonomy only allows for one label to be processed. If this assumption changes, could cause bug. if (picrust != nullptr) { picrust->setGGOTUIDs(labelTaxMap, shared); } //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]} //traverse the binLabels forming the metadata strings and saving them //make sure to sanity check map::iterator it; vector currentLabels = shared->getOTUNames(); for (int i = 0; i < shared->getNumBins(); i++) { if (m->getControl_pressed()) { return metadata; } it = labelTaxMap.find(util.getSimpleLabel(currentLabels[i])); if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + currentLabels[i] + ".\n"); m->setControl_pressed(true); } else { vector bootstrapValues; string data = "{\"taxonomy\":["; vector scores; vector taxonomies = util.parseTax(it->second, scores); for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; } data += "\"" + taxonomies[taxonomies.size()-1] + "\"]"; //add bootstrap values if available if (scores[0] != "null") { data += ", \"bootstrap\":["; for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; } data += scores[scores.size()-1] + "]"; } data += "}"; metadata.push_back(data); } } } return metadata; } catch(exception& e) { m->errorOut(e, "BiomSimple", "getMetadataShared"); exit(1); } } //********************************************************************************************************************** vector BiomSimple::getMetaDataFloat(Picrust* picrust){ try { vector metadata; if (consTax.size() == 0) { for (int i = 0; i < sharedFloat->getNumBins(); i++) { metadata.push_back("null"); } } else { if (sharedFloat == nullptr) { m->setControl_pressed(true); return metadata; } //should the labels be Otu001 or PhyloType001 vector otuNames = sharedFloat->getOTUNames(); string firstBin = otuNames[0]; string binTag = "Otu"; if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType"; } map labelTaxMap; string snumBins = toString(otuNames.size()); for (int i = 0; i < consTax.size(); i++) { if (m->getControl_pressed()) { return metadata; } string thisOtuLabel = consTax[i].getName(); //if there is a bin label use it otherwise make one if (util.isContainingOnlyDigits(thisOtuLabel)) { string binLabel = binTag; string sbinNumber = thisOtuLabel; if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - 
sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; binLabel = util.getSimpleLabel(binLabel); labelTaxMap[binLabel] = consTax[i].getConsTaxString(); }else { map::iterator it = labelTaxMap.find(util.getSimpleLabel(thisOtuLabel)); if (it == labelTaxMap.end()) { labelTaxMap[util.getSimpleLabel(thisOtuLabel)] = consTax[i].getConsTaxString(); }else { m->mothurOut("[ERROR]: Cannot add OTULabel " + thisOtuLabel + " because it's simple label " + util.getSimpleLabel(consTax[i].getName()) + " has already been added and will result in downstream errors. Have you mixed mothur labels and non mothur labels? To make the files work well together and backwards compatible mothur treats 1, OTU01, OTU001, OTU0001 all the same. We do this by removing any non numeric characters and leading zeros. For eaxample: Otu000018 and OtuMY18 both map to 18.\n"); m->setControl_pressed(true); } } } //sanity check for file issues - do you have the same number of bins in the shared and constaxonomy file if (sharedFloat->getNumBins() != labelTaxMap.size()) { m->mothurOut("[ERROR]: Your constaxonomy file contains " + toString(labelTaxMap.size()) + " otus and your shared file contain " + toString(sharedFloat->getNumBins()) + " otus, cannot continue.\n"); m->setControl_pressed(true); return metadata; } //merges OTUs classified to same gg otuid, sets otulabels to gg otuids, averages confidence scores of merged otus. overwritting of otulabels is fine because constaxonomy only allows for one label to be processed. If this assumption changes, could cause bug. if (picrust != nullptr) { picrust->setGGOTUIDs(labelTaxMap, sharedFloat); } //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]} //traverse the binLabels forming the metadata strings and saving them //make sure to sanity check map::iterator it; vector currentLabels = sharedFloat->getOTUNames(); for (int i = 0; i < sharedFloat->getNumBins(); i++) { if (m->getControl_pressed()) { return metadata; } it = labelTaxMap.find(util.getSimpleLabel(currentLabels[i])); if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + currentLabels[i] + ".\n"); m->setControl_pressed(true); } else { vector bootstrapValues; string data = "{\"taxonomy\":["; vector scores; vector taxonomies = util.parseTax(it->second, scores); for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; } data += "\"" + taxonomies[taxonomies.size()-1] + "\"]"; //add bootstrap values if available if (scores[0] != "null") { data += ", \"bootstrap\":["; for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; } data += scores[scores.size()-1] + "]"; } data += "}"; metadata.push_back(data); } } } return metadata; } catch(exception& e) { m->errorOut(e, "BiomSimple", "getMetadataFloat"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/biomsimple.hpp000066400000000000000000000017311424121717000223510ustar00rootroot00000000000000// // biomsimple.hpp // Mothur // // Created by Sarah Westcott on 10/26/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
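//
//  Illustrative usage sketch (not part of the original header; the file name,
//  label, and metadata values are placeholders, and passing nullptr for the
//  Picrust* argument assumes no PICRUSt relabeling is wanted, since the print
//  code only uses that pointer when it is non-null):
//
//      BiomSimple biom("final.biom", "0.03");        // parses the json formatted biom file
//      vector<string> sampleMetadata(3, "null");     // one entry per sample in the table; "null" means no metadata
//      biom.print("final.relabeled.biom", sampleMetadata, nullptr);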
// #ifndef biomsimple_hpp #define biomsimple_hpp //biom version 0.9.1 #include "biom.hpp" #include "picrust.hpp" class BiomSimple : public Biom { public: BiomSimple(); BiomSimple(string, string); // filename, label ~BiomSimple() { } void read(string); void print(string, vector, Picrust*); //filename, metatdata, picrust private: string getTag(string&); void getDims(string, int&, int&); SharedRAbundVectors* extractOTUData(string, vector&, int); vector< vector > extractTaxonomyData(string, int&, bool&); vector getNamesAndTaxonomies(string); string getName(string); string getTaxonomy(string, string); vector getMetaDataShared(Picrust*); vector getMetaDataFloat(Picrust*); }; #endif /* biomsimple_hpp */ mothur-1.48.0/source/datastructures/compare.h000066400000000000000000000022041424121717000212730ustar00rootroot00000000000000// // compare.h // Mothur // // Created by Sarah Westcott on 6/5/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef compare_h #define compare_h #include "mothurout.h" //*************************************************************************************************************** struct Compare { Compare(){ AA=0; AT=0; AG=0; AC=0; TA=0; TT=0; TG=0; TC=0; GA=0; GT=0; GG=0; GC=0; CA=0; CT=0; CG=0; CC=0; NA=0; NT=0; NG=0; NC=0; Ai=0; Ti=0; Gi=0; Ci=0; Ni=0; dA=0; dT=0; dG=0; dC=0; refName = ""; queryName = ""; weight = 1; matches = 0; mismatches = 0; total = 0; errorRate = 1.0000; sequence = ""; } ~Compare(){} int AA, AT, AG, AC, TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC; string refName, queryName, sequence; double errorRate; int weight, matches, mismatches, total; }; //*************************************************************************************************************** #endif /* compare_h */ mothur-1.48.0/source/datastructures/contigsreport.cpp000066400000000000000000000065011424121717000231060ustar00rootroot00000000000000// // contigsreport.cpp // Mothur // // Created by Sarah Westcott on 7/17/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
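//
//  Illustrative example of a single report line matching the columns listed
//  below (the sequence name and all values are made up):
//
//      M00967_43_000000000-A3JHG_1_1101_10011_3881  253  152  102  253  0  0  0.000215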
// #include "contigsreport.hpp" //Name Length Overlap_Length Overlap_Start Overlap_End MisMatches Num_Ns Expected_Errors /******************************************************************************************************************/ ContigsReport::ContigsReport() : Report() { try { fillHeaders(); } catch(exception& e) { m->errorOut(e, "ContigsReport", "ContigsReport"); exit(1); } } /**************************************************************************************************/ void ContigsReport::read(ifstream& repFile){ try { repFile >> name; repFile >> length; repFile >> overlapLength; repFile >> overlapStart; repFile >> overlapEnd; repFile >> misMatches; repFile >> numsNs; repFile >> expectedErrors; gobble(repFile); } catch(exception& e) { m->errorOut(e, "ContigsReport", "read"); exit(1); } } /******************************************************************************************************************/ void ContigsReport::print(ofstream& reportFile){ try { reportFile << name << '\t' << length << '\t' << overlapLength << '\t'; reportFile << overlapStart << '\t' << overlapEnd << '\t'; reportFile << misMatches << '\t' << numsNs << '\t'; reportFile << setprecision(6) << expectedErrors << endl; } catch(exception& e) { m->errorOut(e, "ContigsReport", "print"); exit(1); } } /******************************************************************************************************************/ string ContigsReport::getSeqReport(){ try { string output = ""; output += name + '\t' + toString(length) + '\t' + toString(overlapLength) + '\t'; output += toString(overlapStart) + '\t' + toString(overlapEnd) + '\t'; output += toString(misMatches) + '\t' + toString(numsNs) + '\t'; string temp = toString(expectedErrors); int pos = temp.find_last_of('.'); //find deicmal point if their is one //if there is a decimal if (pos != -1) { temp = temp.substr(0, pos+6); } //set precision to 5 places else{ temp += ".00000"; } output += temp + '\n'; return output; } catch(exception& e) { m->errorOut(e, "ContigsReport", "getSeqReport"); exit(1); } } /******************************************************************************************************************/ void ContigsReport::fillHeaders() { try { reportHeaders.push_back("Name"); reportHeaders.push_back("Length"); reportHeaders.push_back("Overlap_Length"); reportHeaders.push_back("Overlap_Start"); reportHeaders.push_back("Overlap_End"); reportHeaders.push_back("MisMatches"); reportHeaders.push_back("Num_Ns"); reportHeaders.push_back("Expected_Errors"); } catch(exception& e) { m->errorOut(e, "ContigsReport", "fillHeaders"); exit(1); } } /******************************************************************************************************************/ mothur-1.48.0/source/datastructures/contigsreport.hpp000066400000000000000000000042611424121717000231140ustar00rootroot00000000000000// // contigsreport.hpp // Mothur // // Created by Sarah Westcott on 7/17/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #ifndef contigsreport_hpp #define contigsreport_hpp #include "report.hpp" /* This class stores information for one line in the contigs report file */ /******************************************************************************************************************/ class ContigsReport : public Report { public: ContigsReport(); ~ContigsReport() = default; //io functions, note - printHeaders / readHeaders / getHeaders in Report parent class void read(ifstream&); //read line in report file void print(ofstream&); //print line in report file string getSeqReport(); //return string containing line from report file //set values void setName(string n) { name = n; } void setLength(int n) { length = n; } void setOverlapLength(int n) { overlapLength = n; } void setOverlapStart(int n) { overlapStart = n; } void setOverlapEnd(int n) { overlapEnd = n; } void setMisMatches(int n) { misMatches = n; } void setNumNs(int n) { numsNs = n; } void setExpectedErrors(float i) { expectedErrors = i; } //get values string getName() { return name; } int getLength() { return length; } int getOverlapLength() { return overlapLength; } int getOverlapStart() { return overlapStart; } int getOverlapEnd() { return overlapEnd; } int getMisMatches() { return misMatches; } int getNumNs() { return numsNs; } float getExpectedErrors() { return expectedErrors; } private: void fillHeaders(); string name; int length, overlapLength, overlapStart, overlapEnd, misMatches, numsNs; float expectedErrors; }; /******************************************************************************************************************/ #endif /* contigsreport_hpp */ mothur-1.48.0/source/datastructures/counttable.cpp000066400000000000000000002603711424121717000223530ustar00rootroot00000000000000// // counttable.cpp // Mothur // // Created by Sarah Westcott on 6/26/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
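//
//  Overview of the internal layout used by the functions below: each sequence
//  gets a row index via indexNameMap and a total abundance in totals; when
//  group data is present, counts stores one sparse row per sequence as
//  (abundance, groupIndex) pairs, so a sequence present in only 2 of 20 groups
//  stores just 2 pairs. compressAbunds()/expandAbunds() convert between that
//  sparse form and a full vector with one slot per group, and totalGroups
//  tracks the running per-group totals.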
// #include "counttable.h" #include "groupmap.h" /************************************************************/ //used by tree commands int CountTable::createTable(map& g) { try { set names; set groups; for (map::iterator it = g.begin(); it != g.end(); it++) { if (m->getControl_pressed()) { break; } names.insert(it->first); groups.insert(it->second); } return (createTable(names, g, groups)); } catch(exception& e) { m->errorOut(e, "CountTable", "createTable"); exit(1); } } /************************************************************/ //used by tree commands int CountTable::createTable(set& n, map& g, set& gs) { try { hasGroups = false; int numGroups = 0; groups.clear(); totalGroups.clear(); indexGroupMap.clear(); indexNameMap.clear(); counts.clear(); for (set::iterator it = gs.begin(); it != gs.end(); it++) { string gName = *it; util.checkGroupName(gName); groups.push_back(gName); hasGroups = true; } numGroups = groups.size(); totalGroups.resize(numGroups, 0); //sort groups to keep consistent with how we store the groups in groupmap sort(groups.begin(), groups.end()); for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; } uniques = 0; total = 0; bool error = false; //n contains treenames for (set::iterator it = n.begin(); it != n.end(); it++) { if (m->getControl_pressed()) { break; } string seqName = *it; vector groupCounts; map::iterator itGroup = g.find(seqName); if (itGroup != g.end()) { groupCounts.push_back(intPair(1, indexGroupMap[itGroup->second])); totalGroups[indexGroupMap[itGroup->second]]++; }else { //look for it in names of groups to see if the user accidently used the wrong file if (util.inUsersGroups(seqName, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct.\n"); } map::iterator it2 = indexNameMap.find(seqName); if (it2 == indexNameMap.end()) { if (hasGroups) { counts.push_back(groupCounts); } indexNameMap[seqName] = uniques; totals.push_back(1); total++; uniques++; }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. 
Please correct.\n"); } } if (error) { m->setControl_pressed(true); } else { //check for zero groups if (hasGroups) { for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] == 0) { //m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } } } } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "createTable"); exit(1); } } /************************************************************/ bool CountTable::testGroups(string file) { try { vector nothing; return testGroups(file, nothing); } catch(exception& e) { m->errorOut(e, "CountTable", "testGroups"); exit(1); } } /************************************************************/ bool CountTable::testGroups(string file, vector& groups) { try { m = MothurOut::getInstance(); hasGroups = false; total = 0; isCompressed = true; ifstream in; util.openInputFile(file, in); string headers = util.getline(in); gobble(in); if (headers[0] == '#') { //is this a count file in compressed form //read headers headers = util.getline(in); gobble(in); //gets compressed group name map line headers = util.getline(in); gobble(in); }else { isCompressed = false; } vector columnHeaders = util.splitWhiteSpace(headers); if (columnHeaders.size() > 2) { hasGroups = true; for (int i = 2; i < columnHeaders.size(); i++) { util.checkGroupName(columnHeaders[i]); groups.push_back(columnHeaders[i]); } //sort groups to keep consistent with how we store the groups in groupmap sort(groups.begin(), groups.end()); } return hasGroups; } catch(exception& e) { m->errorOut(e, "CountTable", "testGroups"); exit(1); } } /************************************************************/ bool CountTable::setNamesOfGroups(vector mygroups) { try { //remove groups from table not in new groups we are setting for (int i = 0; i < groups.size();) { if (util.inUsersGroups(groups[i], mygroups)) { ++i; } else { removeGroup(groups[i]); } } //add any new groups in new groups list to table for (int i = 0; i < mygroups.size(); i++) { if (util.inUsersGroups(mygroups[i], groups)) {} else { addGroup(mygroups[i]); } } //false if error return (!m->getControl_pressed()); } catch(exception& e) { m->errorOut(e, "CountTable", "setNamesOfGroups"); exit(1); } } /************************************************************/ int CountTable::createTable(string namefile, string groupfile, vector selectedGroups, bool createGroup) { try { GroupMap* groupMap; int numGroups = 0; groups.clear(); totalGroups.clear(); indexGroupMap.clear(); indexNameMap.clear(); counts.clear(); map originalGroupIndexes; uniques = 0; total = 0; bool error = false; bool pickedGroups = false; if (selectedGroups.size() != 0) { pickedGroups = true; } if (groupfile != "") { hasGroups = true; groupMap = new GroupMap(groupfile); groupMap->readMap(selectedGroups); numGroups = groupMap->getNumGroups(); groups = groupMap->getNamesOfGroups(); totalGroups.resize(numGroups, 0); }else if(createGroup) { hasGroups = true; numGroups = 1; groups.push_back("Group1"); totalGroups.resize(numGroups, 0); } //sort groups to keep consistent with how we store the groups in groupmap sort(groups.begin(), groups.end()); for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; } if ((namefile == "") && (groupfile == "")) { m->mothurOut("[ERROR]: No name or group file given. 
You must provide a name or group file to create a count file, please correct.\n"); m->setControl_pressed(true); return 0; } else if (namefile != "") { ifstream in; util.openInputFile(namefile, in); while (!in.eof()) { if (m->getControl_pressed()) { break; } string firstCol, secondCol; in >> firstCol; gobble(in); in >> secondCol; gobble(in); util.checkName(firstCol); util.checkName(secondCol); vector names; util.splitAtChar(secondCol, names, ','); map groupCounts; for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; } //initialize groupCounts int thisTotal = 0; if (groupfile != "") { //get counts for each of the users groups for (int i = 0; i < names.size(); i++) { string group = groupMap->getGroup(names[i]); if (group == "not found") { if (!pickedGroups) { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct.\n"); error=true; } //else - ignore because we assume this read is from a group we are not interested in }else { //this is a read from a group we want to save map::iterator it = groupCounts.find(group); //if not found, then this sequence is not from a group we care about if (it != groupCounts.end()) { it->second++; } thisTotal++; } } }else if (createGroup) { thisTotal = names.size(); groupCounts["Group1"] = thisTotal; }else { thisTotal = names.size(); } //if group info, then read it vector thisGroupsCount; for (map::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) { int groupIndex = indexGroupMap[it->first]; int abund = it->second; if (abund != 0) { intPair thisAbund(it->second, groupIndex); thisGroupsCount.push_back(thisAbund); totalGroups[groupIndex] += abund; } } map::iterator it = indexNameMap.find(firstCol); if (it == indexNameMap.end()) { if (hasGroups) { counts.push_back(thisGroupsCount); } indexNameMap[firstCol] = uniques; totals.push_back(thisTotal); total += thisTotal; uniques++; }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct.\n"); } } in.close(); }else if ((namefile == "") && (groupfile != "")) { //create count file from group only vector names = groupMap->getNamesSeqs(); //only contains names from selectedGroups or all groups if selectedGroups is empty for (int i = 0; i < names.size(); i++) { if (m->getControl_pressed()) { break; } vector abunds; string group = groupMap->getGroup(names[i]); int groupIndex = indexGroupMap[group]; totalGroups[groupIndex]++; intPair thisAbund(1, groupIndex); abunds.push_back(thisAbund); map::iterator it = indexNameMap.find(names[i]); if (it == indexNameMap.end()) { counts.push_back(abunds); indexNameMap[names[i]] = uniques; totals.push_back(1); total++; uniques++; }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + names[i] + ", sequence names must be unique. 
Please correct.\n"); } } } if (error) { m->setControl_pressed(true); } else { //check for zero groups if (hasGroups) { for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] == 0) { //m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } } } } if (groupfile != "") { delete groupMap; } return total; } catch(exception& e) { m->errorOut(e, "CountTable", "createTable"); exit(1); } } /************************************************************/ int CountTable::readTable(string file, string format) { try { filename = file; hasGroups = false; groups.clear(); totalGroups.clear(); indexGroupMap.clear(); indexNameMap.clear(); counts.clear(); bool error = false; uniques = 0; total = 0; if (format == "fasta") { ifstream in; util.openInputFile(filename, in); while (!in.eof()) { if (m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); string name = seq.getName(); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(1) + "\n"); } map::iterator it = indexNameMap.find(name); if (it == indexNameMap.end()) { indexNameMap[name] = uniques; totals.push_back(1); total ++; uniques++; }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct.\n"); } } in.close(); if (error) { m->setControl_pressed(true); } }else if (format == "name") { map nameFileCounts = util.readNames(filename); for (map::iterator it = nameFileCounts.begin(); it != nameFileCounts.end(); it++){ if (m->getControl_pressed()) { break; } indexNameMap[it->first] = uniques; totals.push_back(it->second); total += it->second; uniques++; } }else { m->mothurOut("[ERROR]: Unsupported format: " + format + ", please correct.\n"); m->setControl_pressed(true); } return total; } catch(exception& e) { m->errorOut(e, "CountTable", "readTable"); exit(1); } } /************************************************************/ int CountTable::readTable(string file, bool readGroups, bool mothurRunning) { try { readTable(file, readGroups, mothurRunning, nullVector); return total; } catch(exception& e) { m->errorOut(e, "CountTable", "readTable"); exit(1); } } /************************************************************/ int CountTable::readTable(ifstream& in, bool readGroups, bool mothurRunning) { try { readTable(in, readGroups, mothurRunning, nullVector); return total; } catch(exception& e) { m->errorOut(e, "CountTable", "readTable"); exit(1); } } /************************************************************/ bool CountTable::isCountTable(string file) { try { filename = file; isCompressed = true; ifstream in; util.openInputFile(filename, in); string headers = util.getline(in); gobble(in); if (headers[0] == '#') { //is this a count file in compressed form //read headers headers = util.getline(in); gobble(in); //gets compressed group name map line headers = util.getline(in); gobble(in); }else { isCompressed = false; } vector columnHeaders = util.splitWhiteSpace(headers); in.close(); bool isCount = true; if (columnHeaders.size() >= 2) { vector defaultHeaders = getHardCodedHeaders(); if (defaultHeaders.size() >= 2) { if ((columnHeaders[0] != defaultHeaders[0]) && (columnHeaders[0] != "OTU_Label")) { isCount = false; } if (columnHeaders[1] != defaultHeaders[1]) { isCount = false; } }else { isCount = false; } }else { isCount = false; } return isCount; } catch(exception& e) { m->errorOut(e, "CountTable", "isCountTable"); exit(1); } } 
/************************************************************/ int CountTable::readTable(string file, bool readGroups, bool mothurRunning, vector selectedGroups) { try { filename = file; ifstream in; util.openInputFile(filename, in); readTable(in, readGroups, mothurRunning, selectedGroups); in.close(); return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "readTable"); exit(1); } } /************************************************************/ int CountTable::readTable(ifstream& in, bool readGroups, bool mothurRunning, vector selectedGroups) { try { if (!readGroups) { selectedGroups.clear(); } isCompressed = true; string headers = util.getline(in); gobble(in); map headerIndex2Group; //#1,F003D000 2,F003D002 3,F003D004 4,F003D006 5,F003D008 6,F003D142 7,F003D144 8,F003D146 9,F003D148 10,F003D150 if (headers[0] == '#') { //is this a count file in compressed form //read headers headers = util.getline(in); gobble(in); //gets compressed group name map line headers = headers.substr(1); vector groupNameHeaders = util.splitWhiteSpace(headers); for (int i = 0; i < groupNameHeaders.size(); i++) { string groupIndex = ""; string groupName = groupNameHeaders[i]; util.splitAtComma(groupIndex, groupName); int a; util.mothurConvert(groupIndex, a); headerIndex2Group[groupName] = a-1; } headers = util.getline(in); gobble(in); }else { isCompressed = false; } vector columnHeaders = util.splitWhiteSpace(headers); int numGroupsInFile = 0; groups.clear(); totalGroups.clear(); indexGroupMap.clear(); indexNameMap.clear(); counts.clear(); map originalGroupIndexes; if ((columnHeaders.size() > 2) && readGroups) { hasGroups = true; numGroupsInFile = columnHeaders.size() - 2; } set setOfSelectedGroups; if (readGroups) { for (int i = 2; i < columnHeaders.size(); i++) { util.checkGroupName(columnHeaders[i]); bool saveGroup = true; if (selectedGroups.size() != 0) { if (!(util.inUsersGroups(columnHeaders[i], selectedGroups))) { saveGroup = false; } } //is this group in selected groups if (saveGroup) { groups.push_back(columnHeaders[i]); if (isCompressed) { map::iterator it = headerIndex2Group.find(columnHeaders[i]); if (it != headerIndex2Group.end()) { originalGroupIndexes[it->second] = columnHeaders[i]; } } else { originalGroupIndexes[i-2] = columnHeaders[i]; } totalGroups.push_back(0); setOfSelectedGroups.insert(columnHeaders[i]); } } } //sort groups to keep consistent with how we store the groups in groupmap sort(groups.begin(), groups.end()); for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; } int numGroupsSelected = groups.size(); bool error = false; string name; int thisTotal = 0; uniques = 0; total = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } in >> name; gobble(in); in >> thisTotal; gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); } if ((thisTotal == 0) && !mothurRunning) { error=true; m->mothurOut("[ERROR]: Your count table contains a sequence named " + name + " with a total=0. 
Please correct.\n"); } //if group info, then read it vector groupCounts; groupCounts.resize(numGroupsSelected, 0); if (columnHeaders.size() > 2) { //file contains groups if (readGroups) { //user wants to save them if (selectedGroups.size() != 0) { //read this seqs groups abundances thisTotal = 0; if (isCompressed) { string groupInfo = util.getline(in); gobble(in); vector groupNodes = util.splitWhiteSpace(groupInfo); vector abunds; for (int i = 0; i < groupNodes.size(); i++) { //for each non zero group count string abund = groupNodes[i]; string thisgroup = ""; util.splitAtComma(thisgroup, abund); int a; util.mothurConvert(abund, a); int g; util.mothurConvert(thisgroup, g); g--; string groupName = originalGroupIndexes[g]; //order of groups in file may not be sorted if (setOfSelectedGroups.count(groupName) != 0) { //we selected this group int thisIndex = indexGroupMap[groupName]; intPair item(a, thisIndex); abunds.push_back(item); totalGroups[thisIndex] += a; thisTotal += a; } } groupCounts = expandAbunds(abunds); }else { for (int i = 0; i < numGroupsInFile; i++) { int thisGroupAbund = 0; in >> thisGroupAbund; gobble(in); string groupName = originalGroupIndexes[i]; //order of groups in file may not be sorted if (setOfSelectedGroups.count(groupName) != 0) { //we selected this group int thisIndex = indexGroupMap[groupName]; groupCounts[thisIndex] = thisGroupAbund; totalGroups[thisIndex] += thisGroupAbund; thisTotal += thisGroupAbund; } } } }else { if (isCompressed) { string groupInfo = util.getline(in); gobble(in); vector groupNodes = util.splitWhiteSpace(groupInfo); vector abunds; for (int i = 0; i < groupNodes.size(); i++) { //for each non zero group count string abund = groupNodes[i]; string thisgroup = ""; util.splitAtComma(thisgroup, abund); int a; util.mothurConvert(abund, a); int g; util.mothurConvert(thisgroup, g); g--; string groupName = originalGroupIndexes[g]; //order of groups in file may not be sorted int thisIndex = indexGroupMap[groupName]; intPair item(a, thisIndex); abunds.push_back(item); totalGroups[thisIndex] += a; } groupCounts = expandAbunds(abunds); } else { for (int i = 0; i < numGroupsInFile; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; } } } }else { //read and discard util.getline(in); gobble(in); } } map::iterator it = indexNameMap.find(name); if (it == indexNameMap.end()) { bool saveSeq = true; if (hasGroups && readGroups) { vector thisGroupsCount = compressAbunds(groupCounts); if (thisGroupsCount.size() == 0) { saveSeq = false; } else { counts.push_back(thisGroupsCount); } } if (saveSeq) { indexNameMap[name] = uniques; totals.push_back(thisTotal); total += thisTotal; uniques++; } }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. 
Please correct.\n"); } } if (error) { m->setControl_pressed(true); } else { //check for zero groups if (hasGroups && readGroups) { for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] == 0) { //m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } } } } //if the file has groups, but we didn't read them if (!readGroups) { hasGroups = false; } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "readTable"); exit(1); } } /************************************************************/ int CountTable::readTable(string file, bool readGroups, bool mothurRunning, unordered_set selectedSeqs) { try { filename = file; isCompressed = true; ifstream in; util.openInputFile(filename, in); string headers = util.getline(in); gobble(in); map headerIndex2Group; //#1,F003D000 2,F003D002 3,F003D004 4,F003D006 5,F003D008 6,F003D142 7,F003D144 8,F003D146 9,F003D148 10,F003D150 if (headers[0] == '#') { //is this a count file in compressed form //read headers headers = util.getline(in); gobble(in); //gets compressed group name map line headers = headers.substr(1); vector groupNameHeaders = util.splitWhiteSpace(headers); for (int i = 0; i < groupNameHeaders.size(); i++) { string groupIndex = ""; string groupName = groupNameHeaders[i]; util.splitAtComma(groupIndex, groupName); int a; util.mothurConvert(groupIndex, a); headerIndex2Group[groupName] = a-1; } headers = util.getline(in); gobble(in); }else { isCompressed = false; } vector columnHeaders = util.splitWhiteSpace(headers); int numGroupsInFile = 0; groups.clear(); totalGroups.clear(); indexGroupMap.clear(); indexNameMap.clear(); counts.clear(); map originalGroupIndexes; if ((columnHeaders.size() > 2) && readGroups) { hasGroups = true; numGroupsInFile = columnHeaders.size() - 2; } if (readGroups) { for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); util.checkGroupName(columnHeaders[i]); if (isCompressed) { map::iterator it = headerIndex2Group.find(columnHeaders[i]); if (it != headerIndex2Group.end()) { originalGroupIndexes[it->second] = columnHeaders[i]; } }else { originalGroupIndexes[i-2] = columnHeaders[i]; } totalGroups.push_back(0); } } //sort groups to keep consistent with how we store the groups in groupmap sort(groups.begin(), groups.end()); for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; } int numGroups = groups.size(); bool error = false; string name; int thisTotal; uniques = 0; total = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } in >> name; gobble(in); in >> thisTotal; gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(thisTotal) + "\n"); } if ((thisTotal == 0) && !mothurRunning) { error=true; m->mothurOut("[ERROR]: Your count table contains a sequence named " + name + " with a total=0. Please correct.\n"); } bool saveSeq = true; map::iterator it = indexNameMap.find(name); if (it == indexNameMap.end()) { if (selectedSeqs.count(name) == 0) { saveSeq = false; } }else { error = true; m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + name + ", sequence names must be unique. 
Please correct.\n"); } vector groupCounts; groupCounts.resize(numGroups, 0); if (columnHeaders.size() > 2) { //file contains groups if (readGroups && saveSeq) { //user wants to save them if (isCompressed) { string groupInfo = util.getline(in); gobble(in); vector groupNodes = util.splitWhiteSpace(groupInfo); vector abunds; for (int i = 0; i < groupNodes.size(); i++) { //for each non zero group count string abund = groupNodes[i]; string thisgroup = ""; util.splitAtComma(thisgroup, abund); int a; util.mothurConvert(abund, a); int g; util.mothurConvert(thisgroup, g); g--; string groupName = originalGroupIndexes[g]; //order of groups in file may not be sorted int thisIndex = indexGroupMap[groupName]; intPair item(a, thisIndex); abunds.push_back(item); totalGroups[thisIndex] += a; } groupCounts = expandAbunds(abunds); } else { for (int i = 0; i < numGroupsInFile; i++) { int thisIndex = indexGroupMap[originalGroupIndexes[i]]; in >> groupCounts[thisIndex]; gobble(in); totalGroups[thisIndex] += groupCounts[thisIndex]; } } }else { util.getline(in); gobble(in); }//read and discard } if (saveSeq) { if (hasGroups && readGroups) { vector thisGroupsCount = compressAbunds(groupCounts); counts.push_back(thisGroupsCount); } indexNameMap[name] = uniques; totals.push_back(thisTotal); total += thisTotal; uniques++; } } in.close(); if (error) { m->setControl_pressed(true); } else { //check for zero groups if (hasGroups && readGroups) { for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] == 0) { //m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } } } } //if the file has groups, but we didn't read them if (!readGroups) { hasGroups = false; } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "readTable"); exit(1); } } /************************************************************/ int CountTable::zeroOutTable() { try { for(int i=0;ierrorOut(e, "CountTable", "zeroOutTable"); exit(1); } } /************************************************************/ int CountTable::clearTable() { try { hasGroups = false; total = 0; uniques = 0; groups.clear(); counts.clear(); totals.clear(); totalGroups.clear(); indexNameMap.clear(); indexGroupMap.clear(); return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "clearTable"); exit(1); } } /************************************************************/ //zeroed reads are not printed vector CountTable::printTable(string file) { try { //remove group if all reads are removed for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] == 0) { //m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } } if (isCompressed) { return printCompressedTable(file); } ofstream out; util.openOutputFile(file, out); vector namesInTable; if (total != 0) { printHeaders(out); map reverse; //use this to preserve order for (map::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; } for (int i = 0; i < totals.size(); i++) { if (totals[i] != 0) { map::iterator itR = reverse.find(i); if (itR != reverse.end()) { namesInTable.push_back(itR->second); out << itR->second << '\t' << totals[i]; if (hasGroups) { printGroupAbunds(out, i); } out << endl; } } } } out.close(); return namesInTable; } catch(exception& e) { m->errorOut(e, "CountTable", "printTable"); exit(1); } } /************************************************************/ //zeroed reads are not printed vector 
CountTable::printNoGroupsTable(string file) { try { ofstream out; util.openOutputFile(file, out); vector namesInTable; if (total != 0) { vector headers = getHardCodedHeaders(); out << headers[0] << '\t' << headers[1] << endl; map reverse; //use this to preserve order for (map::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; } for (int i = 0; i < totals.size(); i++) { if (totals[i] != 0) { map::iterator itR = reverse.find(i); if (itR != reverse.end()) { namesInTable.push_back(itR->second); out << itR->second << '\t' << totals[i] << endl; } } } } out.close(); return namesInTable; } catch(exception& e) { m->errorOut(e, "CountTable", "printTable"); exit(1); } } /************************************************************/ //zeroed reads are not printed vector CountTable::printTable(string file, bool compressedFormat) { try { //remove group if all reads are removed for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] == 0) { //m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } } if (compressedFormat) { return printCompressedTable(file); } ofstream out; util.openOutputFile(file, out); vector namesInTable; if (total != 0) { printHeaders(out); map reverse; //use this to preserve order for (map::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; } for (int i = 0; i < totals.size(); i++) { if (totals[i] != 0) { map::iterator itR = reverse.find(i); if (itR != reverse.end()) { namesInTable.push_back(itR->second); out << itR->second << '\t' << totals[i]; if (hasGroups) { printGroupAbunds(out, i); } out << endl; } } } } out.close(); return namesInTable; } catch(exception& e) { m->errorOut(e, "CountTable", "printTable"); exit(1); } } /************************************************************/ //zeroed seqs are not printed vector CountTable::printCompressedTable(string file, vector groupsToPrint) { try { ofstream out; util.openOutputFile(file, out); vector namesInTable; bool pickedGroups = false; if (groupsToPrint.size() != 0) { if (hasGroups) { pickedGroups = true; } } //if no groups selected, print all groups set selectedGroupsIndicies = printCompressedHeaders(out, groupsToPrint); if (total != 0) { map reverse; //use this to preserve order for (map::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; } for (int i = 0; i < totals.size(); i++) { if (totals[i] != 0) { if (pickedGroups) { string groupOutput = ""; long long thisTotal = 0; for (int j = 0; j < counts[i].size(); j++) { if (selectedGroupsIndicies.count(counts[i][j].group) != 0) { //this is a group we want groupOutput += '\t' + toString(counts[i][j].group+1) + ',' + toString(counts[i][j].abund); thisTotal += counts[i][j].abund; } } if (thisTotal != 0) { map::iterator itR = reverse.find(i); if (itR != reverse.end()) { namesInTable.push_back(itR->second); out << itR->second << '\t' << thisTotal << groupOutput << endl; } } } else { map::iterator itR = reverse.find(i); if (itR != reverse.end()) { namesInTable.push_back(itR->second); out << itR->second << '\t' << totals[i]; if (hasGroups) { for (int j = 0; j < counts[i].size(); j++) { out << '\t' << counts[i][j].group+1 << ',' << counts[i][j].abund; } } out << endl; } } } } } out.close(); return namesInTable; } catch(exception& e) { m->errorOut(e, "CountTable", "printCompressedTable"); exit(1); } } 
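/************************************************************/
/* Sketch of the sparse row layout used by the helpers below (find, getAbund, expandAbunds,
   compressAbunds). Each row of counts stores only the non-zero samples as intPair(abund, group)
   entries sorted by group index. For example, with 6 groups a dense row {5, 0, 0, 1, 0, 3}
   would be held as:

       vector<intPair> row;
       row.push_back(intPair(5, 0));   //group 0 has abundance 5
       row.push_back(intPair(1, 3));   //group 3 has abundance 1
       row.push_back(intPair(3, 5));   //group 5 has abundance 3

   expandAbunds rebuilds the dense vector {5, 0, 0, 1, 0, 3} from that row, and compressAbunds
   applied to the dense vector produces the row above. The numbers are made up for illustration.
*/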
/************************************************************/ //returns index of intPair for group passed in. If group is not present in seq, returns index of next group or -1 int CountTable::find(int seq, int group, bool returnNext) { try { //if (!returnNext) { return find(seq, group); } int index = -1; for (int i = 0; i < counts[seq].size(); i++) { if (counts[seq][i].group >= group) { //found it or done looking if (counts[seq][i].group == group) { index = i; } break; } } return index; } catch(exception& e) { m->errorOut(e, "CountTable", "find"); exit(1); } }/************************************************************/ //returns abundance of intPair for seq and group passed in. If group is not present in seq, returns 0 int CountTable::getAbund(int seq, int group) { try { int index = find(seq, group, false); if (index != -1) { //this seq has a non zero abundance for this group return counts[seq][index].abund; } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "getAbund"); exit(1); } } /************************************************************/ vector CountTable::expandAbunds(vector& items) { try { vector abunds; abunds.resize(groups.size(), 0); //prefill with 0's for (int i = 0; i < items.size(); i++) { //for each non zero entry abunds[items[i].group] = items[i].abund; //set abund for group } return abunds; } catch(exception& e) { m->errorOut(e, "CountTable", "expandAbunds"); exit(1); } } /************************************************************/ vector CountTable::expandAbunds(int index) { try { vector abunds; abunds.resize(groups.size(), 0); //prefill with 0's for (int i = 0; i < counts[index].size(); i++) { //for each non zero entry abunds[counts[index][i].group] = counts[index][i].abund; //set abund for group } return abunds; } catch(exception& e) { m->errorOut(e, "CountTable", "expandAbunds"); exit(1); } } /************************************************************/ //assumes same order as groups vector CountTable::compressAbunds(vector abunds) { try { vector row; for (int i = 0; i < abunds.size(); i++) { if (abunds[i] != 0) { intPair thisAbund(abunds[i], i); row.push_back(thisAbund); } } return row; } catch(exception& e) { m->errorOut(e, "CountTable", "compressAbunds"); exit(1); } } /************************************************************/ void CountTable::printGroupAbunds(ofstream& out, int index) { try { vector abunds = expandAbunds(index); for (int i = 0; i < abunds.size(); i++) { out << '\t' << abunds[i]; } } catch(exception& e) { m->errorOut(e, "CountTable", "printGroupAbunds"); exit(1); } } /************************************************************/ vector CountTable::printSortedTable(string file) { try { //remove group if all reads are removed for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] == 0) { //m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } } ofstream out; util.openOutputFile(file, out); printHeaders(out); vector namesInTable; for (map::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { string seqName = it->first; int index = it->second; if (totals[index] != 0) { namesInTable.push_back(seqName); out << seqName << '\t' << totals[index]; if (hasGroups) { printGroupAbunds(out, index); } out << endl; } } out.close(); return namesInTable; } catch(exception& e) { m->errorOut(e, "CountTable", "printSortedTable"); exit(1); } } /************************************************************/ vector CountTable::getHardCodedHeaders() { 
try { vector headers; headers.push_back("Representative_Sequence"); headers.push_back("total"); return headers; } catch(exception& e) { m->errorOut(e, "CountTable", "printHeaders"); exit(1); } } /************************************************************/ int CountTable::printHeaders(ofstream& out, vector selectedGroups) { try { //remove group if all reads are removed for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] == 0) { //m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } } bool pickedGroups = false; if (selectedGroups.size() != 0) { pickedGroups = true; } out << "Representative_Sequence\ttotal"; if (hasGroups) { for (int i = 0; i < groups.size(); i++) { if (pickedGroups) { if (util.inUsersGroups(groups[i], selectedGroups)) { out << '\t' << groups[i]; } } else { out << '\t' << groups[i]; } } } out << endl; return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "printHeaders"); exit(1); } } /************************************************************/ set CountTable::printCompressedHeaders(ofstream& out, vector groupsToPrint) { try { bool pickedGroups = false; set selectedGroupsIndicies; if (groupsToPrint.size() != 0) { if (hasGroups) { pickedGroups = true; } } //if no groups selected, print all groups if (total != 0) { if (hasGroups) { map reverse; for (map::iterator it = indexGroupMap.begin(); it !=indexGroupMap.end(); it++) { reverse[it->second] = it->first; } map::iterator it = reverse.begin(); string group1Name = it->second; if (pickedGroups) { //find selected groups indicies for (map::iterator it = reverse.begin(); it != reverse.end(); it++) { if (util.inUsersGroups(it->second, groupsToPrint)) { group1Name = it->second; break; } } } out << "#Compressed Format: groupIndex,abundance. For example 1,6 would mean the read has an abundance of 6 for group " + group1Name + "." << endl; out << "#"; for (map::iterator it = reverse.begin(); it != reverse.end(); it++) { if (pickedGroups) { //find selected groups indicies if (util.inUsersGroups(it->second, groupsToPrint)) { selectedGroupsIndicies.insert(it->first); out << it->first+1 << "," << it->second << "\t"; } }else { out << it->first+1 << "," << it->second << "\t"; } } out << endl; } printHeaders(out, groupsToPrint); } return selectedGroupsIndicies; } catch(exception& e) { m->errorOut(e, "CountTable", "printCompressedHeaders"); exit(1); } } /************************************************************/ int CountTable::printSeq(ofstream& out, string seqName) { try { map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { if (totals[it->second] != 0) { out << it->first << '\t' << totals[it->second]; if (hasGroups) { printGroupAbunds(out, it->second); } out << endl; } } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "printSeq"); exit(1); } } /************************************************************/ int CountTable::printCompressedSeq(ofstream& out, string seqName, vector groupsToPrint) { try { map::iterator itName = indexNameMap.find(seqName); if (itName == indexNameMap.end()) { m->mothurOut("[ERROR]: " + seqName + " is not in your count table. 
Please correct.\n"); m->setControl_pressed(true); }else { int i = itName->second; if (totals[i] != 0) { if (hasGroups) { bool pickedGroups = false; if (groupsToPrint.size() != 0) { if (hasGroups) { pickedGroups = true; } } //if no groups selected, print all groups if (pickedGroups) { map reverse; //index to group for (map::iterator it = indexGroupMap.begin(); it !=indexGroupMap.end(); it++) { reverse[it->second] = it->first; } set selectedGroupsIndicies; for (map::iterator it = reverse.begin(); it != reverse.end(); it++) { if (pickedGroups) { //find selected groups indicies if (util.inUsersGroups(it->second, groupsToPrint)) { selectedGroupsIndicies.insert(it->first); } } } string groupOutput = ""; long long thisTotal = 0; for (int j = 0; j < counts[i].size(); j++) { if (selectedGroupsIndicies.count(counts[i][j].group) != 0) { //this is a group we want groupOutput += '\t' + toString(counts[i][j].group+1) + ',' + toString(counts[i][j].abund); thisTotal += counts[i][j].abund; } } if (thisTotal != 0) { out << itName->first << '\t' << thisTotal << groupOutput << endl; } } else { out << itName->first << '\t' << totals[i]; for (int j = 0; j < counts[i].size(); j++) { out << '\t' << counts[i][j].group+1 << ',' << counts[i][j].abund; } } }else { out << itName->first << '\t' << totals[i]; } out << endl; } } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "printCompressedSeq"); exit(1); } } /************************************************************/ //group counts for a seq vector CountTable::getGroupCounts(string seqName) { try { vector temp = getItems(seqName); return (expandAbunds(temp)); } catch(exception& e) { m->errorOut(e, "CountTable", "getGroupCounts"); exit(1); } } /************************************************************/ //group counts for a seq vector CountTable::getItems(string seqName) { try { vector temp; if (hasGroups) { map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { //look for it in names of groups to see if the user accidently used the wrong file if (util.inUsersGroups(seqName, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { temp = counts[it->second]; } }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); } return temp; } catch(exception& e) { m->errorOut(e, "CountTable", "getGroupCounts"); exit(1); } } /************************************************************/ //total number of sequences for the group int CountTable::getGroupCount(string groupName) { try { if (hasGroups) { map::iterator it = indexGroupMap.find(groupName); if (it == indexGroupMap.end()) { m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { return totalGroups[it->second]; } }else{ m->mothurOut("[ERROR]: Your count table does not have group info. 
Please correct.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "getGroupCount"); exit(1); } } /************************************************************/ //total number of sequences for the seq for the group int CountTable::getGroupCount(string seqName, string groupName) { try { if (hasGroups) { map::iterator it = indexGroupMap.find(groupName); if (it == indexGroupMap.end()) { m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { map::iterator it2 = indexNameMap.find(seqName); if (it2 == indexNameMap.end()) { //look for it in names of groups to see if the user accidently used the wrong file if (util.inUsersGroups(seqName, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } m->mothurOut("[ERROR]: seq " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { return expandAbunds(it2->second)[it->second]; } } }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "getGroupCount"); exit(1); } } /************************************************************/ //set the number of sequences for the seq for the group int CountTable::setAbund(string seqName, string groupName, int num) { try { if (hasGroups) { map::iterator it = indexGroupMap.find(groupName); if (it == indexGroupMap.end()) { m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { map::iterator it2 = indexNameMap.find(seqName); if (it2 == indexNameMap.end()) { //look for it in names of groups to see if the user accidently used the wrong file if (util.inUsersGroups(seqName, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { int indexOfGroup = find(it2->second, it->second, false); int oldCount = 0; if (indexOfGroup == -1) { //create item for this group intPair newItem(num, it->second); counts[it2->second].push_back(newItem); sortRow(it2->second); }else { //update total for group oldCount = counts[it2->second][indexOfGroup].abund; counts[it2->second][indexOfGroup].abund = num; } totalGroups[it->second] += (num - oldCount); total += (num - oldCount); totals[it2->second] += (num - oldCount); } } }else{ m->mothurOut("[ERROR]: Your count table does not have group info. 
Please correct.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "set"); exit(1); } } /************************************************************/ //add group int CountTable::addGroup(string groupName) { try { bool sanity = util.inUsersGroups(groupName, groups); if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->setControl_pressed(true); return 0; } groups.push_back(groupName); if (!hasGroups) { counts.resize(uniques); } totalGroups.push_back(0); indexGroupMap[groupName] = groups.size()-1; map originalGroupMap = indexGroupMap; //important to play well with others, :) sort(groups.begin(), groups.end()); //fix indexGroupMap && totalGroups vector newTotals; newTotals.resize(groups.size(), 0); for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; //find original spot of group[i] int index = originalGroupMap[groups[i]]; newTotals[i] = totalGroups[index]; } totalGroups = newTotals; hasGroups = true; return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "addGroup"); exit(1); } } /************************************************************/ //remove group int CountTable::removeGroup(string groupName) { try { if (hasGroups) { //save for later in case removing a group means we need to remove a seq. map reverse; map::iterator it; for (it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; } it = indexGroupMap.find(groupName); if (it == indexGroupMap.end()) { m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { int indexOfGroupToRemove = it->second; map currentGroupIndex = indexGroupMap; vector newGroups; for (int i = 0; i < groups.size(); i++) { if (groups[i] != groupName) { newGroups.push_back(groups[i]); indexGroupMap[groups[i]] = newGroups.size()-1; } } indexGroupMap.erase(groupName); groups = newGroups; totalGroups.erase(totalGroups.begin()+indexOfGroupToRemove); int thisIndex = 0; map newIndexNameMap; for (int i = 0; i < counts.size(); i++) { if (m->getControl_pressed()) { break; } int indexOfGroup = -1; bool found = false; for (int j = 0; j < counts[i].size(); j++) { if (counts[i][j].group >= indexOfGroupToRemove) { //found it or done looking indexOfGroup = j; if (counts[i][j].group == indexOfGroupToRemove) { found = true; } break; } } if (found) { //you have an abundance for this group int num = counts[i][indexOfGroup].abund; counts[i].erase(counts[i].begin()+indexOfGroup); totals[i] -= num; total -= num; if (totals[i] == 0) { //your sequences are only from the group we want to remove, then remove you. 
counts.erase(counts.begin()+i); totals.erase(totals.begin()+i); uniques--; i--; if (i == -1) { i = 0; } indexOfGroup = counts[i].size(); //don't adjust the the group indexes because we removed the read }else { newIndexNameMap[reverse[thisIndex]] = i; } }else { //you don't have this group, nothing to remove if (indexOfGroup == -1) { indexOfGroup = counts[i].size(); } newIndexNameMap[reverse[thisIndex]] = i; } for (int j = indexOfGroup; j < counts[i].size(); j++) { counts[i][j].group -= 1; } thisIndex++; } indexNameMap = newIndexNameMap; if (groups.size() == 0) { hasGroups = false; } } }else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove group " + groupName + ".\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "removeGroup"); exit(1); } } /***********************************************************************/ int CountTable::removeGroup(int minSize){ try { if (hasGroups) { for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] < minSize) { removeGroup(groups[i]); } } }else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove groups.\n"); m->setControl_pressed(true); } return groups.size(); } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "removeGroup - minSize"); exit(1); } } /************************************************************/ //vector of groups for the seq vector CountTable::getGroups(string seqName) { try { vector thisGroups; map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { if (hasGroups) { int index = it->second; for (int i = 0; i < counts[index].size(); i++) { thisGroups.push_back(groups[counts[index][i].group]); } }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); } } return thisGroups; } catch(exception& e) { m->errorOut(e, "CountTable", "getGroups"); exit(1); } } /************************************************************/ //total number of seqs represented by seq int CountTable::renameSeq(string oldSeqName, string newSeqName) { try { map::iterator it = indexNameMap.find(oldSeqName); if (it == indexNameMap.end()) { if (hasGroupInfo()) { //look for it in names of groups to see if the user accidently used the wrong file if (util.inUsersGroups(oldSeqName, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + oldSeqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } } m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. 
Please correct.\n"); m->setControl_pressed(true); }else { int index = it->second; indexNameMap.erase(it); indexNameMap[newSeqName] = index; } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "renameSeq"); exit(1); } } /************************************************************/ //total number of seqs represented by seq int CountTable::getNumSeqs(string seqName) { try { map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { if (hasGroupInfo()) { //look for it in names of groups to see if the user accidently used the wrong file if (util.inUsersGroups(seqName, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } } m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { return totals[it->second]; } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "getNumSeqs"); exit(1); } } /************************************************************/ //set total number of seqs represented by seq int CountTable::setNumSeqs(string seqName, int abund) { try { map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); return -1; }else { int diff = totals[it->second] - abund; totals[it->second] = abund; total-=diff; } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "setNumSeqs"); exit(1); } } /************************************************************/ int CountTable::zeroOutSeq(string seqName) { try { map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); return -1; }else { int abund = totals[it->second]; totals[it->second] = 0; total-=abund; if (hasGroups) { int seqIndexIntoCounts = it->second; for (int i = 0; i < counts[seqIndexIntoCounts].size(); i++) { totalGroups[counts[seqIndexIntoCounts][i].group] -= counts[seqIndexIntoCounts][i].abund; } counts[seqIndexIntoCounts].clear(); } } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "zeroOutSeq"); exit(1); } } /************************************************************/ //returns unique index for sequence like get in NameAssignment int CountTable::get(string seqName) { try { map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { if (hasGroupInfo()) { //look for it in names of groups to see if the user accidently used the wrong file if (util.inUsersGroups(seqName, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } } m->mothurOut("[ERROR]: " + seqName + " is not in your count table. 
Please correct.\n"); m->setControl_pressed(true); }else { return it->second; } return -1; } catch(exception& e) { m->errorOut(e, "CountTable", "get"); exit(1); } } /************************************************************/ //add seqeunce without group info int CountTable::push_back(string seqName) { try { map::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + ".\n"); m->setControl_pressed(true); } indexNameMap[seqName] = uniques; totals.push_back(1); total++; uniques++; }else { m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); m->setControl_pressed(true); } return 1; } catch(exception& e) { m->errorOut(e, "CountTable", "push_back"); exit(1); } } /************************************************************/ // bool CountTable::inTable(string seqName) { try { map::iterator it = indexNameMap.find(seqName); if (it != indexNameMap.end()) { return true; } return false; } catch(exception& e) { m->errorOut(e, "CountTable", "inTable"); exit(1); } } /************************************************************/ //remove sequence int CountTable::remove(string seqName) { try { map::iterator it = indexNameMap.find(seqName); if (it != indexNameMap.end()) { int seqIndexIntoCounts = it->second; uniques--; if (hasGroups){ //remove this sequences counts from group totals for (int i = 0; i < counts[seqIndexIntoCounts].size(); i++) { totalGroups[counts[seqIndexIntoCounts][i].group] -= counts[seqIndexIntoCounts][i].abund; } } //save for later in case removing a group means we need to remove a seq. map reverse; for (map::iterator it2 = indexNameMap.begin(); it2 !=indexNameMap.end(); it2++) { reverse[it2->second] = it2->first; } int newIndex = 0; map newIndexNameMap; for (int i = 0; i < counts.size(); i++) { if (i == seqIndexIntoCounts) { }//you are the seq we are trying to remove else { newIndexNameMap[reverse[i]] = newIndex; newIndex++; } } indexNameMap = newIndexNameMap; counts.erase(counts.begin()+seqIndexIntoCounts); int thisTotal = totals[seqIndexIntoCounts]; totals.erase(totals.begin()+seqIndexIntoCounts); total -= thisTotal; //remove group if all reads are removed for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] == 0) { //m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; } } }else { if (hasGroupInfo()) { //look for it in names of groups to see if the user accidently used the wrong file if (util.inUsersGroups(seqName, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + seqName + ". Perhaps you are used a group file instead of a design file? 
A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } } m->mothurOut("[ERROR]: Your count table does not include " + seqName + ", cannot remove.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "remove"); exit(1); } }
/************************************************************/ //add sequence without group info int CountTable::push_back(string seqName, int thisTotal) { try { map<string, int>::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { if (hasGroups) { m->mothurOut("[ERROR]: Your count table has groups and I have no group information for " + seqName + ".\n"); m->setControl_pressed(true); } indexNameMap[seqName] = uniques; totals.push_back(thisTotal); total+=thisTotal; uniques++; }else { m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); m->setControl_pressed(true); } return thisTotal; } catch(exception& e) { m->errorOut(e, "CountTable", "push_back"); exit(1); } }
/************************************************************/ //add sequence with group info int CountTable::push_back(string seqName, vector<int> groupCounts, bool ignoreDup=false) { try { int thisTotal = 0; map<string, int>::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { if ((hasGroups) && (groupCounts.size() != getNumGroups())) { m->mothurOut("[ERROR]: Your count table has " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct.\n"); m->setControl_pressed(true); } for (int i = 0; i < getNumGroups(); i++) { totalGroups[i] += groupCounts[i]; thisTotal += groupCounts[i]; } if (hasGroups) { counts.push_back(compressAbunds(groupCounts)); } indexNameMap[seqName] = uniques; totals.push_back(thisTotal); total+= thisTotal; uniques++; }else { if (ignoreDup) { m->mothurOut("[WARNING]: Your count table contains more than 1 sequence named " + seqName + ". Mothur requires sequence names to be unique. I will only add it once.\n"); }else { m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); m->setControl_pressed(true); } } return thisTotal; } catch(exception& e) { m->errorOut(e, "CountTable", "push_back"); exit(1); } }
/************************************************************/ //add sequence with group info int CountTable::push_back(string seqName, vector<int> groupCounts) { try { int thisTotal = 0; map<string, int>::iterator it = indexNameMap.find(seqName); if (it == indexNameMap.end()) { if ((hasGroups) && (groupCounts.size() != getNumGroups())) { m->mothurOut("[ERROR]: Your count table has " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct.\n"); m->setControl_pressed(true); } for (int i = 0; i < getNumGroups(); i++) { totalGroups[i] += groupCounts[i]; thisTotal += groupCounts[i]; } if (hasGroups) { counts.push_back(compressAbunds(groupCounts)); } indexNameMap[seqName] = uniques; totals.push_back(thisTotal); total+= thisTotal; uniques++; }else { m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + seqName + ", sequence names must be unique. 
Please correct.\n"); m->setControl_pressed(true); } return thisTotal; } catch(exception& e) { m->errorOut(e, "CountTable", "push_back"); exit(1); } } /************************************************************/ //returns size of smallest group. If no groups, returns total num seqs (includes non uniques) int CountTable::getNumSeqsSmallestGroup() { try { int smallestGroupSize = MOTHURMAX; if (hasGroups) { for (int i = 0; i < totalGroups.size(); i++) { if (totalGroups[i] < smallestGroupSize) { smallestGroupSize = totalGroups[i]; } } } else { return total; } return smallestGroupSize; } catch(exception& e) { m->errorOut(e, "CountTable", "getNumSeqsSmallestGroup"); exit(1); } } /************************************************************/ //create ListVector from uniques ListVector CountTable::getListVector() { try { ListVector list(indexNameMap.size(), "ASV"); for (map::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) { if (m->getControl_pressed()) { break; } list.set(it->second, it->first); } return list; } catch(exception& e) { m->errorOut(e, "CountTable", "getListVector"); exit(1); } } /************************************************************/ //returns the names of all unique sequences in file vector CountTable::getNamesOfSeqs() { try { vector names; for (map::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) { names.push_back(it->first); } return names; } catch(exception& e) { m->errorOut(e, "CountTable", "getNamesOfSeqs"); exit(1); } } /************************************************************/ //returns the names of all unique sequences in file mapped to their seqCounts map CountTable::getNameMap() { try { map names; for (map::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) { names[it->first] = totals[it->second]; } return names; } catch(exception& e) { m->errorOut(e, "CountTable", "getNameMap"); exit(1); } } /************************************************************/ //returns the names of all unique sequences in file mapped to their seqCounts map CountTable::getNameMap(string group) { try { map names; if (hasGroups) { map::iterator it = indexGroupMap.find(group); if (it == indexGroupMap.end()) { m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { for (map::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) { int abund = getAbund(it2->second, it->second); if (abund != 0) { names[it2->first] = abund; } } } }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); } return names; } catch(exception& e) { m->errorOut(e, "CountTable", "getNameMap"); exit(1); } } /************************************************************/ //returns the names of all unique sequences in file vector CountTable::getNamesOfSeqs(string group) { try { vector names; if (hasGroups) { map::iterator it = indexGroupMap.find(group); if (it == indexGroupMap.end()) { m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { for (map::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) { if (getAbund(it2->second, it->second) != 0) { names.push_back(it2->first); } } } }else{ m->mothurOut("[ERROR]: Your count table does not have group info. 
Please correct.\n"); m->setControl_pressed(true); } return names; } catch(exception& e) { m->errorOut(e, "CountTable", "getNamesOfSeqs"); exit(1); } }
/************************************************************/ //returns the names of all unique sequences in file vector<string> CountTable::getNamesOfSeqs(vector<string> chosenGroups) { try { vector<string> names; if (hasGroups) { set<string> uniqueNames; for (int i = 0; i < chosenGroups.size(); i++) { vector<string> namesFromThisGroup = getNamesOfSeqs(chosenGroups[i]); for (int j = 0; j < namesFromThisGroup.size(); j++) { uniqueNames.insert(namesFromThisGroup[j]); } } //only adds names once. seqs are likely present in more than one group, but we only want to enter them once for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { names.push_back(*it); } }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->setControl_pressed(true); } return names; } catch(exception& e) { m->errorOut(e, "CountTable", "getNamesOfSeqs"); exit(1); } }
/************************************************************/ //merges counts of seq1 and seq2, saving in seq1 int CountTable::mergeCounts(string seq1, string seq2) { try { map<string, int>::iterator it = indexNameMap.find(seq1); if (it == indexNameMap.end()) { if (hasGroupInfo()) { //look for it in names of groups to see if the user accidentally used the wrong file if (util.inUsersGroups(seq1, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + seq1 + ". Perhaps you used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } } m->mothurOut("[ERROR]: " + seq1 + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { map<string, int>::iterator it2 = indexNameMap.find(seq2); if (it2 == indexNameMap.end()) { if (hasGroupInfo()) { //look for it in names of groups to see if the user accidentally used the wrong file if (util.inUsersGroups(seq2, groups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + seq2 + ". Perhaps you used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } } m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. 
Please correct.\n"); m->setControl_pressed(true); }else { if (hasGroupInfo()) { //if no group data then counts are empty //merge data vector countsSeq1 = expandAbunds(it->second); vector countsSeq2 = expandAbunds(it2->second); for (int i = 0; i < groups.size(); i++) { countsSeq1[i] += countsSeq2[i]; } counts[it->second] = compressAbunds(countsSeq1); } totals[it->second] += totals[it2->second]; uniques--; indexNameMap.erase(it2); } } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "getNamesOfSeqs"); exit(1); } } /************************************************************/ int CountTable::copy(CountTable* ct) { try { vector thisGroups = ct->getNamesOfGroups(); for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); } vector names = ct->getNamesOfSeqs(); for (int i = 0; i < names.size(); i++) { if (hasGroups) { vector thisCounts = ct->getGroupCounts(names[i]); push_back(names[i], thisCounts, false); }else { int thisCount = ct->getNumSeqs(names[i]); push_back(names[i], thisCount); } } isCompressed = ct->isTableCompressed(); return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "copy"); exit(1); } } /***********************************************************************/ int CountTable::sortCountTable(){ try { //sorts each rows abunds by group //counts[i] = (1,4),(1,2),(3,7) -> (1,2),(1,4),(3,7) for (int i = 0; i < counts.size(); i++) { sort(counts[i].begin(), counts[i].end(), compareGroups); } return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "sortCountTable"); exit(1); } } /***********************************************************************/ int CountTable::sortRow(int index){ try { //saves time in getSmallestCell, by making it so you dont search the repeats sort(counts[index].begin(), counts[index].end(), compareGroups); return 0; } catch(exception& e) { m->errorOut(e, "CountTable", "sortRow"); exit(1); } } /************************************************************/ mothur-1.48.0/source/datastructures/counttable.h000077500000000000000000000234371424121717000220230ustar00rootroot00000000000000#ifndef Mothur_counttable_h #define Mothur_counttable_h // // counttable.h // Mothur // // Created by Sarah Westcott on 6/26/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // //This class is designed to read a count table file and store its data. //count table files look like: /* Representative_Sequence total F003D000 F003D002 F003D004 F003D006 F003D008 F003D142 F003D144 F003D146 F003D148 F003D150 MOCK.GQY1XT001 GQY1XT001C296C 6051 409 985 923 937 342 707 458 439 387 464 0 GQY1XT001A3TJI 4801 396 170 413 442 306 769 581 576 497 651 0 GQY1XT001CS2B8 3018 263 226 328 460 361 336 248 290 187 319 0 GQY1XT001CD9IB 2736 239 177 256 405 306 286 263 248 164 392 0 or if no group info was used to create it Representative_Sequence total GQY1XT001C296C 6051 GQY1XT001A3TJI 4801 GQY1XT001CS2B8 3018 GQY1XT001CD9IB 2736 GQY1XT001ARCB1 2183 GQY1XT001CNF2P 2796 GQY1XT001CJMDA 1667 GQY1XT001CBVJB 3758 The sparse format saves space by storing only non zero sample counts. Samples are assigned a numeric value, and only samples with non zero counts are printed to the file. You can see from the table below that GQY1XT001CFHYQ has representation in all samples, with a total abundance of 467. GQY1XT001EI480 has representation in 3 samples: 1 (F003D000) , 8 (F003D146) and 9 (F003D148), with a total abundance of 10. #Compressed Format: groupIndex,abundance. For example 1,6 would mean the read has an abundance of 6 for group F003D000. 
#1,F003D000 2,F003D002 3,F003D004 4,F003D006 5,F003D008 6,F003D142 7,F003D144 8,F003D146 9,F003D148 10,F003D150 Representative_Sequence total F003D000 F003D002 F003D004 F003D006 F003D008 F003D142 F003D144 F003D146 F003D148 F003D150 GQY1XT001CFHYQ 467 1,325 2,40 3,22 4,30 5,24 6,6 7,7 8,3 9,7 10,3 GQY1XT001C44N8 3677 1,323 2,132 3,328 4,318 5,232 6,579 7,448 8,426 9,381 10,510 GQY1XT001C296C 4652 1,356 2,877 3,754 4,794 5,284 6,538 7,361 8,313 10,375 GQY1XT001ARCB1 2202 1,203 2,391 3,220 4,155 5,308 6,126 7,33 8,191 9,289 10,286 GQY1XT001CFWVZ 1967 1,193 2,152 3,191 4,300 5,228 6,179 7,172 8,161 9,111 10,280 ... GQY1XT001EI480 10 1,8 8,1 9,1 GQY1XT001EDBEC 95 1,9 2,13 3,13 4,7 5,10 6,11 7,8 8,8 9,5 10,11 GQY1XT001D47YY 97 1,10 2,2 3,13 4,21 5,9 6,5 7,11 8,12 9,2 10,12 GQY1XT001CNUHI 19 1,17 2,1 7,1 ... */ #include "mothurout.h" #include "listvector.hpp" #include "sequence.hpp" #include "sharedrabundvectors.hpp" class CountTable { public: CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; uniques = 0; isCompressed = true; } ~CountTable() = default; //reads and creates smart enough to eliminate groups with zero counts int createTable(map&); //seqName->group int createTable(set&, map&, set&); //seqNames, seqName->group, groupNames int createTable(string, string, vector, bool createGroup=false); //namefile, groupfile, selectedGroups, createGroup, int readTable(string, bool, bool); //filename, readGroups, mothurRunning int readTable(ifstream&, bool, bool); //filehandler, readGroups, mothurRunning int readTable(string, bool, bool, vector); //filename, readGroups, mothurRunning, groups to save (if none provided, read all groups) int readTable(ifstream&, bool, bool, vector); //filename, readGroups, mothurRunning, groups to save (if none provided, read all groups) int readTable(string, bool, bool, unordered_set); //filename, readGroups, mothurRunning, namesofSeqs to save (if none provided, read all seqs) int readTable(string, string); //filename, format - if format=fasta, read fasta file and create unique table. 
if format=name, use namefile to create count table int zeroOutTable(); //turn all counts to zeros int clearTable(); bool isCountTable(string); bool isTableCompressed() { return isCompressed; } int copy(CountTable*); //copy countable bool inTable(string); //accepts sequence name and returns true if sequence is in table, false if not present //all print commands ignore zeroed out seqs vector printCompressedTable(string, vector optionalGroups=nullVector); //nameOfFile, optionalVectorOfGroups (if empty, prints all possible groups), returns names of seqs in table - excludes zeroed reads vector printTable(string); //preserves order in original, defaults compress to state of original file vector printNoGroupsTable(string); //preserves order in original, defaults compress to state of original file vector printTable(string, bool compress); //preserves order in original, printing compressed or not based on compress flag pasted in vector printSortedTable(string); //sorted by seqName int printHeaders(ofstream&, vector optionalGroups=nullVector); set printCompressedHeaders(ofstream&, vector optionalGroups=nullVector); vector getHardCodedHeaders(); //Representative_Sequence, total int printSeq(ofstream&, string); int printCompressedSeq(ofstream&, string, vector optionalGroups=nullVector); bool testGroups(string file); //used to check if file has group data without reading it bool testGroups(string file, vector&); //used to check if file has group data without reading it, return groups if found. bool hasGroupInfo() { return hasGroups; } int getNumGroups() { return (int)groups.size(); } vector getNamesOfGroups() { return groups; } //returns group names, if no group info vector is blank. bool setNamesOfGroups(vector); int addGroup(string); //*****only use with empty table******// int removeGroup(string); //pass in group name int removeGroup(int minSize); //removes any groups with numSeqs < minSize int renameSeq(string, string); //used to change name of sequence for use with trees int setAbund(string, string, int); //set abundance number of seqs for that group for that seq int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in. int push_back(string); //add a sequence int push_back(string, int); //add a sequence int push_back(string, vector); //add a sequence with group info int push_back(string, vector, bool); //add a sequence with group info, no error - ignore dups int remove(string); //remove seq int get(string); //returns unique sequence index for reading distance matrices like NameAssignment int size() { return (int)indexNameMap.size(); } vector getGroups(string); //returns vector of groups represented by this sequence vector getGroupCounts(string); //returns group counts for a seq passed in, if no group info is in file vector is blank. Order is the same as the groups returned by getGroups function. int getGroupCount(string, string); //returns number of seqs for that group for that seq int getGroupCount(string); // returns total seqs for that group int getNumSeqs(string); //returns total seqs for that seq, 0 if not found int setNumSeqs(string, int); //set total seqs for that seq, return -1 if not found int zeroOutSeq(string); //set total seqs for that seq to 0, return -1 if not found int getNumSeqs() { return total; } //return total number of seqs int getNumUniqueSeqs() { return uniques; } //return number of unique/representative seqs int getNumSeqsSmallestGroup(); //returns size of smallest group. 
If no groups, returns total num seqs (includes non uniques) vector getNamesOfSeqs(); //return names of all seqeunce in table vector getNamesOfSeqs(string); //returns names of seqs in specific group in table vector getNamesOfSeqs(vector); //returns names of seqs in specific set of groups in table ListVector getListVector(); map getNameMap(); //sequenceName -> total number of sequences it represents map getNameMap(string); //sequenceName -> total number of sequences it represents in that group private: string filename; MothurOut* m; Utils util; bool hasGroups, isCompressed; int total, uniques; vector groups; vector< vector > counts; //intPair ((int)abund, (int)group). each line in counts represents a sequence line from the count table file(sparse). The vector are sorted by group, so that you can stop search early if group is not found. For example: seq1 10 5 0 0 1 0 0 0 3 0 0 1 0 0 - 13 groups, but seq1 is only present in 4 samples. Let's save space by not storing 0 abunds. seq1's vector (5,0),(1,3),(3,7),(1,10). Group0 = 5, Group3 = 1, Group7 = 3, Group10 = 1. vector totals; vector totalGroups; map indexNameMap; //maps seqName -> vector index in counts. seq1 -> 1 would mean seq1's counts are stored in counts[1]. map indexGroupMap; int find(int seq, int group, bool returnNext); //returns index of intPair for group passed in. If group is not present in seq, returns -1 int getAbund(int seq, int group); //returns abundance of intPair for seq and group passed in. If group is not present in seq, returns 0 vector getItems(string); //returns group counts for a seq passed in, if no group info is in file vector is blank. sorted by group vector expandAbunds(int index); vector expandAbunds(vector& items); vector compressAbunds(vector abunds); void printGroupAbunds(ofstream& out, int index); int sortCountTable(); int sortRow(int); }; #endif mothur-1.48.0/source/datastructures/datavector.hpp000077500000000000000000000032551424121717000223530ustar00rootroot00000000000000#ifndef datavector_h #define datavector_h #include "mothurout.h" #include "utils.hpp" /* This class is parent to listvector, ordervector, rabundvector, sabundvector, sharedordervector, sharedrabundvector, sharedsabundvector. The child classes all contain OTU information in different forms. 
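   An illustrative usage sketch (not part of the original source; the helper name and stream are hypothetical). Because every child implements the pure virtual converters declared below, code can work with any of them through the DataVector interface:

       void summarize(DataVector& dv, ostream& out) {
           RAbundVector rabund = dv.getRAbundVector();    // abundance of each OTU
           SAbundVector sabund = dv.getSAbundVector();    // number of OTUs observed n times
           out << dv.getLabel() << '\t' << dv.size() << endl;   // label and size of the vector
       }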
*/ class SharedSAbundVector; class SharedRAbundVectors; class SharedCLRVector; class SharedCLRVectors; class RAbundVector; class RAbundFloatVector; class SAbundVector; class OrderVector; class SharedListVector; class SharedOrderVector; class GroupMap; class DataVector { public: DataVector(){ m = MothurOut::getInstance(); } DataVector(string l) : label(l) { m = MothurOut::getInstance();}; DataVector(const DataVector& dv) : label(dv.label){ m = MothurOut::getInstance();} DataVector(ifstream&) {m = MothurOut::getInstance();} DataVector(ifstream&, GroupMap*){m = MothurOut::getInstance();} virtual ~DataVector(){}; virtual int size() = 0; virtual void clear() = 0; virtual RAbundVector getRAbundVector() = 0; virtual SAbundVector getSAbundVector() = 0; virtual OrderVector getOrderVector(map* hold = nullptr) = 0; virtual void resize(int) = 0; virtual void print(ostream&, map&) {} virtual void print(ostream&, bool) { m->mothurOut("[ERROR]: no print function\n"); } virtual void printHeaders(ostream&) {}; virtual void print(ostream&, bool&) {} virtual void print(ostream&) {} void setLabel(string l) { label = l; } string getLabel() { return label; } protected: string label; MothurOut* m; Utils util; }; /***********************************************************************/ #endif mothur-1.48.0/source/datastructures/designmap.cpp000077500000000000000000000614531424121717000221650ustar00rootroot00000000000000// // designmap.cpp // Mothur // // Created by SarahsWork on 6/17/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #include "designmap.h" /************************************************************/ DesignMap::DesignMap(string file) { try { m = MothurOut::getInstance(); defaultClass = "not found"; read(file); } catch(exception& e) { m->errorOut(e, "DesignMap", "DesignMap"); exit(1); } } /************************************************************/ int DesignMap::read(string file) { try { namesOfCategories.clear(); indexCategoryMap.clear(); indexGroupNameMap.clear(); designMap.clear(); ifstream in; util.openInputFile(file, in); string temp = ""; string headers = util.getline(in); gobble(in); vector tempColumnHeaders = util.splitWhiteSpace(headers); if (tempColumnHeaders.size() != 0) { temp = tempColumnHeaders[0]; } else { m->setControl_pressed(true); return 0; } vector columnHeaders; if ((temp == "group") || (temp == "Group")) { columnHeaders.push_back("group"); for (int i = 1; i < tempColumnHeaders.size(); i++) { columnHeaders.push_back(tempColumnHeaders[i]); } }else { m->mothurOut("\n[ERROR]: Expected 'group' and found '" + temp + "'. Mothur expects the design file to have column headers. The first column header should be 'group'.\n\nSomething like: group\ttreatment\tmetadata.\n\nWithout the headers, mothur is unable to determine if the first row is group information or a header of the wrong name. This can result in a dummy group or missing group which will results in errors. Quitting, please correct.\n\n"); m->setControl_pressed(true); in.close(); return 0; } map originalGroupIndexes; for (int i = 1; i < columnHeaders.size(); i++) { namesOfCategories.push_back(columnHeaders[i]); originalGroupIndexes[i-1] = columnHeaders[i]; } if (columnHeaders.size() > 1) { defaultClass = columnHeaders[1]; } else { m->mothurOut("[ERROR]: Your design file contains only one column. 
Please correct.\n"); m->setControl_pressed(true); } //sort groups to keep consistent with how we store the groups in groupmap sort(namesOfCategories.begin(), namesOfCategories.end()); for (int i = 0; i < namesOfCategories.size(); i++) { indexCategoryMap[namesOfCategories[i]] = i; } int numCategories = namesOfCategories.size(); bool error = false; string group; totalCategories.resize(numCategories); int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { break; } in >> group; gobble(in); util.checkGroupName(group); if (m->getDebug()) { m->mothurOut("[DEBUG]: group = " + group + "\n"); } //if group info, then read it vector categoryValues; categoryValues.resize(numCategories, "not found"); for (int i = 0; i < numCategories; i++) { int thisIndex = indexCategoryMap[originalGroupIndexes[i]]; //find index of this category because we sort the values. string temp = "not found"; in >> temp; categoryValues[thisIndex] = temp; gobble(in); util.checkGroupName(temp); if (m->getDebug()) { m->mothurOut("[DEBUG]: value = " + temp + "\n"); } //do we have this value for this category already map::iterator it = totalCategories[thisIndex].find(temp); if (it == totalCategories[thisIndex].end()) { totalCategories[thisIndex][temp] = 1; } else { totalCategories[thisIndex][temp]++; } } map::iterator it = indexGroupNameMap.find(group); if (it == indexGroupNameMap.end()) { groups.push_back(group); indexGroupNameMap[group] = count; designMap.push_back(categoryValues); count++; }else { error = true; m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct.\n"); } } in.close(); if (error) { m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "DesignMap", "read"); exit(1); } } /************************************************************/ ////groupName, returns default categories value. string DesignMap::get(string groupName) { try { string value = "not found"; map::iterator it2 = indexGroupNameMap.find(groupName); if (it2 == indexGroupNameMap.end()) { m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->setControl_pressed(true); }else { return designMap[it2->second][indexCategoryMap[defaultClass]]; } return value; } catch(exception& e) { m->errorOut(e, "DesignMap", "get"); exit(1); } } /************************************************************/ ////groupName, returns default categories value. vector DesignMap::getCategory() { try { //oldStyle design file group -> treatment. returns treatments set uniqueNames; for (int i = 0; i < groups.size(); i++) { uniqueNames.insert(get(groups[i])); } vector values; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { values.push_back(*it); } return values; } catch(exception& e) { m->errorOut(e, "DesignMap", "getCategory"); exit(1); } } /************************************************************/ ////categoryName, returns category values. vector DesignMap::getCategory(string catName) { try { vector values; map::iterator it2 = indexCategoryMap.find(catName); if (it2 == indexCategoryMap.end()) { m->mothurOut("[ERROR]: category " + catName + " is not in your design file. 
Please correct.\n"); m->setControl_pressed(true); }else { for (map::iterator it = totalCategories[it2->second].begin(); it != totalCategories[it2->second].end(); it++) { values.push_back(it->first); } } return values; } catch(exception& e) { m->errorOut(e, "DesignMap", "getCategory"); exit(1); } } /************************************************************/ ////groupName, category returns value. example F000132, sex -> male string DesignMap::get(string groupName, string categoryName) { try { string value = "not found"; map::iterator it = indexCategoryMap.find(categoryName); if (it == indexCategoryMap.end()) { m->mothurOut("[ERROR]: category " + categoryName + " is not in your design file. Please correct.\n"); m->setControl_pressed(true); }else { map::iterator it2 = indexGroupNameMap.find(groupName); if (it2 == indexGroupNameMap.end()) { m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->setControl_pressed(true); }else { return designMap[it2->second][it->second]; } } return value; } catch(exception& e) { m->errorOut(e, "DesignMap", "get"); exit(1); } } /************************************************************/ //add group, assumes order is correct int DesignMap::push_back(string group, vector values) { try { util.checkGroupName(group); map::iterator it = indexGroupNameMap.find(group); if (it == indexGroupNameMap.end()) { if (values.size() != getNumCategories()) { m->mothurOut("[ERROR]: Your design file has a " + toString(getNumCategories()) + " categories and " + group + " has " + toString(values.size()) + ", please correct.\n"); m->setControl_pressed(true); return 0; } for (int i = 0; i < values.size(); i++) { //do we have this value for this category already map::iterator it = totalCategories[i].find(values[i]); if (it == totalCategories[i].end()) { totalCategories[i][values[i]] = 1; } else { totalCategories[i][values[i]]++; } } int count = indexGroupNameMap.size(); indexGroupNameMap[group] = count; designMap.push_back(values); }else { m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "DesignMap", "push_back"); exit(1); } } /************************************************************/ //set values for group, does not need to set all values. assumes group is in table already int DesignMap::setValues(string group, map values) { try { map::iterator it = indexGroupNameMap.find(group); if (it != indexGroupNameMap.end()) { for (map::iterator it2 = values.begin(); it2 != values.end(); it2++) { map::iterator it3 = indexCategoryMap.find(it2->first); //do we have this category if (it3 == indexCategoryMap.end()) { m->mothurOut("[ERROR]: Your design file does not contain a category called " + it2->first + ". 
Please correct.\n"); m->setControl_pressed(true); }else { string oldCategory = designMap[it->second][it3->second]; //adjust totals for old category int oldCount = totalCategories[it3->second][oldCategory]; if (oldCount == 1) { totalCategories[it3->second].erase(oldCategory); } else { totalCategories[it3->second][oldCategory]--; } designMap[it->second][it3->second] = it2->second; //reset value //adjust totals for new category map::iterator it4 = totalCategories[it3->second].find(it2->second); if (it4 == totalCategories[it3->second].end()) { totalCategories[it3->second][it2->second] = 1; } else { totalCategories[it3->second][it2->second]++; } } } }else { m->mothurOut("[ERROR]: Your design file does not contain a group named " + group + ". Please correct.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "DesignMap", "setValues"); exit(1); } } /************************************************************/ //set defaultclass void DesignMap::setDefaultClass(string dClass) { try { if (util.inUsersGroups(dClass, namesOfCategories)) { defaultClass = dClass; }else{ m->mothurOut("[WARNING]: Your design file does not contain a category named " + dClass + ". Using default class " + defaultClass + " .\n\n"); } } catch(exception& e) { m->errorOut(e, "DesignMap", "setDefaultClass"); exit(1); } } /************************************************************/ //get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: // map early, late>, male> would return 1. Only one group is male and from early or late. int DesignMap::getNumUnique(map > selected) { try { int num = 0; map > indexes; for (map >::iterator it = selected.begin(); it != selected.end(); it++) { map::iterator it2 = indexCategoryMap.find(it->first); if (it2 == indexCategoryMap.end()) { m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct.\n"); m->setControl_pressed(true); return 0; }else { indexes[it2->second] = it->second; } } for (int j = 0; j < designMap.size(); j++) { bool hasAll = true; //innocent til proven guilty for (map >::iterator it = indexes.begin(); it != indexes.end(); it++) { //column number is it->first if (!util.inUsersGroups(designMap[j][it->first], it->second)) { hasAll = false; } } if (hasAll) { num++; } } return num; } catch(exception& e) { m->errorOut(e, "DesignMap", "getNumUnique"); exit(1); } } /************************************************************/ //get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example: // map early, late>, male> would return 3. All three group have are either male or from early or late. int DesignMap::getNumShared(map > selected) { try { int num = 0; map > indexes; for (map >::iterator it = selected.begin(); it != selected.end(); it++) { map::iterator it2 = indexCategoryMap.find(it->first); if (it2 == indexCategoryMap.end()) { m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". 
Please correct.\n"); m->setControl_pressed(true); return 0; }else { indexes[it2->second] = it->second; } } for (int j = 0; j < designMap.size(); j++) { bool hasAny = false; //innocent til proven guilty for (map >::iterator it = indexes.begin(); it != indexes.end(); it++) { //column number is it->first if (util.inUsersGroups(designMap[j][it->first], it->second)) { hasAny = true; } } if (hasAny) { num++; } } return num; } catch(exception& e) { m->errorOut(e, "DesignMap", "getNumShared"); exit(1); } } /************************************************************/ //get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: // map early, late>, male> would return F000132. F000132 is the only group which is male and from early or late. vector DesignMap::getNamesUnique(map > selected) { try { vector names; map > indexes; for (map >::iterator it = selected.begin(); it != selected.end(); it++) { map::iterator it2 = indexCategoryMap.find(it->first); if (it2 == indexCategoryMap.end()) { m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct.\n"); m->setControl_pressed(true); return names; }else { indexes[it2->second] = it->second; } } //map int to name map reverse; for (map::iterator it = indexGroupNameMap.begin(); it != indexGroupNameMap.end(); it++) { reverse[it->second] = it->first; } for (int j = 0; j < designMap.size(); j++) { bool hasAll = true; //innocent til proven guilty for (map >::iterator it = indexes.begin(); it != indexes.end(); it++) { //column number is it->first if (!util.inUsersGroups(designMap[j][it->first], it->second)) { hasAll = false; } } if (hasAll) { map::iterator it = reverse.find(j); if (it == reverse.end()) { m->mothurOut("[ERROR]: should never get here, oops. Please correct.\n"); m->setControl_pressed(true); return names; }else { names.push_back(it->second); } } } return names; } catch(exception& e) { m->errorOut(e, "DesignMap", "getNamesUnique"); exit(1); } } /************************************************************/ //get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: // map early, late>, male> would return F000132. F000132 is the only group which is male and from early or late. vector DesignMap::getNamesShared(map > selected) { try { vector names; map > indexes; for (map >::iterator it = selected.begin(); it != selected.end(); it++) { map::iterator it2 = indexCategoryMap.find(it->first); if (it2 == indexCategoryMap.end()) { m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct.\n"); m->setControl_pressed(true); return names; }else { indexes[it2->second] = it->second; } } //map int to name map reverse; for (map::iterator it = indexGroupNameMap.begin(); it != indexGroupNameMap.end(); it++) { reverse[it->second] = it->first; } for (int j = 0; j < designMap.size(); j++) { bool hasAny = false; //innocent til proven guilty for (map >::iterator it = indexes.begin(); it != indexes.end(); it++) { //column number is it->first if (util.inUsersGroups(designMap[j][it->first], it->second)) { hasAny = true; } } if (hasAny) { map::iterator it = reverse.find(j); if (it == reverse.end()) { m->mothurOut("[ERROR]: should never get here, oops. 
Please correct.\n"); m->setControl_pressed(true); return names; }else { names.push_back(it->second); } } } return names; } catch(exception& e) { m->errorOut(e, "DesignMap", "getNamesShared"); exit(1); } } /************************************************************/ //get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example: // map early, late>, male> would return F000132, F000142, F000138. All three group have are either male or from early or late. vector DesignMap::getNamesGroups(string category, string value) { try { vector names; map::iterator it = indexCategoryMap.find(category); if (it == indexCategoryMap.end()) { m->mothurOut("[ERROR]: category " + category + " is not in your design file. Please correct.\n"); m->setControl_pressed(true); }else { int column = it->second; //map int to name map reverse; for (map::iterator it2 = indexGroupNameMap.begin(); it2 != indexGroupNameMap.end(); it2++) { reverse[it2->second] = it2->first; } for (int i = 0; i < designMap.size(); i++) { if (designMap[i][column] == value) { map::iterator it2 = reverse.find(i); if (it2 == reverse.end()) { m->mothurOut("[ERROR]: should never get here, oops. Please correct.\n"); m->setControl_pressed(true); return names; }else { names.push_back(it2->second); } } } } return names; } catch(exception& e) { m->errorOut(e, "DesignMap", "getNamesGroups"); exit(1); } } /************************************************************/ //assume default category and get names groups that match any values in vector passed in. = F000142, F000132. vector DesignMap::getNamesGroups(vector sets) { try { vector names; if (sets.size() == 0) { return names; } map > temp; temp[defaultClass] = sets; names = getNamesShared(temp); return names; } catch(exception& e) { m->errorOut(e, "DesignMap", "getNamesGroups"); exit(1); } } /************************************************************/ int DesignMap::print(ofstream& out) { try { out << "group"; for (int i = 0; i < namesOfCategories.size(); i++) { out << '\t' << namesOfCategories[i]; } out << endl; map reverse; //use this to preserve order for (map::iterator it = indexGroupNameMap.begin(); it !=indexGroupNameMap.end(); it++) { reverse[it->second] = it->first; } for (int i = 0; i < designMap.size(); i++) { map::iterator itR = reverse.find(i); if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap out << itR->second; for (int j = 0; j < namesOfCategories.size(); j++) { out << '\t' << designMap[i][j]; } out << endl; } } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "DesignMap", "print"); exit(1); } } /************************************************************/ //print specific categories int DesignMap::printCategories(ofstream& out, vector cats) { try { out << "group"; for (int i = 0; i < namesOfCategories.size(); i++) { if (util.inUsersGroups(namesOfCategories[i], cats)) { out << '\t' << namesOfCategories[i]; } } out << endl; map reverse; //use this to preserve order for (map::iterator it = indexGroupNameMap.begin(); it !=indexGroupNameMap.end(); it++) { reverse[it->second] = it->first; } for (int i = 0; i < designMap.size(); i++) { map::iterator itR = reverse.find(i); if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap out << itR->second; for (int j = 0; j < namesOfCategories.size(); j++) { if (util.inUsersGroups(namesOfCategories[i], cats)) { out << '\t' << 
designMap[i][j]; } } out << endl; } } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "DesignMap", "printCategories"); exit(1); } } /************************************************************/ //print specific groups int DesignMap::printGroups(ofstream& out, vector groups) { try { int numSelected = 0; out << "group"; for (int i = 0; i < namesOfCategories.size(); i++) { out << '\t' << namesOfCategories[i]; } out << endl; map reverse; //use this to preserve order for (map::iterator it = indexGroupNameMap.begin(); it !=indexGroupNameMap.end(); it++) { reverse[it->second] = it->first; } for (int i = 0; i < designMap.size(); i++) { map::iterator itR = reverse.find(i); if (itR != reverse.end()) { //will equal end if groups were removed because remove just removes from indexNameMap if (util.inUsersGroups(itR->second, groups)) { out << itR->second; for (int j = 0; j < namesOfCategories.size(); j++) { out << '\t' << designMap[i][j]; } out << endl; numSelected++; } } } out.close(); return numSelected; } catch(exception& e) { m->errorOut(e, "DesignMap", "printGroups"); exit(1); } } /************************************************************/ mothur-1.48.0/source/datastructures/designmap.h000077500000000000000000000105111424121717000216170ustar00rootroot00000000000000// // designmap.h // Mothur // // Created by SarahsWork on 6/17/13. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef __Mothur__designmap__ #define __Mothur__designmap__ #include "mothurout.h" #include "utils.hpp" /* This class is a representation of the design file. group treatment sex age F000142 Early female young F000132 Late male old F000138 Mid male old */ class DesignMap { public: DesignMap() { m = MothurOut::getInstance(); defaultClass = "not found"; } DesignMap(string); //reads file as well ~DesignMap() = default; //read designfile name int read(string); //like groupMap getGroup string get(string, string); //groupName, category returns value. example F000132, sex -> male string get(string); //groupName, returns default categories value. example F000132, -> late //like groupMap getNamesOfGroups vector getCategory(string); //categoryName, returns values. example treatment, -> early,late,mid vector getCategory(); //returns default categories values. example treatment, -> early,late,mid int setValues(string, map); //groupName, map int push_back(string, vector); //groupName, vector - assumes you put values in order of getNamesOfCategories //refers to header labels vector getNamesOfCategories() { sort(namesOfCategories.begin(), namesOfCategories.end()); return namesOfCategories; } //set deault treatment, mothur sets this to column 2. void setDefaultClass(string); string getDefaultClass() { return defaultClass; } //number of treatments / columns in file int getNumCategories() { return (int)namesOfCategories.size(); } //number of groups / rows in file int getNumGroups() { return (int)designMap.size(); } //options to select groups based on values vector getNamesGroups() { return groups; } vector getNamesGroups(string, string); //get names groups with category and value. vector getNamesGroups(vector); //assume default category and get names groups that match any values in vector passed in. = F000142, F000132. //options to selects - may want to expand on these int getNumUnique(map >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: // map early, late>, male> would return 1. 
Only one group is male and from early or late. int getNumShared(map >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example: // map early, late>, male> would return 3. All three group have are either male or from early or late. vector getNamesUnique(map >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: // map early, late>, male> would return F000132. F000132 is the only group which is male and from early or late. vector getNamesShared(map >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example: // map early, late>, male> would return F000132, F000142, F000138. All three group have are either male or from early or late. int print(ofstream&); int printCategories(ofstream&, vector); //print certain categories int printGroups(ofstream&, vector); //print certain Groups private: string defaultClass; MothurOut* m; vector< map > totalCategories; //for each category, total groups assigned to it. vector[0] early -> 1, vector[1] male -> 2 vector groups; vector namesOfCategories; vector< vector > designMap; map indexGroupNameMap; //maps groupName to row in values map indexCategoryMap; //maps category to column in values Utils util; }; #endif /* defined(__Mothur__designmap__) */ mothur-1.48.0/source/datastructures/distancedb.cpp000077500000000000000000000077001424121717000223110ustar00rootroot00000000000000/* * distancedb.cpp * * * Created by Pat Schloss on 12/29/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "searchdatabase.hpp" #include "sequence.hpp" #include "distancedb.hpp" #include "eachgapignore.h" /**************************************************************************************************/ DistanceDB::DistanceDB() : SearchDatabase() { try { templateAligned = true; templateSeqsLength = 0; distCalculator = new eachGapIgnoreTermGapDist(1.0); } catch(exception& e) { m->errorOut(e, "DistanceDB", "DistanceDB"); exit(1); } } /**************************************************************************************************/ void DistanceDB::addSequence(Sequence seq) { try { //are the template sequences aligned if (!isAligned(seq.getAligned())) { templateAligned = false; m->mothurOut(seq.getName() + " is not aligned. 
Sequences must be aligned to use the distance method."); m->mothurOutEndLine(); } if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); } data.push_back(seq); } catch(exception& e) { m->errorOut(e, "DistanceDB", "addSequence"); exit(1); } } /**************************************************************************************************/ //returns indexes to top matches vector DistanceDB::findClosestSequences(Sequence* query, int numWanted, vector& Scores) const{ try { vector topMatches; Scores.clear(); bool templateSameLength = true; string sequence = query->getAligned(); vector dists; float searchScore = -1.0; if (numWanted > data.size()){ m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); } lock_guard guard(mutex); if (sequence.length() != templateSeqsLength) { templateSameLength = false; } if (templateSameLength && templateAligned) { if (numWanted != 1) { dists.resize(data.size()); //calc distance from this sequence to every sequence in the template for (int i = 0; i < data.size(); i++) { double dist = distCalculator->calcDist(*query, data[i]); //save distance to each template sequence dists[i].seq1 = -1; dists[i].seq2 = i; dists[i].dist = dist; } sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest //save distance of best match searchScore = dists[0].dist; Scores.push_back(searchScore); //fill topmatches with numwanted closest sequences indexes for (int i = 0; i < numWanted; i++) { topMatches.push_back(dists[i].seq2); Scores.push_back(dists[i].dist); } }else { int bestIndex = 0; float smallDist = 100000; for (int i = 0; i < data.size(); i++) { double dist = distCalculator->calcDist(*query, data[i]); //are you smaller? if (dist < smallDist) { bestIndex = i; smallDist = dist; } } searchScore = smallDist; topMatches.push_back(bestIndex); Scores.push_back(smallDist); } }else{ m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length."); m->mothurOutEndLine(); exit(1); } return topMatches; } catch(exception& e) { m->errorOut(e, "DistanceDB", "findClosestSequence"); exit(1); } } /**************************************************************************************************/ bool DistanceDB::isAligned(string seq){ try { bool aligned; int pos = seq.find_first_of(".-"); if (pos != seq.npos) { aligned = true; }else { aligned = false; } return aligned; } catch(exception& e) { m->errorOut(e, "DistanceDB", "isAligned"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/distancedb.hpp000077500000000000000000000013551424121717000223160ustar00rootroot00000000000000#ifndef DISTANCEDB_HPP #define DISTANCEDB_HPP /* * distancedb.hpp * * * Created by westcott on 1/27/10. * Copyright 2010 Schloss Lab. All rights reserved. 
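 *
 * An illustrative usage sketch (not part of the original source; the variable
 * names are hypothetical). The database is loaded with aligned template
 * sequences and then queried for the closest matches to an aligned query:
 *
 *     DistanceDB db;
 *     for (int i = 0; i < templates.size(); i++) { db.addSequence(templates[i]); }
 *     vector<float> scores;
 *     vector<int> hits = db.findClosestSequences(&query, 1, scores);  // indexes of closest templates
 *     string bestMatch = db.getName(hits[0]);                         // scores holds the matching distances
 *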
* */ #include "mothur.h" #include "calculator.h" class DistanceDB : public SearchDatabase { public: DistanceDB(); ~DistanceDB() { delete distCalculator; } void generateDB(){}; //doesn't generate a search db void addSequence(Sequence); string getName(int i) { return data[i].getName(); } vector findClosestSequences(Sequence*, int, vector&) const; // returns indexes of n closest sequences to query private: vector data; DistCalc* distCalculator; int templateSeqsLength; bool templateAligned; bool isAligned(string); }; #endif mothur-1.48.0/source/datastructures/fastamap.cpp000077500000000000000000000127321424121717000220060ustar00rootroot00000000000000/* * fastamap.cpp * mothur * * Created by Sarah Westcott on 1/16/09. * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved. * */ #include "fastamap.h" #include "sequence.hpp" /*******************************************************************************/ void FastaMap::readFastaFile(string inFileName) { try { ifstream in; util.openInputFile(inFileName, in); string name, sequence, line; sequence = ""; string temp; map::iterator itName; while(!in.eof()){ if (m->getControl_pressed()) { break; } Sequence currSeq(in); name = currSeq.getName(); if (name != "") { sequence = currSeq.getAligned(); itName = seqmap.find(name); if (itName == seqmap.end()) { seqmap[name] = sequence; } else { m->mothurOut("You already have a sequence named " + name + ", sequence names must be unique, please correct.\n"); } map::iterator it = data.find(sequence); if (it == data.end()) { //it's unique. data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. // data[sequence].groupnumber = 1; data[sequence].names = name; }else { // its a duplicate. data[sequence].names += "," + name; // data[sequence].groupnumber++; } } gobble(in); } in.close(); } catch(exception& e) { m->errorOut(e, "FastaMap", "readFastaFile"); exit(1); } } /*******************************************************************************/ void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //prints data ifstream oldNameFile; util.openInputFile(oldNameFileName, oldNameFile); map oldNameMap; map::iterator itName; string name, list; while(!oldNameFile.eof()){ if (m->getControl_pressed()) { break; } oldNameFile >> name; gobble(oldNameFile); oldNameFile >> list; oldNameMap[name] = list; gobble(oldNameFile); } oldNameFile.close(); ifstream inFASTA; util.openInputFile(inFastaFile, inFASTA); string sequence; while(!inFASTA.eof()){ if (m->getControl_pressed()) { break; } Sequence currSeq(inFASTA); name = currSeq.getName(); if (name != "") { sequence = currSeq.getAligned(); itName = seqmap.find(name); if (itName == seqmap.end()) { seqmap[name] = sequence; } else { m->mothurOut("You already have a sequence named " + name + ", sequence names must be unique, please correct.\n"); } seqmap[name] = sequence; map::iterator it = data.find(sequence); if (it == data.end()) { //it's unique. data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. // data[sequence].groupnumber = 1; data[sequence].names = oldNameMap[name]; }else { // its a duplicate. 
data[sequence].names += "," + oldNameMap[name]; // data[sequence].groupnumber++; } } gobble(inFASTA); } inFASTA.close(); } /*******************************************************************************/ string FastaMap::getGroupName(string seq) { //pass a sequence name get its group return data[seq].groupname; } /*******************************************************************************/ string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s. return data[seq].names; } /*******************************************************************************/ string FastaMap::getSequence(string name) { map::iterator it = seqmap.find(name); if (it == seqmap.end()) { return "not found"; } else { return it->second; } } /*******************************************************************************/ void FastaMap::push_back(string name, string seq) { map::iterator it = data.find(seq); if (it == data.end()) { //it's unique. data[seq].groupname = name; //group name will be the name of the first duplicate sequence found. data[seq].names = name; }else { // its a duplicate. data[seq].names += "," + name; } seqmap[name] = seq; } /*******************************************************************************/ int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences return data.size(); } /*******************************************************************************/ void FastaMap::printNamesFile(string outFileName){ //prints data try { ofstream outFile; util.openOutputFile(outFileName, outFile); // two column file created with groupname and them list of identical sequence names for (map::iterator it = data.begin(); it != data.end(); it++) { if (m->getControl_pressed()) { break; } outFile << it->second.groupname << '\t' << it->second.names << endl; } outFile.close(); } catch(exception& e) { m->errorOut(e, "FastaMap", "printNamesFile"); exit(1); } } /*******************************************************************************/ void FastaMap::printCondensedFasta(string outFileName){ //prints data try { ofstream out; util.openOutputFile(outFileName, out); //creates a fasta file for (map::iterator it = data.begin(); it != data.end(); it++) { if (m->getControl_pressed()) { break; } out << ">" << it->second.groupname << endl; out << it->first << endl; } out.close(); } catch(exception& e) { m->errorOut(e, "FastaMap", "printCondensedFasta"); exit(1); } } /*******************************************************************************/ mothur-1.48.0/source/datastructures/fastamap.h000077500000000000000000000033051424121717000214470ustar00rootroot00000000000000#ifndef FASTAMAP_H #define FASTAMAP_H /* * fastamap.h * mothur * * Created by Sarah Westcott on 1/16/09. * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved. * */ #include "mothurout.h" #include "utils.hpp" /* This class represents the fasta file. It reads a fasta file a populates the internal data structure "data". Data is a map where the key is the sequence and the value is a struct containing the sequences groupname, a list of the sequences names who have the same sequence and a number of how many sequence names there are. */ class FastaMap { public: FastaMap() { m = MothurOut::getInstance(); } ~FastaMap() = default;; string getGroupName(string); //pass a sequence name get its group string getNames(string); //pass a sequence get the string of names in the group separated by ','s. 
void push_back(string, string); //sequencename, sequence int sizeUnique(); //returns number of unique sequences void printNamesFile(string); //produces a 2 column file with the groupname in the first column and the names in the second column - a names file. void printCondensedFasta(string); //produces a fasta file. void readFastaFile(string); void readFastaFile(string, string); string getSequence(string); //pass it a name of a sequence, it returns the sequence. private: struct group { string groupname; //the group name for identical sequences, will be set to the first sequence found. string names; //the names of the sequence separated by ','. }; map data; //sequence, groupinfo - condensed representation of file map seqmap; //name, sequence - uncondensed representation of file MothurOut* m; Utils util; }; #endif mothur-1.48.0/source/datastructures/fastqread.cpp000077500000000000000000000354031424121717000221640ustar00rootroot00000000000000// // fastqread.cpp // Mothur // // Created by Sarah Westcott on 1/26/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #include "fastqread.h" /*******************************************************************************/ FastqRead::FastqRead() { try { m = MothurOut::getInstance(); format = "illumina1.8+"; name = ""; sequence = ""; scores.clear(); //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference. for (int i = -64; i < 65; i++) { char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499)); convertTable.push_back(temp); convertBackTable.push_back(((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499))); } } catch(exception& e) { m->errorOut(e, "FastqRead", "FastqRead"); exit(1); } } /*******************************************************************************/ FastqRead::FastqRead(Sequence s, QualityScores q) { try { m = MothurOut::getInstance(); format = "illumina1.8+"; //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference. for (int i = -64; i < 65; i++) { char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499)); convertTable.push_back(temp); convertBackTable.push_back(((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499))); } if (s.getName() != q.getName()) { m->mothurOut("[ERROR]: sequence name does not match quality score name. Found sequence named " + s.getName() + " quality scores named " + q.getName() + " Cannot construct fastq object.\n"); m->setControl_pressed(true); } else { name = s.getName(); comment = s.getComment(); sequence = s.getUnaligned(); scores = q.getScores(); scoreString = convertQual(scores); } } catch(exception& e) { m->errorOut(e, "FastqRead", "FastqRead"); exit(1); } } /*******************************************************************************/ FastqRead::FastqRead(Sequence s, QualityScores q, string f) { try { m = MothurOut::getInstance(); format = f; //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference. for (int i = -64; i < 65; i++) { char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499)); convertTable.push_back(temp); convertBackTable.push_back(((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499))); } if (s.getName() != q.getName()) { m->mothurOut("[ERROR]: sequence name does not match quality score name. 
Found sequence named " + s.getName() + " quality scores named " + q.getName() + " Cannot construct fastq object.\n"); m->setControl_pressed(true); } else { name = s.getName(); comment = s.getComment(); sequence = s.getUnaligned(); scores = q.getScores(); scoreString = convertQual(scores); } } catch(exception& e) { m->errorOut(e, "FastqRead", "FastqRead"); exit(1); } } /*******************************************************************************/ FastqRead::FastqRead(string f) { try { m = MothurOut::getInstance(); format = f; name = ""; sequence = ""; scores.clear(); //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference. for (int i = -64; i < 65; i++) { char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499)); convertTable.push_back(temp); convertBackTable.push_back(((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499))); } } catch(exception& e) { m->errorOut(e, "FastqRead", "FastqRead"); exit(1); } } /*******************************************************************************/ FastqRead::FastqRead(string f, string n, string s, vector sc) { try { m = MothurOut::getInstance(); format = f; name = n; sequence = s; scores = sc; //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference. for (int i = -64; i < 65; i++) { char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499)); convertTable.push_back(temp); convertBackTable.push_back(((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499))); } } catch(exception& e) { m->errorOut(e, "FastqRead", "FastqRead"); exit(1); } } /*******************************************************************************/ FastqRead::FastqRead(ifstream& in, bool& ignore, string f) { try { m = MothurOut::getInstance(); ignore = false; format = f; //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference. 
for (int i = -64; i < 65; i++) { char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499)); convertTable.push_back(temp); } //read sequence name string line = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpace(line); name = ""; if (pieces.size() != 0) { name = pieces[0]; } if (name == "") { m->mothurOut("[WARNING]: Blank fasta name, ignoring read.\n"); ignore=true; } else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read.\n"); ignore=true; } else { name = name.substr(1); } if (pieces.size() > 1) { pieces.erase(pieces.begin()); comment = util.getStringFromVector(pieces, " "); } //read sequence sequence = util.getline(in); gobble(in); if (sequence == "") { m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; } //read sequence name line = util.getline(in); gobble(in); pieces = util.splitWhiteSpace(line); string name2 = ""; if (pieces.size() != 0) { name2 = pieces[0]; } if (name2 == "") { m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; } else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; } else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } } //read quality scores string quality = util.getline(in); gobble(in); if (quality == "") { m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; } //sanity check sequence length and number of quality scores match if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } } if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; } scoreString = quality; scores = convertQual(quality); util.checkName(name); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + " " + sequence + " " + quality + "\n"); } } catch(exception& e) { m->errorOut(e, "FastqRead", "FastqRead"); exit(1); } } //********************************************************************************************************************** #ifdef USE_BOOST FastqRead::FastqRead(boost::iostreams::filtering_istream& in, bool& ignore, string f) { try { m = MothurOut::getInstance(); ignore = false; format = f; if (in.eof()) { ignore = true; } else { //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference. 
for (int i = -64; i < 65; i++) { char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499)); convertTable.push_back(temp); } //read sequence name string line = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpace(line); name = ""; if (pieces.size() != 0) { name = pieces[0]; } if (name == "") { m->mothurOut("[WARNING]: Blank fasta name, ignoring read.\n"); ignore=true; } else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read.\n"); ignore=true; } else { name = name.substr(1); } if (pieces.size() > 1) { pieces.erase(pieces.begin()); comment = util.getStringFromVector(pieces, " "); } //read sequence sequence = util.getline(in); gobble(in); if (sequence == "") { m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; } //read sequence name line = util.getline(in); gobble(in); pieces = util.splitWhiteSpace(line); string name2 = ""; if (pieces.size() != 0) { name2 = pieces[0]; } if (name2 == "") { m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; } else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; } else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } } //read quality scores string quality = util.getline(in); gobble(in); if (quality == "") { m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; } //sanity check sequence length and number of quality scores match if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } } if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; } scoreString = quality; scores = convertQual(quality); util.checkName(name); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + " " + sequence + " " + quality + "\n"); } } } catch(exception& e) { m->errorOut(e, "FastqRead", "FastqRead"); exit(1); } } #endif //********************************************************************************************************************** vector FastqRead::convertQual(string qual) { try { vector qualScores; bool negativeScores = false; for (int i = 0; i < qual.length(); i++) { int temp = 0; temp = int(qual[i]); if (format == "illumina") { temp -= 64; //char '@' }else if (format == "illumina1.8+") { temp -= int('!'); //char '!' //33 }else if (format == "solexa") { temp = int(convertTable[temp]); //convert to sanger temp -= int('!'); //char '!' //33 }else { temp -= int('!'); //char '!' //33 } if (temp < 0) { negativeScores = true; temp = 0; } qualScores.push_back(temp); } if (negativeScores) { m->mothurOut("[ERROR]: finding negative quality scores, do you have the right format selected? 
http://en.wikipedia.org/wiki/FASTQ_format#Encoding \n"); m->setControl_pressed(true); } return qualScores; } catch(exception& e) { m->errorOut(e, "FastqRead", "convertQual"); exit(1); } } //********************************************************************************************************************** string FastqRead::convertQual(vector qual) { try { string scoreString = ""; for (int i = 0; i < qual.size(); i++) { int controlChar = int('!'); if (format == "illumina") { controlChar = int('@'); } int temp = qual[i] + controlChar; if (format == "solexa") { temp = convertBackTable[temp]; } char qualChar = (char) temp; scoreString += qualChar; } return scoreString; } catch(exception& e) { m->errorOut(e, "FastqRead", "convertQual"); exit(1); } } //********************************************************************************************************************** void FastqRead::setScores(vector qual) { try { scoreString = ""; scores = qual; for (int i = 0; i < qual.size(); i++) { int controlChar = int('!'); if (format == "illumina") { controlChar = int('@'); } int temp = qual[i] + controlChar; if (format == "solexa") { temp = convertBackTable[temp]; } char qualChar = (char) temp; scoreString += qualChar; } } catch(exception& e) { m->errorOut(e, "FastqRead", "setScores"); exit(1); } } //********************************************************************************************************************** Sequence FastqRead::getSequence() { try { Sequence temp(name, sequence); return temp; } catch(exception& e) { m->errorOut(e, "FastqRead", "getSequence"); exit(1); } } //********************************************************************************************************************** void FastqRead::printFastq(ostream& out) { try { out << "@" << name << " " << comment << endl; out << sequence << endl; out << "+" << endl; out << scoreString << endl; } catch(exception& e) { m->errorOut(e, "FastqRead", "printFastq"); exit(1); } } //********************************************************************************************************************** QualityScores FastqRead::getQuality() { try { QualityScores temp(name, scores); return temp; } catch(exception& e) { m->errorOut(e, "FastqRead", "getQuality"); exit(1); } } /*******************************************************************************/ mothur-1.48.0/source/datastructures/fastqread.h000077500000000000000000000042051424121717000216250ustar00rootroot00000000000000// // fastqread.h // Mothur // // Created by Sarah Westcott on 1/26/15. // Copyright (c) 2015 Schloss Lab. All rights reserved. // #ifndef Mothur_fastqread_h #define Mothur_fastqread_h #include "mothur.h" #include "mothurout.h" #include "sequence.hpp" #include "qualityscores.h" /* This class is a representation of a fastqread. If no format is given, defaults to illumina1.8+. 
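   An illustrative usage sketch (not part of the original source; the stream name is hypothetical). A read is parsed directly from an open fastq stream and can be converted to mothur's Sequence and QualityScores objects:

       bool ignore;
       FastqRead fread(in, ignore, "illumina1.8+");
       if (!ignore) {
           Sequence seq = fread.getSequence();
           QualityScores qual = fread.getQuality();
       }

   For example, a single record in the file looks like: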
@M00704:50:000000000-A3G0K:1:1101:15777:1541 2:N:0:0 NCTCTACCAGGCCAAGCATAATGGGCGGGATCGTATCGAAGTAGCCTTGATGGGTAAGGTTGCCTGAGTTTCACAAGACAGATTACAGAGGTCGTCTATGCCCTGTCTCTTATACACATCTGACGCTGCCGACGAATAGAGAGGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAATATCGTCTAGGCCATGTGTGACGCTCGGTCTGGGCTTCACGAACAGGGGGTCCGCCATGTACCGCGCGCTC + #>>3AAFFFBAAFAGGFFFFGFHHHGGGG0EFGFHHFGHBFFGFDGHFGEGFFEBEGFCBFGFGFF2F4B3EGFHHHEHEHGHHH3FGHFG3BEEFHHHGGEGHFFHHEFGHHFHFHHF1B?FFD/AD/FC/<@D-.FGBF1<<<<< sc); FastqRead(ifstream&, bool&, string f); #ifdef USE_BOOST FastqRead(boost::iostreams::filtering_istream&, bool&, string f); #endif ~FastqRead() = default; void setFormat(string f) { format = f; } string getFormat() { return format; } string getName() { return name; } void setName(string n) { name = n; } string getSeq() { return sequence; } void setSeq(string s) { sequence = s; } vector getScores() { return scores; } void setScores(vector s); void printFastq(ostream&); Sequence getSequence(); QualityScores getQuality(); private: MothurOut* m; Utils util; vector scores; string name, comment; string sequence; string scoreString; string format; vector convertTable; vector convertBackTable; vector convertQual(string qual); string convertQual(vector); }; #endif mothur-1.48.0/source/datastructures/filefile.cpp000066400000000000000000000214401424121717000217620ustar00rootroot00000000000000// // filefile.cpp // Mothur // // Created by Sarah Westcott on 12/5/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #include "filefile.hpp" #include "utils.hpp" /**************************************************************************************************/ FileFile::FileFile(string f, string md) : filename(f), mode(md) { try { m = MothurOut::getInstance(); current = CurrentFile::getInstance(); mpath = current->getProgramPath(); columnWithGroups = false; fileOption = 0; gz = false; hasIndex = false; read(f, mode); } catch(exception& e) { m->errorOut(e, "FileFile", "FileFile"); exit(1); } } /************************************************************************************************** FileFile::FileFile(){ try { m = MothurOut::getInstance(); current = CurrentFile::getInstance(); inputDir = current->getInputDir(); mpath = current->getProgramPath(); fileOption = 0; gz = false; hasIndex = false; filename = ""; mode = ""; } catch(exception& e) { m->errorOut(e, "FileFile", "FileFile"); exit(1); } } file option 1 sfffile1 oligosfile1 sfffile2 oligosfile2 ... file option 2 fastqfile1 oligosfile1 fastqfile2 oligosfile2 ... file option 3 ffastqfile1 rfastqfile1 ffastqfile2 rfastqfile2 ... file option 4 group fastqfile fastqfile group fastqfile fastqfile group fastqfile fastqfile ... file option 5 My.forward.fastq My.reverse.fastq none My.rindex.fastq //none is an option is no forward or reverse index file ... 
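An illustrative usage sketch (not part of the original source; the file name is hypothetical). The parser is handed the file file and a mode, and exposes the validated rows:

    FileFile parser("stability.files", "contigs");
    vector<vector<string> > files = parser.getFiles();      // each row: forward, reverse, findex, rindex
    map<int, string> file2Group = parser.getFile2Group();   // row index -> group name, when provided
    bool compressed = parser.isGZ();                        // true only if every listed file is gz
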
*/ /**************************************************************************************************/ vector< vector > FileFile::read(string f, string mode){ try { filename = f; bool allGZ = true; bool allPlainTxt = true; ifstream in; util.openInputFile(filename, in); while(!in.eof()) { if (m->getControl_pressed()) { return files; } bool skip = false; string line = util.getline(in); gobble(in); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + line +"\n"); } if(line[0] == '#'){ } //ignore else { vector pieces = util.splitWhiteSpace(line); string group = ""; string forward, reverse, findex, rindex; skip = validateFiles(pieces, forward, reverse, findex, rindex, group); if (!skip) { //good pair file2Group[files.size()] = group; if (((findex != "") || (rindex != ""))) { hasIndex = true; } if ((mode == "contigs") || (mode == "sra")) { setGZ(forward, reverse, findex, rindex, allGZ, allPlainTxt); } vector pair; pair.push_back(forward); pair.push_back(reverse); pair.push_back(findex); pair.push_back(rindex); files.push_back(pair); } } } in.close(); if ((mode == "contigs") || (mode == "sra")){ if (allGZ) { gz = true; } else { gz = false; } } if (files.size() == 0) { m->setControl_pressed(true); } return files; } catch(exception& e) { m->errorOut(e, "FileFile", "read"); exit(1); } } /**************************************************************************************************/ bool FileFile::validateFiles(vector pieces, string& forward, string& reverse, string& findex, string& rindex, string& group){ try { bool skip = false; //innocent until proven guilty group = ""; if (pieces.size() == 2) { if (mode == "parsefastqpacbio") { group = pieces[0]; util.checkGroupName(group); forward = pieces[1]; reverse = ""; }else { forward = pieces[0]; reverse = pieces[1]; group = ""; } findex = ""; rindex = ""; fileOption = 1; }else if (pieces.size() == 3) { group = pieces[0]; util.checkGroupName(group); forward = pieces[1]; reverse = pieces[2]; if ((reverse == "none") || (reverse == "NONE")){ reverse = "NONE"; } findex = ""; rindex = ""; fileOption = 2; columnWithGroups = true; }else if (pieces.size() == 4) { forward = pieces[0]; reverse = pieces[1]; findex = pieces[2]; rindex = pieces[3]; if ((findex == "none") || (findex == "NONE")){ findex = "NONE"; } if ((rindex == "none") || (rindex == "NONE")){ rindex = "NONE"; } fileOption = 3; }else { m->mothurOut("[ERROR]: file lines can be 2, 3, or 4 columns. The forward fastq files in the first column and their matching reverse fastq files in the second column, or a groupName then forward fastq file and reverse fastq file, or forward fastq file then reverse fastq then forward index and reverse index file. If you only have one index file add 'none' for the other one. 
\n"); m->setControl_pressed(true); } if (m->getDebug()) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); } //check to make sure both are able to be opened bool openForward = util.checkLocations(forward, current->getLocations()); if (openForward) { if (util.isBlank(forward)) { m->mothurOut("[WARNING]: " + forward + " is blank, skipping.\n"); skip=true; } }else { m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n"); } bool openReverse = true; if ((reverse != "") && (reverse != "NONE")){ openReverse = util.checkLocations(reverse, current->getLocations()); if (openReverse) { if (util.isBlank(reverse)) { m->mothurOut("[WARNING]: " + reverse + " is blank, skipping.\n"); skip=true; } }else { m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n"); } } bool openFindex = true; if ((findex != "") && (findex != "NONE")){ openFindex = util.checkLocations(findex, current->getLocations()); if (openFindex) { if (util.isBlank(findex)) { m->mothurOut("[WARNING]: " + findex + " is blank, skipping.\n"); skip=true; } }else { m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n"); } } bool openRindex = true; if ((rindex != "") && (rindex != "NONE")) { openRindex = util.checkLocations(rindex, current->getLocations()); if (openRindex) { if (util.isBlank(rindex)) { m->mothurOut("[WARNING]: " + rindex + " is blank, skipping.\n"); skip=true; } }else { m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n"); } } if ((openForward) && (openReverse) && (openFindex) && (openRindex) && (!skip)) { //good pair return false; }else { return true; } } catch(exception& e) { m->errorOut(e, "FileFile", "validateFiles"); exit(1); } } /**************************************************************************************************/ void FileFile::setGZ(string forward, string reverse, string findex, string rindex, bool& allGZ, bool& allPlainTxt){ try { #ifdef USE_BOOST if (util.isGZ(forward)[1]) { allPlainTxt = false; } else { allGZ = false; } if (util.isGZ(reverse)[1]) { allPlainTxt = false; } else { allGZ = false; } if ((findex != "") && (findex != "NONE")) { if (util.isGZ(findex)[1]) { allPlainTxt = false; } else { allGZ = false; } } if ((rindex != "") && (rindex != "NONE")) { if (util.isGZ(rindex)[1]) { allPlainTxt = false; } else { allGZ = false; } } if (!allGZ && !allPlainTxt) { //mixed bag of files, uh oh... m->mothurOut("[ERROR]: Your files must all be in compressed .gz form or all in plain text form. Please correct. \n"); m->setControl_pressed(true); } #else allGZ=false; #endif } catch(exception& e) { m->errorOut(e, "FileFile", "setGZ"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/filefile.hpp000066400000000000000000000047531424121717000217770ustar00rootroot00000000000000// // filefile.hpp // Mothur // // Created by Sarah Westcott on 12/5/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef filefile_hpp #define filefile_hpp #include "mothurout.h" #include "utils.hpp" #include "currentfile.h" /* file option 1 sfffile1 oligosfile1 sfffile2 oligosfile2 ... file option 2 fastqfile1 oligosfile1 fastqfile2 oligosfile2 ... file option 3 ffastqfile1 rfastqfile1 ffastqfile2 rfastqfile2 ... file option 4 - only vaild if mode is set to parsefastqpacbio group1 pacBiofastqfile1 group2 pacBiofastqfile2 ... 
file option 5 group fastqfile fastqfile group fastqfile fastqfile group fastqfile fastqfile ... file option 6 My.forward.fastq My.reverse.fastq none My.rindex.fastq //none is an option is no forward or reverse index file ... ********* fileOption; //1 -> 2 column(4 forms of 2 column), 2 -> 3 column, 3 -> 4 column ****************** */ /**************************************************************************************************/ class FileFile { public: FileFile(string, string); //provide file file and read file, mode (ie. contigs) mode options include: mimarks,contigs,parseFastq,renameSeqs,sra,parsefastqpacbio ~FileFile() = default; vector< vector > getFiles() { return files; } bool isColumnWithGroupNames() { return columnWithGroups; } int getFileFormat() { return fileOption; } bool isGZ() { return gz; } //are files listed in file compressed bool containsIndexFiles() { return hasIndex; } //indicates oligos file is required map getFile2Group() { return file2Group; } //fileIndex2GroupName, files[0]'s group is -> file2Group[0] protected: MothurOut* m; CurrentFile* current; Utils util; string filename, mode, mpath; bool gz, hasIndex, columnWithGroups; int fileOption; //1 -> 2 column(3 forms of 2 column), 2 -> 3 column, 3 -> 4 column vector< vector > files; map file2Group; vector< vector > read(string, string); //read file, used with () constructor bool validateFiles(vector pieces, string& forward, string& reverse, string& findex, string& rindex, string& group); //checks locations, abletoOPen, fileOPtion void setGZ(string forward, string reverse, string findex, string rindex, bool&, bool&); }; /**************************************************************************************************/ #endif /* filefile_hpp */ mothur-1.48.0/source/datastructures/flowdata.cpp000077500000000000000000000213361424121717000220130ustar00rootroot00000000000000/* * flowdata.cpp * Mothur * * Created by Pat Schloss on 12/22/10. * Copyright 2010 Schloss Lab. All rights reserved. 
* */ #include "flowdata.h" //********************************************************************************************************************** FlowData::FlowData(){} //********************************************************************************************************************** FlowData::~FlowData(){ /* do nothing */ } //********************************************************************************************************************** FlowData::FlowData(int numFlows, float signal, float noise, int maxHomoP, string baseFlow) : numFlows(numFlows), signalIntensity(signal), noiseIntensity(noise), maxHomoP(maxHomoP), baseFlow(baseFlow){ try { m = MothurOut::getInstance(); flowData.assign(numFlows, 0); // baseFlow = "TACG"; seqName = ""; locationString = ""; } catch(exception& e) { m->errorOut(e, "FlowData", "FlowData"); exit(1); } } //********************************************************************************************************************** bool FlowData::getNext(ifstream& flowFile){ try { //read name seqName = getSequenceName(flowFile); if (m->getDebug()) { m->mothurOut("[DEBUG]: flow = " + seqName + " "); } //read end flow flowFile >> endFlow; if (m->getDebug()) { m->mothurOut(toString(endFlow) + " "); } if (m->getControl_pressed()) { return false; } //read flowgrams for(int i=0;i> flowData[i]; if (m->getDebug()) { m->mothurOut(toString(flowData[i]) + " "); } } if (m->getDebug()) { m->mothurOut("\n"); } gobble(flowFile); //process flowgrams updateEndFlow(); translateFlow(); if(flowFile){ return true; } else { return false; } } catch(exception& e) { m->errorOut(e, "FlowData", "getNext"); exit(1); } } //******************************************************************************************************************** string FlowData::getSequenceName(ifstream& flowFile) { try { string name = ""; flowFile >> name; if (name.length() != 0) { util.checkName(name); } else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". 
Blank name.\n"); m->setControl_pressed(true); } return name; } catch(exception& e) { m->errorOut(e, "FlowData", "getSequenceName"); exit(1); } } //********************************************************************************************************************** void FlowData::updateEndFlow(){ try{ if (baseFlow.length() > 4) { return; } //int currLength = 0; float maxIntensity = (float) maxHomoP + 0.49; int deadSpot = 0; while(deadSpot < endFlow){ int signal = 0; int noise = 0; for(int i=0;i signalIntensity){ signal++; if(intensity < noiseIntensity || intensity > maxIntensity){ noise++; } } } if(noise > 0 || signal == 0){ break; } deadSpot += baseFlow.length(); } endFlow = deadSpot; } catch(exception& e) { m->errorOut(e, "FlowData", "findDeadSpot"); exit(1); } } //********************************************************************************************************************** //TATGCT //1 0 0 0 0 1 //then the second positive flow is for a T, but you saw a T between the last and previous flow adn it wasn't positive, so something is missing //Becomes TNT void FlowData::translateFlow(){ try{ sequence = ""; set charInMiddle; int oldspot = -1; bool updateOld = false; for(int i=0;i= 1) { if (oldspot == -1) { updateOld = true; } else { //check for bases inbetween two 1's if (charInMiddle.count(base) != 0) { //we want to covert to an N sequence = sequence.substr(0, oldspot+1); sequence += 'N'; } updateOld = true; charInMiddle.clear(); } } for(int j=0;j 4){ sequence = sequence.substr(4); } else{ sequence = "NNNN"; } } catch(exception& e) { m->errorOut(e, "FlowData", "translateFlow"); exit(1); } } //********************************************************************************************************************** void FlowData::capFlows(int mF){ try{ maxFlows = mF; if(endFlow > maxFlows){ endFlow = maxFlows; } translateFlow(); } catch(exception& e) { m->errorOut(e, "FlowData", "capFlows"); exit(1); } } //********************************************************************************************************************** bool FlowData::hasGoodHomoP(){ try{ float maxIntensity = (float) maxHomoP + 0.49; for(int i=0;i maxIntensity){ return 0; } } return 1; } catch(exception& e) { m->errorOut(e, "FlowData", "hasMinFlows"); exit(1); } } //********************************************************************************************************************** bool FlowData::hasMinFlows(int minFlows){ try{ bool pastMin = 0; if(endFlow >= minFlows){ pastMin = 1; } return pastMin; } catch(exception& e) { m->errorOut(e, "FlowData", "hasMinFlows"); exit(1); } } //********************************************************************************************************************** Sequence FlowData::getSequence(){ try{ return Sequence(seqName, sequence); } catch(exception& e) { m->errorOut(e, "FlowData", "getSequence"); exit(1); } } //********************************************************************************************************************** void FlowData::printFlows(ofstream& outFlowFile){ try{ // outFlowFile << '>' << seqName << locationString << " length=" << seqLength << " numflows=" << maxFlows << endl; outFlowFile << seqName << ' ' << endFlow << ' ' << setprecision(2); for(int i=0;ierrorOut(e, "FlowData", "printFlows"); exit(1); } } //********************************************************************************************************************** void FlowData::printFlows(ofstream& outFlowFile, string scrapCode){ try{ outFlowFile << seqName << '|' << scrapCode << ' ' << endFlow << ' 
' << setprecision(2); for(int i=0;ierrorOut(e, "FlowData", "printFlows"); exit(1); } } //********************************************************************************************************************** void FlowData::printFlows(OutputWriter* out){ try{ // outFlowFile << '>' << seqName << locationString << " length=" << seqLength << " numflows=" << maxFlows << endl; string output = seqName + ' ' + toString(endFlow) + ' '; for(int i=0;iwrite(output); } catch(exception& e) { m->errorOut(e, "FlowData", "printFlows"); exit(1); } } //********************************************************************************************************************** void FlowData::printFlows(OutputWriter* out, string scrapCode){ try{ string output = seqName + '|' + scrapCode + ' ' + toString(endFlow) + ' '; for(int i=0;iwrite(output); } catch(exception& e) { m->errorOut(e, "FlowData", "printFlows"); exit(1); } } //********************************************************************************************************************** string FlowData::getName(){ try{ return seqName; } catch(exception& e) { m->errorOut(e, "FlowData", "getName"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/datastructures/flowdata.h000077500000000000000000000016431424121717000214570ustar00rootroot00000000000000#ifndef FLOWDATA_H #define FLOWDATA_H /* * flowdata.h * Mothur * * Created by Pat Schloss on 12/22/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "utils.hpp" #include "mothurout.h" #include "sequence.hpp" class FlowData { public: FlowData(); FlowData(int, float, float, int, string); ~FlowData(); bool getNext(ifstream&); string getName(); void capFlows(int); bool hasMinFlows(int); bool hasGoodHomoP(); Sequence getSequence(); void printFlows(ofstream&); void printFlows(ofstream&, string); void printFlows(OutputWriter*); void printFlows(OutputWriter*, string); private: MothurOut* m; Utils util; void updateEndFlow(); void translateFlow(); float signalIntensity, noiseIntensity; int maxHomoP; string seqName, locationString, sequence, baseFlow; int numFlows, maxFlows, endFlow; vector flowData; string getSequenceName(ifstream&); }; #endif mothur-1.48.0/source/datastructures/fullmatrix.cpp000077500000000000000000000145511424121717000224020ustar00rootroot00000000000000/* * fullmatrix.cpp * Mothur * * Created by Sarah Westcott on 3/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "fullmatrix.h" /**************************************************************************/ //This constructor reads a distance matrix file and stores the data in the matrix. 
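//
// The file is expected in phylip-style layout: the first entry is the number of
// sequences, and each row is a sequence name followed by its distances. Whether the
// matrix is square or lower-triangular is detected from the first row: if a value
// follows the first name the matrix is read as square, if a newline follows it is
// read as lower-triangular. A minimal sketch of the two layouts (hypothetical names
// and distances, shown only for illustration):
//
//   square:                      lower triangle:
//   3                            3
//   seqA 0.00 0.10 0.20          seqA
//   seqB 0.10 0.00 0.15          seqB 0.10
//   seqC 0.20 0.15 0.00          seqC 0.20 0.15
//
// When sim is true the values are treated as similarities and stored as 1.0 - value.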
FullMatrix::FullMatrix(ifstream& filehandle, GroupMap* g, bool s) : groupmap(g), sim(s) { try{ m = MothurOut::getInstance(); string name, group; filehandle >> numSeqs >> name; //make the matrix filled with zeros matrix.resize(numSeqs); for(int i = 0; i < numSeqs; i++) { matrix[i].resize(numSeqs, 0.0); } group = groupmap->getGroup(name); if(group == "not found") { m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct.\n"); exit(1); } index.resize(numSeqs); index[0].seqName = name; index[0].groupName = group; //determine if matrix is square or lower triangle //if it is square read the distances for the first sequence char d; bool square = true; while((d=filehandle.get()) != EOF){ //is d a number meaning its square if(isalnum(d)){ square = true; filehandle.putback(d); for(int i=0;i> matrix[0][i]; if (sim) { matrix[0][i] = 1.0 - matrix[0][i]; } } break; } //is d a line return meaning its lower triangle if(d == '\n'){ square = false; break; } } //read rest of matrix if (square) { readSquareMatrix(filehandle); } else { readLTMatrix(filehandle); } filehandle.close(); if (!m->getControl_pressed()) { sortGroups(0, numSeqs-1); } } catch(exception& e) { m->errorOut(e, "FullMatrix", "FullMatrix"); exit(1); } } /**************************************************************************/ int FullMatrix::readSquareMatrix(ifstream& filehandle) { try { int count = 0; string group, name; for(int i=1;i> name; group = groupmap->getGroup(name); index[i].seqName = name; index[i].groupName = group; if(group == "not found") { m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct.\n"); exit(1); } for(int j=0;jgetControl_pressed()) { return 0; } filehandle >> matrix[i][j]; if (sim) { matrix[i][j] = 1.0 - matrix[i][j]; } count++; } } if (m->getControl_pressed()) { return 0; } return 0; } catch(exception& e) { m->errorOut(e, "FullMatrix", "readSquareMatrix"); exit(1); } } /**************************************************************************/ int FullMatrix::readLTMatrix(ifstream& filehandle) { try { int count = 0; float distance; string group, name; for(int i=1;i> name; group = groupmap->getGroup(name); index[i].seqName = name; index[i].groupName = group; if(group == "not found") { m->mothurOut("Error: Sequence '" + name + "' was not found in the group file, please correct.\n"); exit(1); } for(int j=0;jgetControl_pressed()) { return 0; } filehandle >> distance; if (sim) { distance = 1.0 - distance; } matrix[i][j] = distance; matrix[j][i] = distance; count++; } } if (m->getControl_pressed()) { return 0; } return 0; } catch(exception& e) { m->errorOut(e, "FullMatrix", "readLTMatrix"); exit(1); } } /**************************************************************************/ void FullMatrix::sortGroups(int low, int high){ try{ if (low < high) { int i = low+1; int j = high; int pivot = (low+high) / 2; swapRows(low, pivot); //puts pivot in final spot /* compare value */ //what group does this row belong to string key = index[low].groupName; /* partition */ while(i <= j) { /* find member above ... */ while((i <= high) && (index[i].groupName <= key)) { i++; } /* find element below ... 
*/ while((j >= low) && (index[j].groupName > key)) { j--; } if(i < j) { swapRows(i, j); } } swapRows(low, j); /* recurse */ sortGroups(low, j-1); sortGroups(j+1, high); } } catch(exception& e) { m->errorOut(e, "FullMatrix", "sortGroups"); exit(1); } } /**************************************************************************/ void FullMatrix::swapRows(int i, int j) { try { float y; string z, name; /* swap rows*/ for (int h = 0; h < numSeqs; h++) { y = matrix[i][h]; matrix[i][h] = matrix[j][h]; matrix[j][h] = y; } /* swap columns*/ for (int b = 0; b < numSeqs; b++) { y = matrix[b][i]; matrix[b][i] = matrix[b][j]; matrix[b][j] = y; } //swap map elements z = index[i].groupName; index[i].groupName = index[j].groupName; index[j].groupName = z; name = index[i].seqName; index[i].seqName = index[j].seqName; index[j].seqName = name; } catch(exception& e) { m->errorOut(e, "FullMatrix", "swapRows"); exit(1); } } /**************************************************************************/ float FullMatrix::get(int i, int j){ return matrix[i][j]; } /**************************************************************************/ vector FullMatrix::getGroups(){ return groups; } /**************************************************************************/ vector FullMatrix::getSizes(){ return sizes; } /**************************************************************************/ int FullMatrix::getNumGroups(){ return groups.size(); } /**************************************************************************/ int FullMatrix::getNumSeqs(){ return numSeqs; } /**************************************************************************/ void FullMatrix::printMatrix(ostream& out) { try{ for (int i = 0; i < numSeqs; i++) { out << "row " << i << " group = " << index[i].groupName << " name = " << index[i].seqName << endl; for (int j = 0; j < numSeqs; j++) { out << i << '\t' << j << '\t' << matrix[i][j] << endl; } out << endl; } for (int i = 0; i < numSeqs; i++) { out << i << '\t' << index[i].seqName << endl; } } catch(exception& e) { m->errorOut(e, "FullMatrix", "printMatrix"); exit(1); } } /**************************************************************************/ mothur-1.48.0/source/datastructures/fullmatrix.h000077500000000000000000000025031424121717000220410ustar00rootroot00000000000000#ifndef FULLMATRIX_H #define FULLMATRIX_H /* * fullmatrix.h * Mothur * * Created by Sarah Westcott on 3/6/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothur.h" #include "groupmap.h" struct Names { string seqName; string groupName; }; class FullMatrix { public: //FullMatrix(){ m = MothurOut::getInstance(); } FullMatrix(ifstream&, GroupMap*, bool); ~FullMatrix(){}; int getNumSeqs(); vector getSizes(); vector getGroups(); void setGroups(vector names) { groups = names; } void setSizes(vector s) { sizes = s; } int getNumGroups(); void printMatrix(ostream&); float get(int, int); Names getRowInfo(int row) { return index[row]; } private: vector< vector > matrix; //a 2D distance matrix of all the sequences and their distances to eachother. int readSquareMatrix(ifstream&); int readLTMatrix(ifstream&); vector index; // row in vector, sequence group. need to know this so when we sort it can be updated. vector sizes; vector groups; void sortGroups(int, int); //this function sorts the sequences within the matrix. void swapRows(int, int); GroupMap* groupmap; //maps sequences to groups they belong to. 
int numSeqs; int numGroups; int numUserGroups; bool sim; MothurOut* m; }; #endif mothur-1.48.0/source/datastructures/groupmap.cpp000077500000000000000000000650541424121717000220510ustar00rootroot00000000000000/* * groupmap.cpp * Dotur * * Created by Sarah Westcott on 12/1/08. * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "groupmap.h" /************************************************************/ GroupMap::GroupMap(string filename) { m = MothurOut::getInstance(); groupFileName = filename; index = 0; } /************************************************************/ GroupMap::~GroupMap(){} /************************************************************/ int GroupMap::addSeq(string name, string group) { try { int error = 0; util.checkGroupName(group); setNamesOfGroups(group); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + name + "', group = '" + group + "'\n"); } util.checkName(name); it = groupmap.find(name); if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + name + ", sequence names must be unique. Please correct.\n"); } else { groupmap[name] = group; //store data in map seqsPerGroup[group]++; //increment number of seqs in that group } return error; } catch(exception& e) { m->errorOut(e, "GroupMap", "addSeq"); exit(1); } } /************************************************************/ int GroupMap::readMap(string filename, vector g) { try { groupFileName = filename; return (readMap(g)); } catch(exception& e) { m->errorOut(e, "GroupMap", "readMap"); exit(1); } } /************************************************************/ int GroupMap::readMap(vector g) { try { int error = 0; if (g.size() == 0) { return readMap(); } else { if (groupFileName == "") { m->mothurOut("[ERROR]: missing groupfile name, aborting.\n"); m->setControl_pressed(true); return 0; } ifstream fileHandle; util.openInputFile(groupFileName, fileHandle); string header = util.getline(fileHandle); vector pieces = util.splitWhiteSpace(header); string seqName = pieces[0]; string seqGroup = pieces[1]; if (seqName != "group") { //first group, not header if (util.inUsersGroups(seqGroup, g)) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("[ERROR]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } } } while (!fileHandle.eof()) { if (m->getControl_pressed()) { fileHandle.close(); return 1; } fileHandle >> seqName; gobble(fileHandle); fileHandle >> seqGroup; gobble(fileHandle); if (util.inUsersGroups(seqGroup, g)) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("[ERROR]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. 
Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } } } fileHandle.close(); } return error; } catch(exception& e) { m->errorOut(e, "GroupMap", "readMap"); exit(1); } } /************************************************************/ int GroupMap::readMap() { try { if (groupFileName == "") { m->mothurOut("[ERROR]: missing groupfile name, aborting.\n"); m->setControl_pressed(true); return 0; } string seqName, seqGroup; int error = 0; string rest = ""; char buffer[4096]; bool pairDone = false; bool columnOne = true; ifstream fileHandle; util.openInputFile(groupFileName, fileHandle); string header = util.getline(fileHandle); vector pieces = util.splitWhiteSpace(header); seqName = pieces[0]; seqGroup = pieces[1]; if (seqName != "group") { //first group, not header util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } } while (!fileHandle.eof()) { if (m->getControl_pressed()) { fileHandle.close(); return 1; } fileHandle.read(buffer, 4096); vector pieces = util.splitWhiteSpace(rest, buffer, fileHandle.gcount()); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } pairDone = false; } } } fileHandle.close(); if (rest != "") { vector pieces = util.splitWhiteSpace(rest); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. 
Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } pairDone = false; } } } return error; } catch(exception& e) { m->errorOut(e, "GroupMap", "readMap"); exit(1); } } /************************************************************/ int GroupMap::readDesignMap() { try { string seqName, seqGroup; int error = 0; string rest = ""; char buffer[4096]; bool pairDone = false; bool columnOne = true; if (groupFileName == "") { m->mothurOut("[ERROR]: missing groupfile name, aborting.\n"); m->setControl_pressed(true); return 0; } ifstream fileHandle; util.openInputFile(groupFileName, fileHandle); while (!fileHandle.eof()) { if (m->getControl_pressed()) { fileHandle.close(); return 1; } fileHandle.read(buffer, 4096); vector pieces = util.splitWhiteSpace(rest, buffer, fileHandle.gcount()); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } pairDone = false; } } } fileHandle.close(); if (rest != "") { vector pieces = util.splitWhiteSpace(rest); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. 
Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } pairDone = false; } } } return error; } catch(exception& e) { m->errorOut(e, "GroupMap", "readDesignMap"); exit(1); } } /************************************************************/ int GroupMap::readMap(string filename) { try { groupFileName = filename; ifstream fileHandle; util.openInputFile(filename, fileHandle); index = 0; string seqName, seqGroup; int error = 0; string rest = ""; char buffer[4096]; bool pairDone = false; bool columnOne = true; while (!fileHandle.eof()) { if (m->getControl_pressed()) { fileHandle.close(); return 1; } fileHandle.read(buffer, 4096); vector pieces = util.splitWhiteSpace(rest, buffer, fileHandle.gcount()); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } pairDone = false; } } } fileHandle.close(); if (rest != "") { vector pieces = util.splitWhiteSpace(rest); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } pairDone = false; } } } return error; } catch(exception& e) { m->errorOut(e, "GroupMap", "readMap"); exit(1); } } /************************************************************/ int GroupMap::readDesignMap(string filename) { try { groupFileName = filename; ifstream fileHandle; util.openInputFile(filename, fileHandle); index = 0; string seqName, seqGroup; int error = 0; string rest = ""; char buffer[4096]; bool pairDone = false; bool columnOne = true; while (!fileHandle.eof()) { if (m->getControl_pressed()) { fileHandle.close(); return 1; } fileHandle.read(buffer, 4096); vector pieces = util.splitWhiteSpace(rest, buffer, fileHandle.gcount()); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. 
Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } pairDone = false; } } } fileHandle.close(); if (rest != "") { vector pieces = util.splitWhiteSpace(rest); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { util.checkGroupName(seqGroup); setNamesOfGroups(seqGroup); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); } util.checkName(seqName); it = groupmap.find(seqName); if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); } else { groupmap[seqName] = seqGroup; //store data in map seqsPerGroup[seqGroup]++; //increment number of seqs in that group } pairDone = false; } } } return error; } catch(exception& e) { m->errorOut(e, "GroupMap", "readDesignMap"); exit(1); } } /************************************************************/ int GroupMap::getNumGroups() { return namesOfGroups.size(); } /************************************************************/ string GroupMap::getGroup(string sequenceName) { it = groupmap.find(sequenceName); if (it != groupmap.end()) { //sequence name was in group file return it->second; }else { //look for it in names of groups to see if the user accidently used the wrong file if (util.inUsersGroups(sequenceName, namesOfGroups)) { m->mothurOut("[WARNING]: Your group or design file contains a group named " + sequenceName + ". Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group.\n"); } return "not found"; } } /************************************************************/ vector GroupMap::getGroups(string sequenceNames) { try{ vector names; util.splitAtComma(sequenceNames, names); return (getGroups(names)); } catch(exception& e) { m->errorOut(e, "GroupMap", "getGroups"); exit(1); } } /************************************************************/ vector GroupMap::getGroups(vector sequenceNames) { try{ set repGroups; for (int i = 0; i < sequenceNames.size(); i++) { repGroups.insert(getGroup(sequenceNames[i])); } return (util.mothurConvert(repGroups)); } catch(exception& e) { m->errorOut(e, "GroupMap", "getGroups"); exit(1); } } /************************************************************/ int GroupMap::getNumSeqs(string sequenceNames, string group) { try{ vector names; util.splitAtComma(sequenceNames, names); return (getNumSeqs(names, group)); } catch(exception& e) { m->errorOut(e, "GroupMap", "getGroups"); exit(1); } } /************************************************************/ int GroupMap::getNumSeqs(vector sequenceNames, string group) { try{ int count = 0; for (int i = 0; i < sequenceNames.size(); i++) { if (group == getGroup(sequenceNames[i])) { count++; } } return count; } catch(exception& e) { m->errorOut(e, "GroupMap", "getGroups"); exit(1); } } /************************************************************/ void GroupMap::setGroup(string sequenceName, string groupN) { util.checkGroupName(groupN); setNamesOfGroups(groupN); util.checkName(sequenceName); it = groupmap.find(sequenceName); if (it != groupmap.end()) { m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence 
names must be unique. Please correct.\n"); } else { groupmap[sequenceName] = groupN; //store data in map seqsPerGroup[groupN]++; //increment number of seqs in that group } } /************************************************************/ void GroupMap::setNamesOfGroups(string seqGroup) { int i, count; count = 0; for (i=0; ierrorOut(e, "GroupMap", "isValidGroup"); exit(1); } } /************************************************************/ int GroupMap::getCopy(GroupMap* g) { try { vector names = g->getNamesSeqs(); for (int i = 0; i < names.size(); i++) { if (m->getControl_pressed()) { break; } string group = g->getGroup(names[i]); setGroup(names[i], group); } return names.size(); } catch(exception& e) { m->errorOut(e, "GroupMap", "getCopy"); exit(1); } } /************************************************************/ int GroupMap::getNumSeqs(string group) { try { map::iterator itNum; itNum = seqsPerGroup.find(group); if (itNum == seqsPerGroup.end()) { return 0; } return seqsPerGroup[group]; } catch(exception& e) { m->errorOut(e, "GroupMap", "getNumSeqs"); exit(1); } } /************************************************************/ int GroupMap::getNumSeqsSmallestGroup() { try { int smallestGroup = MOTHURMAX; for (map::iterator itNum = seqsPerGroup.begin(); itNum != seqsPerGroup.end(); itNum++) { if (itNum->second < smallestGroup) { smallestGroup = itNum->second; } } return smallestGroup; } catch(exception& e) { m->errorOut(e, "GroupMap", "getNumSeqsSmallestGroup"); exit(1); } } /************************************************************/ int GroupMap::renameSeq(string oldName, string newName) { try { map::iterator itName; itName = groupmap.find(oldName); if (itName == groupmap.end()) { m->mothurOut("[ERROR]: cannot find " + toString(oldName) + " in group file"); m->setControl_pressed(true); return 0; }else { string group = itName->second; groupmap.erase(itName); groupmap[newName] = group; } return 0; } catch(exception& e) { m->errorOut(e, "GroupMap", "renameSeq"); exit(1); } } /************************************************************/ int GroupMap::print(string outputName) { try { ofstream out; util.openOutputFile(outputName, out); for (map::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) { out << itName->first << '\t' << itName->second << endl; } out.close(); return 0; } catch(exception& e) { m->errorOut(e, "GroupMap", "print"); exit(1); } } /************************************************************/ int GroupMap::print(ofstream& out) { try { for (map::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) { out << itName->first << '\t' << itName->second << endl; } return 0; } catch(exception& e) { m->errorOut(e, "GroupMap", "print"); exit(1); } } /************************************************************/ int GroupMap::print(ofstream& out, vector userGroups) { try { for (map::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) { if (util.inUsersGroups(itName->second, userGroups)) { out << itName->first << '\t' << itName->second << endl; } } return 0; } catch(exception& e) { m->errorOut(e, "GroupMap", "print"); exit(1); } } /************************************************************/ vector GroupMap::getNamesSeqs(){ try { vector names; for (it = groupmap.begin(); it != groupmap.end(); it++) { names.push_back(it->first); } return names; } catch(exception& e) { m->errorOut(e, "GroupMap", "getNamesSeqs"); exit(1); } } /************************************************************/ vector GroupMap::getNamesSeqs(vector 
picked){ try { vector names; for (it = groupmap.begin(); it != groupmap.end(); it++) { //if you are belong to one the the groups in the picked vector add you if (util.inUsersGroups(it->second, picked)) { names.push_back(it->first); } } return names; } catch(exception& e) { m->errorOut(e, "GroupMap", "getNamesSeqs"); exit(1); } } /************************************************************/ vector GroupMap::getNamesSeqs(string picked){ try { vector names; for (it = groupmap.begin(); it != groupmap.end(); it++) { //if you are belong to one the the groups in the picked vector add you if (it->second == picked) { names.push_back(it->first); } } return names; } catch(exception& e) { m->errorOut(e, "GroupMap", "getNamesSeqs"); exit(1); } } /************************************************************/ mothur-1.48.0/source/datastructures/groupmap.h000077500000000000000000000070031424121717000215040ustar00rootroot00000000000000#ifndef GROUPMAP_H #define GROUPMAP_H /* * groupmap.h * Mothur * * Created by Sarah Westcott on 12/1/08. * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothur.h" #include "mothurout.h" #include "utils.hpp" /* This class is a representation of the groupfile. It is used by all the shared commands to determine what group a certain sequence belongs to. */ class GroupMap { public: GroupMap() { m = MothurOut::getInstance(); groupFileName = ""; } GroupMap(string); ~GroupMap(); int getCopy(GroupMap*); int readMap(); int readMap(vector groups); //selected groups read in. If groups.size() == 0, all groups are read int readMap(string, vector groups); //filename, selected groups. selected groups read in. If groups.size() == 0, all groups are read int readMap(string); int readDesignMap(); int readDesignMap(string); int getNumGroups(); bool isValidGroup(string); //return true if string is a valid group string getGroup(string); vector getGroups(string); //returns groups represented by the seqs passed in. Think column two from a namefile row (seq1,seq2,seq3,seq4,seq5) -> (group1,group2). seqs1,seq3 are from group1, seq2,seq4,seq5 are from group2. vector getGroups(vector); //returns groups represented by the seqs passed in. Think column two from a namefile row (seq1,seq2,seq3,seq4,seq5) stored as a vector of names -> (group1,group2). seqs1,seq3 are from group1, seq2,seq4,seq5 are from group2. int getNumSeqs(string, string); //list of seq names, group. returns number of seqs from group passed represented by the seqs passed in. Think column two from a namefile row (seq1,seq2,seq3,seq4,seq5), group1 -> 2. seqs1,seq3 are from group1, seq2,seq4,seq5 are from group2. int getNumSeqs(vector, string); //vector of seq names, group. returns number of seqs from group passed represented by the seqs passed in. Think column two from a namefile row (seq1,seq2,seq3,seq4,seq5), group1 -> 2. seqs1,seq3 are from group1, seq2,seq4,seq5 are from group2. 
void setGroup(string, string); vector getNamesOfGroups() { sort(namesOfGroups.begin(), namesOfGroups.end()); groupIndex.clear(); for (int i = 0; i < namesOfGroups.size(); i++) { groupIndex[namesOfGroups[i]] = i; } return namesOfGroups; } void removeGroups(vector groups); vector getNamesSeqs(); vector getNamesSeqs(string); //get names of seqs belonging to group passed in vector getNamesSeqs(vector); //get names of seqs belonging to the set of groups passed in void setNamesOfGroups(vector sn) { namesOfGroups = sn; } int getNumSeqs() { return (int)groupmap.size(); } int getNumSeqs(string); //return the number of seqs in a given group int getNumSeqsSmallestGroup(); //returns size of smallest group int renameSeq(string, string); int addSeq(string name, string group); int print(string); int print(ofstream&); int print(ofstream&, vector); //print certain groups map groupIndex; //groupname, vectorIndex in namesOfGroups. - used by collectdisplays and libshuff commands. private: vector namesOfGroups; MothurOut* m; string groupFileName; int index; map::iterator it; void setNamesOfGroups(string); map groupmap; //sequence name and groupname map seqsPerGroup; //maps groupname to number of seqs in that group Utils util; }; #endif mothur-1.48.0/source/datastructures/kmer.cpp000077500000000000000000000151571424121717000211540ustar00rootroot00000000000000/* * kmer.cpp * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "kmer.hpp" /**************************************************************************************************/ Kmer::Kmer(int size) : kmerSize(size) { // The constructor sets the size of the kmer int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; // No reason to waste the time of recalculating maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1; // powers of 4 everytime through. We need an // extra kmer if we get a non-ATGCU base } /**************************************************************************************************/ string Kmer::getKmerString(string sequence){ // Calculate kmer for each position in the sequence, count the freq int length = sequence.length(); // of each kmer, and convert it to an ascii character with base '!'. 
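	// A worked example of the kmer numbering used here (it mirrors the mapping shown
	// in getKmerBases): each base is a base-4 digit with A=0, C=1, G=2, T=3 and the
	// first base as the most significant digit, so for 6-mers
	//   ATGCAT -> 0*4^5 + 3*4^4 + 2*4^3 + 1*4^2 + 0*4^1 + 3*4^0 = 915.
	// Kmers containing a non-ACGT base all collapse to the extra index maxKmer. Each
	// kmer's count is then packed as the single character (char)('!' + count), so a
	// count of 0 prints as '!', 1 as '"', 2 as '#', and so on.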
int nKmers = length - kmerSize + 1; // Export the string of characters as a string vector counts(maxKmer, 0); for(int i=0;i > Kmer::getKmerCounts(string sequence){ // Calculate kmer for each position in the sequence, save info in a map int length = sequence.length(); // so you know at each spot in the sequence what kmers were found int nKmers = length - kmerSize + 1; // vector< map > counts; counts.resize(nKmers); // a map kmer counts for each spot map::iterator it; for(int i=0;i T [T] // Base5 = (915 / 4^1) % 4 = 228 % 4 = 0 => A [AT] // Base4 = (915 / 4^2) % 4 = 57 % 4 = 1 => C [CAT] // Base3 = (915 / 4^3) % 4 = 14 % 4 = 2 => G [GCAT] // Base2 = (915 / 4^4) % 4 = 3 % 4 = 3 => T [TGCAT] // Base1 = (915 / 4^5) % 4 = 0 % 4 = 0 => A [ATGCAT] -> this checks out with the previous method int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; string kmer = ""; if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){ // if the kmer number is the same as the maxKmer then it must for(int i=0;i=0;i--){ if(kmerString[i] == 'A') { reverse += 'T'; } else if(kmerString[i] == 'T'){ reverse += 'A'; } else if(kmerString[i] == 'G'){ reverse += 'C'; } else if(kmerString[i] == 'C'){ reverse += 'G'; } else { reverse += 'N'; } } int reverseNumber = getKmerNumber(reverse, 0); return reverseNumber; } /**************************************************************************************************/ char Kmer::getASCII(int number) { return (char)(33+number); } // '!' is the first printable char and // has the int value of 33 /**************************************************************************************************/ int Kmer::getNumber(char character) { return ((int)(character-'!')); } // '!' has the value of 33 /**************************************************************************************************/ mothur-1.48.0/source/datastructures/kmer.hpp000077500000000000000000000013751424121717000211560ustar00rootroot00000000000000#ifndef KMER_HPP #define KMER_HPP /* * kmer.hpp * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "mothur.h" /**************************************************************************************************/ class Kmer { public: Kmer(int); ~Kmer() = default; string getKmerString(string); int getKmerNumber(string, int); string getKmerBases(int); int getReverseKmerNumber(int); vector< map > getKmerCounts(string sequence); //for use in chimeraCheck private: char getASCII(int); int getNumber(char); int kmerSize; int maxKmer; int nKmers; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/datastructures/kmeralign.cpp000077500000000000000000000133361424121717000221640ustar00rootroot00000000000000// // kmeralign.cpp // Mothur // // Created by Pat Schloss on 4/6/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. 
// #include "kmeralign.h" #include "kmer.hpp" #include "alignment.hpp" /**************************************************************************************************/ KmerAlign::KmerAlign(int k) : kmerSize(k), kmerLibrary(k), Alignment() { try { int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; //maxKmer = kmerLibrary.getMaxKmer(); maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1; } catch(exception& e) { m->errorOut(e, "KmerAlign", "KmerAlign"); exit(1); } } /**************************************************************************************************/ KmerAlign::~KmerAlign(){ /* do nothing */ } /**************************************************************************************************/ //modelled after pandaseqs kmer align, assemble.c void KmerAlign::align(string A, string B, bool createBaseMap){ try { int aLength = A.length(); int bLength = B.length(); int maxOverlap = aLength; if (bLength < aLength) { maxOverlap = bLength; } maxOverlap -= 2; int nKmersA = A.length() - kmerSize + 1; vector< vector > kmerseen; //set all kmers to unseen kmerseen.resize(maxKmer); //for (int i = 0; i < maxKmer; i++) { kmerseen[i].resize(numKmers, 0); } int kmer; /* Scan forward sequence building k-mers and appending the position to kmerseen[k] */ for(int i=0;i overlaps; for(int i=0;i::iterator it = overlaps.begin(); it != overlaps.end(); it++) { int index = *it; int overlap = index + 2; //2 = minoverlap double probability = calcProb(A, B, overlap); //printf("overlap prob: %i, %f\n", overlap, probability); if (probability > bestProb && overlap >= 2) { bestProb = probability; bestOverlap = overlap; } } //printf("best overlap prob: %i, %f\n", bestOverlap, bestProb); if(bestOverlap != -1){ if((aLength-bestOverlap) > 0){ //add gaps to the start of B int numGaps = (aLength-bestOverlap); B = string(numGaps, '-') + B; if (createBaseMap) { for (int i = 0; i < bLength; i++) { BBaseMap[i+numGaps] = i; } for (int i = 0; i < aLength; i++) { ABaseMap[i] = i; } } }else { if (createBaseMap) { for (int i = 0; i < bLength; i++) { BBaseMap[i] = i; } for (int i = 0; i < aLength; i++) { ABaseMap[i] = i; } } } } int diff = B.length() - A.length(); if(diff > 0){ A = A + string(diff, '-'); } seqAaln = A; seqBaln = B; pairwiseLength = seqAaln.length(); } catch(exception& e) { m->errorOut(e, "KmerAlign", "align"); exit(1); } } /**************************************************************************************************/ //modelled after pandaseqs kmer align, assemble.c double KmerAlign::calcProb(string A, string B, int overlap){ try { double prob = 0; int aLength = A.length(); int bLength = B.length(); int unknown, match, mismatch; unknown = 0; match = 0; mismatch = 0; for (int i = 0; i < overlap; i++) { int findex = aLength + i - overlap; int rindex = i; if (findex < 0 || rindex < 0 || findex >= aLength || rindex >= bLength) continue; char f = A[findex]; char r = B[rindex]; if ((f == 'N') || (r == 'N')) { unknown++; } else if (r == f) { match++; } else { mismatch++; } } //ln(0.25 * (1 - 2 * 0.36 + 0.36 * 0.36)) double pmatch = -2.278869; //ln((3 * 0.36 - 2 * 0.36 * 0.36) / 18.0) double pmismatch = -3.087848; if (overlap >= aLength && overlap >= bLength) { prob = (-1.38629 * unknown + match * pmatch + mismatch * pmismatch); } else { prob = (-1.38629 * (aLength + bLength - 2 * overlap + unknown) + match * pmatch + mismatch * pmismatch); } return prob; } catch(exception& e) { m->errorOut(e, "KmerAlign", "calcProb"); exit(1); } } 
/**************************************************************************************************/ mothur-1.48.0/source/datastructures/kmeralign.h000077500000000000000000000016771424121717000216360ustar00rootroot00000000000000#ifndef KMERALIGN_N #define KMERALIGN_N /* * kmeralign.h * * * Created by Pat Schloss on 4/6/14. * Copyright 2014 Patrick D. Schloss. All rights reserved. * * This class is an Alignment child class that implements a kmer-based pairwise alignment algorithm * for making contigs of reads without insertions * * */ #include "alignment.hpp" #include "kmer.hpp" # define PHREDMAX 46 # define PHREDCLAMP(x) ((x) > PHREDMAX ? PHREDMAX : ((x) < 0 ? 0 : (x))) /**************************************************************************************************/ class KmerAlign : public Alignment { public: KmerAlign(int); ~KmerAlign(); void align(string, string, bool createBaseMap=false); private: int kmerSize; int maxKmer; Kmer kmerLibrary; double calcProb(string A, string B, int overlap); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/datastructures/kmerdb.cpp000077500000000000000000000225271424121717000214610ustar00rootroot00000000000000/* * kmerdb.cpp * * * Created by Pat Schloss on 12/16/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This class is a child class of the Database class, which stores the template sequences as a kmer table and provides * a method of searching the kmer table for the sequence with the most kmers in common with a query sequence. * kmerLocations is the primary storage variable that is a two-dimensional vector where each row represents the * different number of kmers and each column contains the index to sequences that use that kmer. * * Construction of an object of this type will first look for an appropriately named database file and if it is found * then will read in the database file (readKmerDB), otherwise it will generate one and store the data in memory * (generateKmerDB) * * The search method used here is roughly the same as that used in the SimRank program that is found at the * greengenes website. The default kmer size is 7. The speed complexity is between O(L) and O(LN). When I use 7mers * on average a kmer is found in ~100 other sequences with a database of ~5000 sequences. If this is the case then the * time would be on the order of O(0.1LN) -> fast. 
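 *
 * The score returned alongside each hit by findClosestSequences is the number of the
 * query's kmers found in the best matching template sequence, reported as a percentage
 * of the query's total kmer count (100 * shared kmers / numKmers).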
* */ #include "sequence.hpp" #include "kmer.hpp" #include "searchdatabase.hpp" #include "kmerdb.hpp" /**************************************************************************************************/ KmerDB::KmerDB(string fastaFileName, int kSize) : SearchDatabase(), kmerSize(kSize) { try { kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; count = 0; maxKmer = power4s[kmerSize]; kmerLocations.resize(maxKmer+1); CurrentFile* current; current = CurrentFile::getInstance(); version = current->getVersion(); } catch(exception& e) { m->errorOut(e, "KmerDB", "KmerDB"); exit(1); } } /**************************************************************************************************/ KmerDB::KmerDB() : SearchDatabase() { CurrentFile* current; current = CurrentFile::getInstance(); version = current->getVersion(); } /**************************************************************************************************/ KmerDB::~KmerDB(){} /**************************************************************************************************/ vector KmerDB::findClosestSequences(Sequence* candidateSeq, int num, vector& Scores) const{ try { if (num > numSeqs) { m->mothurOut("[WARNING]: you requested " + toString(num) + " closest sequences, but the template only contains " + toString(numSeqs) + ", adjusting.\n"); num = numSeqs; } vector topMatches; Kmer kmer(kmerSize); float searchScore = 0; Scores.clear(); vector matches(numSeqs, 0); // a record of the sequences with shared kmers vector timesKmerFound(kmerLocations.size()+1, false); // a record of the kmers that we have already found int numKmers = candidateSeq->getNumBases() - kmerSize + 1; for(int i=0;igetUnaligned(), i); // go through the query sequence and get a kmer number if(!timesKmerFound[kmerNumber]){ // if we haven't seen it before... for(int j=0;j seqMatches; seqMatches.resize(numSeqs); for(int i=0;i bestMatch) { bestIndex = i; bestMatch = matches[i]; } } searchScore = bestMatch; searchScore = 100 * searchScore / (float) numKmers; // return the Sequence object corresponding to the db topMatches.push_back(bestIndex); Scores.push_back(searchScore); } return topMatches; } catch(exception& e) { m->errorOut(e, "KmerDB", "findClosestSequences"); exit(1); } } /**************************************************************************************************/ //print shortcut file void KmerDB::generateDB(){ try { ofstream kmerFile; // once we have the kmerLocations folder print it out util.openOutputFile(kmerDBName, kmerFile); // to a file //output version kmerFile << "#" << version << endl; for(int i=0;ierrorOut(e, "KmerDB", "generateDB"); exit(1); } } /**************************************************************************************************/ void KmerDB::addSequence(Sequence seq) { try { Kmer kmer(kmerSize); string unaligned = seq.getUnaligned(); // ...take the unaligned sequence... 
int numKmers = unaligned.length() - kmerSize + 1; vector seenBefore(maxKmer+1,0); for(int j=0;jerrorOut(e, "KmerDB", "addSequence"); exit(1); } } /**************************************************************************************************/ //reads fasta file void KmerDB::readSeqs(ifstream& fastaFile){ try { while (!fastaFile.eof()) { if (m->getControl_pressed()) { break; } Sequence seq(fastaFile); gobble(fastaFile); addSequence(seq); } } catch(exception& e) { m->errorOut(e, "KmerDB", "readSeqs"); exit(1); } } /**************************************************************************************************/ //reads shortcut file void KmerDB::readDB(ifstream& kmerDBFile){ try { kmerDBFile.seekg(0); // start at the beginning of the file //read version string line = util.getline(kmerDBFile); gobble(kmerDBFile); string seqName; int seqNumber; for(int i=0;i> seqName >> numValues; for(int j=0;j> seqNumber; // 1. number of sequences with the kmer number kmerLocations[i].push_back(seqNumber); // 2. sequence indices } } kmerDBFile.close(); } catch(exception& e) { m->errorOut(e, "KmerDB", "readDB"); exit(1); } } /**************************************************************************************************/ int KmerDB::getCount(int kmer) { try { if (kmer < 0) { return 0; } //if user gives negative number else if (kmer > maxKmer) { return 0; } //or a kmer that is bigger than maxkmer else { return kmerLocations[kmer].size(); } // kmer is in vector range } catch(exception& e) { m->errorOut(e, "KmerDB", "getCount"); exit(1); } } /**************************************************************************************************/ int KmerDB::getReversed(int kmerNumber) { try { Kmer kmer(kmerSize); if (kmerNumber < 0) { return 0; } //if user gives negative number else if (kmerNumber > maxKmer) { return 0; } //or a kmer that is bigger than maxkmer else { return kmer.getReverseKmerNumber(kmerNumber); } // kmer is in vector range } catch(exception& e) { m->errorOut(e, "KmerDB", "getReversed"); exit(1); } } /**************************************************************************************************/ vector KmerDB::getSequencesWithKmer(int kmer) { try { vector seqs; if (kmer < 0) { } //if user gives negative number else if (kmer > maxKmer) { } //or a kmer that is bigger than maxkmer else { seqs = kmerLocations[kmer]; } return seqs; } catch(exception& e) { m->errorOut(e, "KmerDB", "getSequencesWithKmer"); exit(1); } } /**************************************************************************************************/ /**************************************************************************************************/ mothur-1.48.0/source/datastructures/kmerdb.hpp000077500000000000000000000031631424121717000214610ustar00rootroot00000000000000#ifndef KMERDB_HPP #define KMERDB_HPP /* * kmerdb.h * * * Created by Pat Schloss on 12/16/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This class is a child class of the Database class, which stores the template sequences as a kmer table and provides * a method of searching the kmer table for the sequence with the most kmers in common with a query sequence. * kmerLocations is the primary storage variable that is a two-dimensional vector where each row represents the * different number of kmers and each column contains the index to sequences that use that kmer. 
* * Construction of an object of this type will first look for an appropriately named database file and if it is found * then will read in the database file (readKmerDB), otherwise it will generate one and store the data in memory * (generateKmerDB) */ #include "mothur.h" #include "searchdatabase.hpp" class KmerDB : public SearchDatabase { public: KmerDB(string, int); KmerDB(); ~KmerDB(); void generateDB(); void addSequence(Sequence); vector findClosestSequences(Sequence*, int, vector&) const; void readDB(ifstream&); //reads in a shortcut file void readSeqs(ifstream&); //reads in a fasta file int getCount(int); //returns number of sequences with that kmer number vector getSequencesWithKmer(int); //returns vector of sequences that contain kmer passed in int getReversed(int); //returns reverse compliment kmerNumber int getMaxKmer() { return maxKmer; } private: string version; int kmerSize; int maxKmer, count; string kmerDBName; vector > kmerLocations; }; #endif mothur-1.48.0/source/datastructures/listvector.cpp000077500000000000000000000427501424121717000224130ustar00rootroot00000000000000/* * list.cpp * * * Created by Pat Schloss on 8/8/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "sabundvector.hpp" #include "rabundvector.hpp" #include "ordervector.hpp" #include "listvector.hpp" //sorts highest to lowest /***********************************************************************/ inline bool abundNamesSort(string left, string right){ int countLeft = 0; if(left != ""){ countLeft = 1; for(int i=0;i countRight) { return true; } return false; } //sorts highest to lowest /***********************************************************************/ inline bool abundNamesSort2(listCt left, listCt right){ if (left.bin == "") { return false; } if (right.bin == "") { return true; } if (left.binSize > right.binSize) { return true; } return false; } /***********************************************************************/ ListVector::ListVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0), otuTag("Otu"), printListHeaders(true) {} /***********************************************************************/ ListVector::ListVector(string oTag) : DataVector(), maxRank(0), numBins(0), numSeqs(0), otuTag(oTag), printListHeaders(true) {} /***********************************************************************/ ListVector::ListVector(int n): DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0), otuTag("Otu"), printListHeaders(true){} /***********************************************************************/ ListVector::ListVector(int n, string oTag): DataVector(), data(n, "") , maxRank(0), numBins(0), numSeqs(0), otuTag(oTag), printListHeaders(true){} /***********************************************************************/ ListVector::ListVector(string id, vector lv, string& tag) : DataVector(id), data(lv){ try { printListHeaders = true; otuTag = tag; Utils util; for(int i=0;i maxRank) { maxRank = binSize; } numSeqs += binSize; } } } catch(exception& e) { m->errorOut(e, "ListVector", "ListVector"); exit(1); } } /**********************************************************************/ ListVector::ListVector(ifstream& f, string& readHeaders, string& labelTag) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { printListHeaders = true; int thisNumBins = 0; Utils util; //are we at the beginning of the file?? 
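		// A sketch of the two layouts handled below (hypothetical sequence names, for
		// illustration only). A list file may begin with a header row naming each OTU:
		//
		//   label	numOtus	Otu001	Otu002
		//   0.03	2	seqA,seqB	seqC
		//
		// or it may omit the header, in which case each row is simply
		//
		//   0.03	2	seqA,seqB	seqC
		//
		// and OTU labels (Otu001, Otu002, ...) are generated from labelTag instead.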
if (readHeaders == "") { f >> label; //is this a shared file that has headers if (label == "label") { //gets "numOtus" f >> label; gobble(f); //eat rest of line label = util.getline(f); gobble(f); //parse labels to save istringstream iStringStream(label); while(!iStringStream.eof()){ if (m->getControl_pressed()) { break; } string temp; iStringStream >> temp; gobble(iStringStream); binLabels.push_back(temp); } if (binLabels.size() != 0) { string binLabelTag = binLabels[0]; labelTag = ""; for (int i = 0; i < binLabelTag.length(); i++) { if (isalpha(binLabelTag[i])){ labelTag += binLabelTag[i]; } } } f >> label >> thisNumBins; }else { //read in first row f >> thisNumBins; //make binlabels because we don't have any string snumBins = toString(thisNumBins); if (labelTag == "") { labelTag = "Otu"; } for (int i = 0; i < thisNumBins; i++) { //if there is a bin label use it otherwise make one string binLabel = labelTag; string sbinNumber = toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; binLabels.push_back(binLabel); } } }else { f >> label >> thisNumBins; } gobble(f); data.assign(thisNumBins, ""); string inputData = ""; otuTag = labelTag; string buffer = util.getline(f); gobble(f); vector thisListBins = util.splitWhiteSpace(buffer); if (thisListBins.size() != thisNumBins) { m->mothurOut("[ERROR]: Your list file indicates you have " + toString(thisNumBins) + ", but mothur found " + toString(thisListBins.size())+ " bins in your file, please correct.\n"); m->setControl_pressed(true); }else { for(int i=0;ierrorOut(e, "ListVector", "ListVector"); exit(1); } } /***********************************************************************/ void ListVector::set(int binNumber, string seqNames){ try { Utils util; int nNames_old = util.getNumNames(data[binNumber]); data[binNumber] = seqNames; int nNames_new = util.getNumNames(seqNames); if(nNames_old == 0) { numBins++; } if(nNames_new == 0) { numBins--; } if(nNames_new > maxRank) { maxRank = nNames_new; } numSeqs += (nNames_new - nNames_old); } catch(exception& e) { m->errorOut(e, "ListVector", "set"); exit(1); } } /***********************************************************************/ string ListVector::get(int index){ if (index < data.size()) { return data[index]; } return ""; } /***********************************************************************/ void ListVector::setLabels(vector labels){ try { binLabels = labels; getLabels(); } catch(exception& e) { m->errorOut(e, "ListVector", "setLabels"); exit(1); } } /***********************************************************************/ //could potentially end up with duplicate binlabel names with code below. //we don't currently use them in a way that would do that. //if you had a listfile that had been subsampled and then added to it, dup names would be possible. 
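//
// A minimal usage sketch (illustrative only; the bin strings below are made-up example data,
// not taken from this source):
//
//     ListVector list("Otu");
//     list.push_back("a,b,c,d,e,f");                  // OTU containing six sequences
//     list.push_back("g,h,i");                        // OTU containing three sequences
//     list.push_back("j");                            // singleton OTU
//     RAbundVector rabund = list.getRAbundVector();   // abundances 6 3 1
//     SAbundVector sabund = list.getSAbundVector();   // one OTU of size 1, one of size 3, one of size 6
//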
vector ListVector::getLabels(){ try { Utils util; util.getOTUNames(binLabels, numBins, otuTag); return binLabels; } catch(exception& e) { m->errorOut(e, "ListVector", "getLabels"); exit(1); } } /***********************************************************************/ string ListVector::getOTUName(int bin){ try { if (binLabels.size() > bin) { } else { getLabels(); } return binLabels[bin]; } catch(exception& e) { m->errorOut(e, "ListVector", "getOTUName"); exit(1); } } /***********************************************************************/ int ListVector::getOTUTotal(string otuLabel){ try { //find bin number int binNumber = -1; getLabels(); for (int i = 0; i < binLabels.size(); i++) { if (util.getSimpleLabel(binLabels[i]) == util.getSimpleLabel(otuLabel)) { binNumber = i; break; } } if (binNumber == -1) { return 0; } return (util.getNumNames(get(binNumber))); } catch(exception& e) { m->errorOut(e, "ListVector", "getOTUTotal"); exit(1); } } /***********************************************************************/ void ListVector::push_back(string seqNames){ try { Utils util; data.push_back(seqNames); int nNames = util.getNumNames(seqNames); numBins++; if(nNames > maxRank) { maxRank = nNames; } numSeqs += nNames; } catch(exception& e) { m->errorOut(e, "ListVector", "push_back"); exit(1); } }/***********************************************************************/ int ListVector::push_back(string bin, int nNames, string binLabel){ try { if (binLabel == "") { //create one int otuNum = 1; bool notDone = true; //find label prefix string prefix = "Otu"; if (binLabels.size() != 0) { if (binLabels[binLabels.size()-1][0] == 'P') { prefix = "PhyloType"; } string tempLabel = binLabels[binLabels.size()-1]; string simpleLastLabel = util.getSimpleLabel(tempLabel); util.mothurConvert(simpleLastLabel, otuNum); otuNum++; } string potentialLabel = toString(otuNum); while (notDone) { if (m->getControl_pressed()) { notDone = false; break; } potentialLabel = toString(otuNum); vector::iterator it = find(binLabels.begin(), binLabels.end(), potentialLabel); if (it == binLabels.end()) { potentialLabel = prefix + toString(otuNum); it = find(binLabels.begin(), binLabels.end(), potentialLabel); if (it == binLabels.end()) { notDone = false; break; } } otuNum++; } binLabel = potentialLabel; } binLabels.push_back(binLabel); data.push_back(bin); numBins++; if(nNames > maxRank) { maxRank = nNames; } numSeqs += nNames; return 0; } catch(exception& e) { m->errorOut(e, "ListVector", "push_back"); exit(1); } } /***********************************************************************/ void ListVector::resize(int size){ data.resize(size); } /***********************************************************************/ int ListVector::size(){ return data.size(); } /***********************************************************************/ void ListVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; return data.clear(); } /***********************************************************************/ void ListVector::printHeaders(ostream& output, map& ct, bool sortPlease){ try { if (printListHeaders) { if (binLabels.size() == 0) { sortPlease = false; } //we are creating arbitary otuNames vector theseLabels = getLabels(); output << "label\tnum" + otuTag + "s"; if (sortPlease) { Utils util; vector hold; for (int i = 0; i < data.size(); i++) { if (data[i] != "") { vector binNames; string bin = data[i]; util.splitAtComma(bin, binNames); int total = 0; for (int j = 0; j < binNames.size(); j++) { map::iterator it = ct.find(binNames[j]); if (it == 
ct.end()) { m->mothurOut("[ERROR]: " + binNames[j] + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { total += ct[binNames[j]]; } } listCt temp(data[i], total, theseLabels[i]); hold.push_back(temp); } } sort(hold.begin(), hold.end(), abundNamesSort2); //print original label for sorted by abundance otu for (int i = 0; i < hold.size(); i++) { output << '\t' << hold[i].label; } }else { for (int i = 0; i < theseLabels.size(); i++) { output << '\t' << theseLabels[i]; } } output << endl; printListHeaders = false; } } catch(exception& e) { m->errorOut(e, "ListVector", "printHeaders"); exit(1); } } /***********************************************************************/ void ListVector::print(ostream& output, map& ct){ try { printHeaders(output, ct, true); output << label << '\t' << numBins; Utils util; vector hold; for (int i = 0; i < data.size(); i++) { if (data[i] != "") { vector binNames; string bin = data[i]; util.splitAtComma(bin, binNames); int total = 0; for (int j = 0; j < binNames.size(); j++) { map::iterator it = ct.find(binNames[j]); if (it == ct.end()) { m->mothurOut("[ERROR]: " + binNames[j] + " is not in your count table. Please correct.\n"); m->setControl_pressed(true); }else { total += ct[binNames[j]]; } } listCt temp(data[i], total, ""); hold.push_back(temp); } } sort(hold.begin(), hold.end(), abundNamesSort2); for(int i=0;ierrorOut(e, "ListVector", "print"); exit(1); } } /***********************************************************************/ void ListVector::print(ostream& output){ try { map ct; for (int i = 0; i < data.size(); i++) { if (data[i] != "") { string bin = data[i]; vector binNames; util.splitAtComma(bin, binNames); for (int j = 0; j < binNames.size(); j++) { ct[binNames[j]] = 1; } } } print(output, ct); } catch(exception& e) { m->errorOut(e, "ListVector", "print"); exit(1); } } /***********************************************************************/ //no sort for subsampling and get.otus and remove.otus void ListVector::print(ostream& output, bool sortOtus){ try { map ct; for (int i = 0; i < data.size(); i++) { if (data[i] != "") { string bin = data[i]; vector binNames; util.splitAtComma(bin, binNames); for (int j = 0; j < binNames.size(); j++) { ct[binNames[j]] = 1; } } } printHeaders(output, ct, sortOtus); output << label << '\t' << numBins; vector hold = data; if (sortOtus) { sort(hold.begin(), hold.end(), abundNamesSort); } //find first non blank otu int start = 0; for(int i=0;ierrorOut(e, "ListVector", "print"); exit(1); } } /***********************************************************************/ RAbundVector ListVector::getRAbundVector(){ try { RAbundVector rav; Utils util; for(int i=0;ierrorOut(e, "ListVector", "getRAbundVector"); exit(1); } } /***********************************************************************/ SAbundVector ListVector::getSAbundVector(){ try { SAbundVector sav(maxRank+1); Utils util; for(int i=0;ierrorOut(e, "ListVector", "getSAbundVector"); exit(1); } } /***********************************************************************/ OrderVector ListVector::getOrderVector(map* orderMap = nullptr){ try { Utils util; if(orderMap == nullptr){ vector ovData; for(int i=0;icount(seqName) == 0){ m->mothurOut(seqName + " not found, check *.names file\n"); exit(1); } ov.set((*orderMap)[seqName], i); seqName = ""; } } if(orderMap->count(seqName) == 0){ m->mothurOut(seqName + " not found, check *.names file\n"); exit(1); } ov.set((*orderMap)[seqName], i); } ov.setLabel(label); ov.getNumBins(); return ov; } } 
catch(exception& e) { m->errorOut(e, "ListVector", "getOrderVector"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/listvector.hpp000066400000000000000000000041741424121717000224130ustar00rootroot00000000000000#ifndef LIST_H #define LIST_H #include "datavector.hpp" /* DataStructure for a list file. This class is a child to datavector. It represents OTU information at a certain distance. A list vector can be converted into and ordervector, rabundvector or sabundvector. Each member of the internal container "data" represents an individual OTU. So data[0] = "a,b,c,d,e,f". example: listvector = a,b,c,d,e,f g,h,i j,k l m rabundvector = 6 3 2 1 1 sabundvector = 2 1 1 0 0 1 ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ class ListVector : public DataVector { public: ListVector(); ListVector(string); ListVector(int); ListVector(int, string); ListVector(string, vector, string&); ListVector(const ListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs), binLabels(lv.binLabels), otuTag(lv.otuTag), printListHeaders(lv.printListHeaders) {}; ListVector(ifstream&, string&, string&); ~ListVector(){}; int getNumBins() { return numBins; } int getNumSeqs() { return numSeqs; } int getMaxRank() { return maxRank; } void set(int, string); string get(int); vector getLabels(); string getOTUName(int bin); int getOTUTotal(string otuLabel); //returns 0 if otuLabel is not found void setLabels(vector); bool getPrintedLabels(); void setPrintedLabels(bool pl) { printListHeaders = pl; } void push_back(string); int push_back(string, int, string binLabel=""); void resize(int); void clear(); int size(); void print(ostream&); void print(ostream&, bool); void print(ostream&, map&); RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); private: vector data; //data[i] is a list of names of sequences in the ith OTU. int maxRank; int numBins; int numSeqs; vector binLabels; string otuTag; bool printListHeaders; void printHeaders(ostream&, map&, bool); }; #endif mothur-1.48.0/source/datastructures/nameassignment.cpp000077500000000000000000000065531424121717000232270ustar00rootroot00000000000000 #include "nameassignment.hpp" //********************************************************************************************************************** NameAssignment::NameAssignment(string nameMapFile){ m = MothurOut::getInstance(); util.openInputFile(nameMapFile, fileHandle); } //********************************************************************************************************************** NameAssignment::NameAssignment(){ m = MothurOut::getInstance(); } //********************************************************************************************************************** void NameAssignment::readMap(){ try{ string firstCol, secondCol, skip; // int index = 0; map::iterator itData; int rowIndex = 0; while(fileHandle){ fileHandle >> firstCol; gobble(fileHandle); //read from first column fileHandle >> secondCol; //read from second column if (m->getDebug()) { m->mothurOut("[DEBUG]: firstCol = " + firstCol + ", secondCol= " + secondCol + "\n"); } itData = (*this).find(firstCol); if (itData == (*this).end()) { (*this)[firstCol] = rowIndex++; list.push_back(secondCol); //adds data's value to list reverse[rowIndex] = firstCol; }else{ m->mothurOut(firstCol + " is already in namesfile. 
I will use first definition.\n"); } gobble(fileHandle); } fileHandle.close(); } catch(exception& e) { m->errorOut(e, "NameAssignment", "readMap"); exit(1); } } //********************************************************************************************************************** void NameAssignment::push_back(string name) { try{ int num = (*this).size(); (*this)[name] = num; reverse[num] = name; list.push_back(name); } catch(exception& e) { m->errorOut(e, "NameAssignment", "push_back"); exit(1); } } //********************************************************************************************************************** ListVector NameAssignment::getListVector(void){ return list; } //********************************************************************************************************************** void NameAssignment::print(ostream& out){ try { map::iterator it; for(it = (*this).begin(); it!=(*this).end(); it++){ out << it->first << '\t' << it->second << endl; //prints out keys and values of the map this. } } catch(exception& e) { m->errorOut(e, "NameAssignment", "print"); exit(1); } } //********************************************************************************************************************** int NameAssignment::get(string key){ try { map::iterator itGet = (*this).find(key); //if you can't find it if (itGet == (*this).end()) { return -1; } return (*this)[key]; } catch(exception& e) { m->errorOut(e, "NameAssignment", "get"); exit(1); } } //********************************************************************************************************************** string NameAssignment::get(int key){ try { map::iterator itGet = reverse.find(key); if (itGet == reverse.end()) { return "not found"; } return reverse[key]; } catch(exception& e) { m->errorOut(e, "NameAssignment", "get"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/datastructures/nameassignment.hpp000077500000000000000000000007621424121717000232300ustar00rootroot00000000000000#ifndef NAMEASSIGNMENT_HPP #define NAMEASSIGNMENT_HPP #include "mothur.h" #include "listvector.hpp" #include "utils.hpp" class NameAssignment : public map { public: NameAssignment(string); NameAssignment(); ~NameAssignment(){} void readMap(); ListVector getListVector(); int get(string); string get(int); void print(ostream&); void push_back(string); private: ifstream fileHandle; ListVector list; map reverse; MothurOut* m; Utils util; }; #endif mothur-1.48.0/source/datastructures/oligos.cpp000077500000000000000000001145421424121717000215100ustar00rootroot00000000000000// // oligos.cpp // Mothur // // Created by Sarah Westcott on 4/4/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. 
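//
// Illustrative sketch of the oligos file layout parsed below (the sequences and group names are
// made-up examples, not taken from this source). The type keyword is case insensitive, lines
// starting with '#' are ignored, and paired entries cannot be mixed with unpaired ones:
//
//   # paired layout (e.g. Illumina):
//   primer    CCTACGGGAGGCAGCAG   ATTACCGCGGCTGCTGG   V3
//   barcode   AATGGTAC            TTCACGGA            sampleA
//
//   # unpaired layout (e.g. 454):
//   forward   CCTACGGGAGGCAGCAG
//   reverse   ATTACCGCGGCTGCTGG
//   barcode   AACCTTGG            sampleB
//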
// #include "oligos.h" #include "utils.hpp" /**************************************************************************************************/ Oligos::Oligos(string o){ try { m = MothurOut::getInstance(); hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true; indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0; oligosfile = o; reversePairs = true; readOligos(); if (pairedOligos) { numBarcodes = pairedBarcodes.size(); numFPrimers = pairedPrimers.size(); }else { numBarcodes = barcodes.size(); numFPrimers = primers.size(); } } catch(exception& e) { m->errorOut(e, "Oligos", "Oligos"); exit(1); } } /**************************************************************************************************/ Oligos::Oligos(){ try { m = MothurOut::getInstance(); hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true; indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0; numFPrimers = 0; numBarcodes = 0; } catch(exception& e) { m->errorOut(e, "Oligos", "Oligos"); exit(1); } } /**************************************************************************************************/ int Oligos::read(string o){ try { oligosfile = o; readOligos(); if (pairedOligos) { numBarcodes = pairedBarcodes.size(); numFPrimers = pairedPrimers.size(); }else { numBarcodes = barcodes.size(); numFPrimers = primers.size(); } return 0; } catch(exception& e) { m->errorOut(e, "Oligos", "read"); exit(1); } } /**************************************************************************************************/ int Oligos::read(string o, bool reverse){ try { oligosfile = o; reversePairs = reverse; readOligos(); if (pairedOligos) { numBarcodes = pairedBarcodes.size(); numFPrimers = pairedPrimers.size(); }else { numBarcodes = barcodes.size(); numFPrimers = primers.size(); } return 0; } catch(exception& e) { m->errorOut(e, "Oligos", "read"); exit(1); } } /**************************************************************************************************/ vector Oligos::getSRAGroupNames(){ try { vector sraGroupNames; set uniqueNames; if (pairedOligos) { for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ if (m->getControl_pressed()) { return sraGroupNames; } string primerName = getPrimerName(itPrimer->first); string barcodeName = getBarcodeName(itBar->first); if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing else if ((primerName == "") && (barcodeName == "")) { } //do nothing else { string comboGroupName = ""; string comboName = ""; if(primerName == ""){ comboGroupName = barcodeName; }else{ if(barcodeName == ""){ comboGroupName = primerName; } else{ comboGroupName = barcodeName + "." 
+ primerName; } } if(((itPrimer->second).forward+(itPrimer->second).reverse) == ""){ if ((itBar->second).forward != "NONE") { comboName += (itBar->second).forward; } if ((itBar->second).reverse != "NONE") { if (comboName == "") { comboName += (itBar->second).reverse; } else { comboName += ("."+(itBar->second).reverse); } } }else{ if(((itBar->second).forward+(itBar->second).reverse) == ""){ if ((itPrimer->second).forward != "NONE") { comboName += (itPrimer->second).forward; } if ((itPrimer->second).reverse != "NONE") { if (comboName == "") { comboName += (itPrimer->second).reverse; } else { comboName += ("."+(itPrimer->second).reverse); } } } else{ if ((itBar->second).forward != "NONE") { comboName += (itBar->second).forward; } if ((itBar->second).reverse != "NONE") { if (comboName == "") { comboName += (itBar->second).reverse; } else { comboName += ("."+(itBar->second).reverse); } } if ((itPrimer->second).forward != "NONE") { if (comboName == "") { comboName += (itPrimer->second).forward; } else { comboName += ("."+(itPrimer->second).forward); } } if ((itPrimer->second).reverse != "NONE") { if (comboName == "") { comboName += (itPrimer->second).reverse; } else { comboName += ("."+(itPrimer->second).reverse); } } } } if (comboName != "") { comboGroupName += "_" + comboName; } uniqueNames.insert(comboGroupName); } } } }else { for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ string primerName = getPrimerName(itPrimer->second); string barcodeName = getBarcodeName(itBar->second); if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing else if ((primerName == "") && (barcodeName == "")) { } //do nothing else { string comboGroupName = ""; string comboName = ""; if(primerName == ""){ comboGroupName = barcodeName; }else{ if(barcodeName == ""){ comboGroupName = primerName; } else{ comboGroupName = barcodeName + "." + primerName; } } if(itPrimer->first == ""){ comboName = itBar->first; }else{ if(itBar->first == ""){ comboName = itPrimer->first; } else{ comboName = itBar->first + "." 
+ itPrimer->first; } } if (comboName != "") { comboGroupName += "_" + comboName; } uniqueNames.insert(comboGroupName); } } } } if (uniqueNames.size() == 0) { m->mothurOut("[ERROR]: your oligos file does not contain any group names.\n"); m->setControl_pressed(true); }else { if (m->getDebug()) { int count = 0; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } } for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { sraGroupNames.push_back(*it); } } return sraGroupNames; } catch(exception& e) { m->errorOut(e, "Oligos", "getSRAGroupNames"); exit(1); } } //*************************************************************************************************************** int Oligos::readOligos(){ try { ifstream inOligos; Utils util; util.openInputFile(oligosfile, inOligos); string type, oligo, roligo, group; bool pfUsesNone = false; bool prUsesNone = false; bool bfUsesNone = false; bool brUsesNone = false; while(!inOligos.eof()){ string line = util.getline(inOligos); gobble(inOligos); if(line[0] == '#'){} //ignore else{ vector pieces = util.splitWhiteSpace(line); toUpper(pieces[0]); type = pieces[0];//make type case insensitive oligo = pieces[1]; formatOligo(oligo); if (m->getDebug()) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); } if(type == "FORWARD"){ group = ""; if (pieces.size() > 2) { group = pieces[2]; } //check for repeat primers map::iterator itPrime = primers.find(oligo); if (itPrime != primers.end()) { m->mothurOut("[WARNING]: primer " + oligo + " is in your oligos file already, disregarding.\n"); } else { if (m->getDebug()) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } } primers[oligo]=indexPrimer; indexPrimer++; primerNameVector.push_back(group); } } else if (type == "PRIMER"){ roligo = pieces[2]; formatOligo(roligo); if (oligo == "NONE") { pfUsesNone = true; } else if (roligo == "NONE") { prUsesNone = true; } if (roligo != "NONE") { if (reversePairs) { roligo = reverseOligo(roligo); } } group = ""; if (pieces.size() > 3) { group = pieces[3]; } oligosPair newPrimer(oligo, roligo); if (m->getDebug()) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); } //check for repeat barcodes string tempPair = oligo+roligo; if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("[WARNING]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already, disregarding.\n"); } else { uniquePrimers.insert(tempPair); if (m->getDebug()) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } } pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++; primerNameVector.push_back(group); hasPPrimers = true; } } else if(type == "REVERSE"){ string oligoRC = reverseOligo(oligo); revPrimer.push_back(oligoRC); } else if(type == "BARCODE"){ //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info group = ""; if (pieces.size() == 3) { group = pieces[2]; //check for repeat barcodes map::iterator itBar = barcodes.find(oligo); if (itBar != barcodes.end()) { 
m->mothurOut("[WARNING]: barcode " + oligo + " is in your oligos file already, disregarding.\n"); } else { barcodes[oligo]=indexBarcode; indexBarcode++; barcodeNameVector.push_back(group); } } else if (pieces.size() == 4) { hasPBarcodes = true; string reverseBarcode = pieces[2]; formatOligo(reverseBarcode); group = pieces[3]; if (oligo == "NONE") { bfUsesNone = true; } else if (reverseBarcode == "NONE") { brUsesNone = true; } if (reverseBarcode != "NONE") { if (reversePairs) { reverseBarcode = reverseOligo(reverseBarcode); } } oligosPair newPair(oligo, reverseBarcode); if (m->getDebug()) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); } //check for repeat barcodes string tempPair = oligo+reverseBarcode; if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding.\n"); } else { uniqueBarcodes.insert(tempPair); pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++; barcodeNameVector.push_back(group); } } }else if(type == "LINKER"){ linker.push_back(oligo); }else if(type == "SPACER"){ spacer.push_back(oligo); } else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, forward, reverse, and barcode. Ignoring " + oligo + ".\n"); } } } inOligos.close(); if ((linker.size() == 0) && (spacer.size() == 0) && (pairedBarcodes.size() == 0) && (barcodes.size() == 0) && (pairedPrimers.size() == 0) && (primers.size() == 0) && (revPrimer.size() == 0)) { m->mothurOut("[ERROR]: invalid oligos file, quitting.\n"); m->setControl_pressed(true); return 0; } if (hasPBarcodes || hasPPrimers) { pairedOligos = true; if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->setControl_pressed(true); m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting.\n"); return 0; } //check for "NONE" to make sure if none is used then all primers in that position are NONE //ex. Can't have: PRIMER NONE reversePrimer and PRIMER fowardPrimer reversePrimer in same file if (bfUsesNone) { bool allNONE = true; for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ if ((itBar->second).forward != "NONE") { allNONE = false; break; } } if (!allNONE) { m->setControl_pressed(true); m->mothurOut("[ERROR]: cannot mix forwardBarcode=NONE and forwardBarcode=barcodeString in same file. Mothur assumes all sequences have forward barcodes or all do not, quitting.\n"); return 0; } } if (brUsesNone) { bool allNONE = true; for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ if ((itBar->second).reverse != "NONE") { allNONE = false; break; } } if (!allNONE) { m->setControl_pressed(true); m->mothurOut("[ERROR]: cannot mix reverseBarcode=NONE and reverseBarcode=barcodeString in same file. Mothur assumes all sequences have reverse barcodes or all do not, quitting.\n"); return 0; } } if (pfUsesNone) { bool allNONE = true; for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ if ((itPrimer->second).forward != "NONE") { allNONE = false; break; } } if (!allNONE) { m->setControl_pressed(true); m->mothurOut("[ERROR]: cannot mix forwardPrimer=NONE and forwardPrimer=primerString in same file. 
Mothur assumes all sequences have forward primers or all do not, quitting.\n"); return 0; } } if (prUsesNone) { bool allNONE = true; for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ if ((itPrimer->second).reverse != "NONE") { allNONE = false; break; } } if (!allNONE) { m->setControl_pressed(true); m->mothurOut("[ERROR]: cannot mix reversePrimer=NONE and reversePrimer=primerString in same file. Mothur assumes all sequences have reverse primers or all do not, quitting.\n"); return 0; } } } //add in potential combos if(barcodeNameVector.size() == 0){ if (pairedOligos) { oligosPair newPair("", ""); pairedBarcodes[0] = newPair; }else { barcodes[""] = 0; } barcodeNameVector.push_back(""); } if(primerNameVector.size() == 0){ if (pairedOligos) { oligosPair newPair("", ""); pairedPrimers[0] = newPair; }else { primers[""] = 0; } primerNameVector.push_back(""); } if (pairedOligos) { for(map::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){ for(map::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){ string primerName = primerNameVector[itPrimer->first]; string barcodeName = barcodeNameVector[itBar->first]; if (m->getDebug()) { m->mothurOut("[DEBUG]: primerName = " + primerName + " barcodeName = " + barcodeName + "\n"); } if ((primerName == "ignore") || (barcodeName == "ignore")) { if (m->getDebug()) { m->mothurOut("[DEBUG]: in ignore. \n"); } } //do nothing else if ((primerName == "") && (barcodeName == "")) { if (m->getDebug()) { m->mothurOut("[DEBUG]: in blank. \n"); } } //do nothing else { string comboGroupName = ""; string fastqFileName = ""; if(primerName == ""){ comboGroupName = barcodeNameVector[itBar->first]; }else{ if(barcodeName == ""){ comboGroupName = primerNameVector[itPrimer->first]; } else{ comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first]; } } if (m->getDebug()) { m->mothurOut("[DEBUG]: comboGroupName = " + comboGroupName + "\n"); } uniqueNames.insert(comboGroupName); map >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName); if (itGroup2Barcode == Group2Barcode.end()) { vector tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse); Group2Barcode[comboGroupName] = tempBarcodes; }else { Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse); } itGroup2Barcode = Group2Primer.find(comboGroupName); if (itGroup2Barcode == Group2Primer.end()) { vector tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse); Group2Primer[comboGroupName] = tempPrimers; }else { Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse); } } } } }else { for(map::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){ for(map::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){ string primerName = primerNameVector[itPrimer->second]; string barcodeName = barcodeNameVector[itBar->second]; if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing else if ((primerName == "") && (barcodeName == "")) { } //do nothing else { string comboGroupName = ""; string fastqFileName = ""; if(primerName == ""){ comboGroupName = barcodeNameVector[itBar->second]; } else{ if(barcodeName == ""){ comboGroupName = primerNameVector[itPrimer->second]; } else{ comboGroupName = barcodeNameVector[itBar->second] + "." 
+ primerNameVector[itPrimer->second]; } } uniqueNames.insert(comboGroupName); map >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName); if (itGroup2Barcode == Group2Barcode.end()) { vector tempBarcodes; tempBarcodes.push_back(itBar->first); Group2Barcode[comboGroupName] = tempBarcodes; }else { Group2Barcode[comboGroupName].push_back(itBar->first); } itGroup2Barcode = Group2Primer.find(comboGroupName); if (itGroup2Barcode == Group2Primer.end()) { vector tempPrimers; tempPrimers.push_back(itPrimer->first); Group2Primer[comboGroupName] = tempPrimers; }else { Group2Primer[comboGroupName].push_back(itPrimer->first); } } } } } if (m->getDebug()) { int count = 0; for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } } Groups.clear(); for (set::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); } return 0; } catch(exception& e) { m->errorOut(e, "Oligos", "readOligos"); exit(1); } } //********************************************************************/ vector Oligos::getBarcodes(string groupName){ try { vector thisGroupsBarcodes; map >::iterator it = Group2Barcode.find(groupName); if (it == Group2Barcode.end()) { m->mothurOut("[ERROR]: no barcodes found for group " + groupName + ".\n"); m->setControl_pressed(true); }else { thisGroupsBarcodes = it->second; } return thisGroupsBarcodes; } catch(exception& e) { m->errorOut(e, "Oligos", "getBarcodes"); exit(1); } } //********************************************************************/ vector Oligos::getPrimers(string groupName){ try { vector thisGroupsPrimers; map >::iterator it = Group2Primer.find(groupName); if (it == Group2Primer.end()) { m->mothurOut("[ERROR]: no primers found for group " + groupName + ".\n"); m->setControl_pressed(true); }else { thisGroupsPrimers = it->second; } return thisGroupsPrimers; } catch(exception& e) { m->errorOut(e, "Oligos", "getPrimers"); exit(1); } } //********************************************************************/ //can't have paired and unpaired so this function will either run the paired map or the unpaired map Oligos::getReversedPairedPrimers(){ try { map rpairedPrimers; for (map::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) { //add reverse compliment barcodes string forward = reverseOligo((it->second).forward); string reverse = reverseOligo((it->second).reverse); string temp = forward+reverse; if (uniquePrimers.count(temp) != 0) { } //already have this pair assigned to a sample, ignore reversed version else { uniquePrimers.insert(temp); oligosPair tempPair(forward, reverse); //reverseforward, reverseReverse rpairedPrimers[it->first] = tempPair; } } for (map::iterator it = primers.begin(); it != primers.end(); it++) { //add reverse compliment barcodes string forward = reverseOligo(it->first); string reverse = ""; string temp = forward+reverse; if (uniquePrimers.count(temp) != 0) { } //already have this pair assigned to a sample, ignore reversed version else { uniquePrimers.insert(temp); oligosPair tempPair(forward, reverse); //reverseforward, reverseReverse rpairedPrimers[it->second] = tempPair; } } return rpairedPrimers; } catch(exception& e) { m->errorOut(e, "Oligos", "getReversedPairedPrimers"); exit(1); } } //********************************************************************/ //can't have paired and unpaired so this function will either run the paired map or the unpaired map Oligos::getReorientedPairedPrimers(){ try 
{ map rpairedPrimers; for (map::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) { string forward = (it->second).reverse; if (reversePairs) { forward = reverseOligo(forward); } string reverse = (it->second).forward; if (reversePairs) { reverse = reverseOligo(reverse); } oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer rpairedPrimers[it->first] = tempPair; } for (map::iterator it = primers.begin(); it != primers.end(); it++) { oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode rpairedPrimers[it->second] = tempPair; } return rpairedPrimers; } catch(exception& e) { m->errorOut(e, "Oligos", "getReorientedPairedPrimers"); exit(1); } } //********************************************************************/ vector Oligos::getReorientedReversePrimers(){ try { vector revReorientedPrimers; for (map::iterator it = primers.begin(); it != primers.end(); it++) { string reverse = reverseOligo((it->first)); //rc ForwardPrimer revReorientedPrimers.push_back(reverse); } return revReorientedPrimers; } catch(exception& e) { m->errorOut(e, "Oligos", "getReorientedPairedPrimers"); exit(1); } } //********************************************************************/ map Oligos::getReorientedPrimers(){ try { map reorientedPrimers; for (int i = 0; i < revPrimer.size(); i++) { string primer = reverseOligo(revPrimer[i]); reorientedPrimers[primer] = -1; //ignore value } return reorientedPrimers; } catch(exception& e) { m->errorOut(e, "Oligos", "getReorientedPrimers"); exit(1); } } //********************************************************************/ vector Oligos::getReversedReversePrimers(){ try { vector revReversedPrimers; for (int i = 0; i < revPrimer.size(); i++) { string primer = reverseOligo(revPrimer[i]); revReversedPrimers.push_back(primer); } return revReversedPrimers; } catch(exception& e) { m->errorOut(e, "Oligos", "getReorientedPairedPrimers"); exit(1); } } //********************************************************************/ map Oligos::getReversedPrimers(){ try { map revReversedPrimers; for (map::iterator it = primers.begin(); it != primers.end(); it++) { string reverse = reverseOligo((it->first)); //rc ForwardPrimer revReversedPrimers[reverse] = it->second; } return revReversedPrimers; } catch(exception& e) { m->errorOut(e, "Oligos", "getReorientedPrimers"); exit(1); } } //********************************************************************/ //can't have paired and unpaired so this function will either run the paired map or the unpaired map Oligos::getReversedPairedBarcodes(){ try { map rpairedBarcodes; for (map::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) { //add reverse compliment barcodes string forward = reverseOligo((it->second).forward); string reverse = reverseOligo((it->second).reverse); string temp = forward+reverse; if (uniqueBarcodes.count(temp) != 0) { } //already have this pair assigned to a sample, ignore reversed version else { uniqueBarcodes.insert(temp); oligosPair tempPair(forward, reverse); //reverseforward, reverseReverse rpairedBarcodes[it->first] = tempPair; } } for (map::iterator it = barcodes.begin(); it != barcodes.end(); it++) { //add reverse compliment barcodes string forward = reverseOligo(it->first); string reverse = ""; string temp = forward+reverse; if (uniqueBarcodes.count(temp) != 0) { } //already have this pair assigned to a sample, ignore reversed version else { uniqueBarcodes.insert(temp); oligosPair tempPair(forward, reverse); //reverseforward, 
reverseReverse rpairedBarcodes[it->second] = tempPair; } } return rpairedBarcodes; } catch(exception& e) { m->errorOut(e, "Oligos", "getReversedPairedBarcodes"); exit(1); } } //********************************************************************/ //can't have paired and unpaired so this function will either run the paired map or the unpaired map Oligos::getReorientedPairedBarcodes(){ try { map rpairedBarcodes; for (map::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) { string forward = (it->second).reverse; if (reversePairs) { forward = reverseOligo(forward); } //forward is now the reverse of reverse string reverse = (it->second).forward; if (reversePairs) { reverse = reverseOligo(reverse); } //reverse is now the reverse of forward //check for repeat barcodes string temp = forward+reverse; if (uniqueBarcodes.count(temp) != 0) { } //already have this pair assigned to a sample, ignore reoriented version else { uniqueBarcodes.insert(temp); oligosPair tempPair(forward, reverse); //(reverse, forward) or if reversePairs (rc_reverse, rc_forward) rpairedBarcodes[it->first] = tempPair; } } for (map::iterator it = barcodes.begin(); it != barcodes.end(); it++) { string forward = ""; string reverse = reverseOligo((it->first)); string temp = forward+reverse; if (uniqueBarcodes.count(temp) != 0) { } //already have this pair assigned to a sample, ignore reoriented version else { uniqueBarcodes.insert(temp); oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer rpairedBarcodes[it->second] = tempPair; } } return rpairedBarcodes; } catch(exception& e) { m->errorOut(e, "Oligos", "getReorientedPairedBarcodes"); exit(1); } } //********************************************************************/ string Oligos::reverseOligo(string oligo){ try { if (oligo == "NONE") { return "NONE"; } string reverse = ""; for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] == 'A') { reverse += 'T'; } else if(oligo[i] == 'T'){ reverse += 'A'; } else if(oligo[i] == 'U'){ reverse += 'A'; } else if(oligo[i] == 'G'){ reverse += 'C'; } else if(oligo[i] == 'C'){ reverse += 'G'; } else if(oligo[i] == 'R'){ reverse += 'Y'; } else if(oligo[i] == 'Y'){ reverse += 'R'; } else if(oligo[i] == 'M'){ reverse += 'K'; } else if(oligo[i] == 'K'){ reverse += 'M'; } else if(oligo[i] == 'W'){ reverse += 'W'; } else if(oligo[i] == 'S'){ reverse += 'S'; } else if(oligo[i] == 'B'){ reverse += 'V'; } else if(oligo[i] == 'V'){ reverse += 'B'; } else if(oligo[i] == 'D'){ reverse += 'H'; } else if(oligo[i] == 'H'){ reverse += 'D'; } else { reverse += 'N'; } } return reverse; } catch(exception& e) { m->errorOut(e, "Oligos", "reverseOligo"); exit(1); } } //********************************************************************/ void Oligos::formatOligo(string& oligo){ try { for(int i=0;ierrorOut(e, "Oligos", "formatOligo"); exit(1); } } //********************************************************************/ string Oligos::getBarcodeName(int index){ try { string name = ""; if ((index >= 0) && (index < barcodeNameVector.size())) { name = barcodeNameVector[index]; } return name; } catch(exception& e) { m->errorOut(e, "Oligos", "getBarcodeName"); exit(1); } } //********************************************************************/ string Oligos::getPrimerName(int index){ try { string name = ""; if ((index >= 0) && (index < primerNameVector.size())) { name = primerNameVector[index]; } return name; } catch(exception& e) { m->errorOut(e, "Oligos", "getPrimerName"); exit(1); } } 
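// Illustrative note (not part of the original source): reverseOligo() above returns the
// reverse complement of an oligo, honoring IUPAC ambiguity codes, e.g.
//
//     reverseOligo("CCTACGGGAGGCAGCAG") -> "CTGCTGCCTCCCGTAGG"
//     reverseOligo("AYTGGGYDTAAAGNG")   -> "CNCTTTAHRCCCART"
//     reverseOligo("NONE")              -> "NONE"      // the literal keyword passes through unchanged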
//********************************************************************/ string Oligos::getGroupName(int barcodeIndex, int primerIndex){ try { string thisGroup = ""; if(numBarcodes != 0){ thisGroup = getBarcodeName(barcodeIndex); if (numFPrimers != 0) { if (getPrimerName(primerIndex) != "") { if(thisGroup != "") { thisGroup += "." + getPrimerName(primerIndex); }else { thisGroup = getPrimerName(primerIndex); } } } } return thisGroup; } catch(exception& e) { m->errorOut(e, "Oligos", "getGroupName"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/oligos.h000077500000000000000000000065221424121717000211530ustar00rootroot00000000000000// // oligos.h // Mothur // // Created by Sarah Westcott on 4/4/14. // Copyright (c) 2014 Schloss Lab. All rights reserved. // #ifndef Mothur_oligos_h #define Mothur_oligos_h #include "mothurout.h" /**************************************************************************************************/ class Oligos { public: Oligos(string); Oligos(); ~Oligos() = default; int read(string); int read(string, bool); //read without reversing the paired barcodes, for make.contigs. bool hasPairedPrimers() { return hasPPrimers; } bool hasPairedBarcodes() { return hasPBarcodes; } //for processing with trimOligos class map getPairedPrimers() { return pairedPrimers; } map getPairedBarcodes() { return pairedBarcodes; } map getReorientedPairedPrimers(); map getReorientedPairedBarcodes(); map getReversedPairedPrimers(); map getReversedPairedBarcodes(); map getPrimers() { return primers; } map getBarcodes() { return barcodes; } map getReversedPrimers(); vector getReversedReversePrimers(); map getReorientedPrimers(); vector getReorientedReversePrimers(); vector getLinkers() { return linker; } vector getSpacers() { return spacer; } vector getReversePrimers() { return revPrimer; } vector getPrimerNames() { return primerNameVector; } vector getBarcodeNames() { return barcodeNameVector; } vector getGroupNames() { return Groups; } vector getSRAGroupNames(); //for printing and other formatting uses vector getBarcodes(string); //get barcodes for a group. For paired barcodes will return forward.reverse vector getPrimers(string); //get primers for a group. For paired primers will return forward.reverse string getGroupName(int, int); string getBarcodeName(int); string getPrimerName(int); protected: set uniqueNames; vector Groups; vector revPrimer; map > Group2Barcode; map > Group2Primer; map pairedBarcodes; map pairedPrimers; map primers; map barcodes; vector linker; vector spacer; vector primerNameVector; vector barcodeNameVector; bool hasPPrimers, hasPBarcodes, pairedOligos, reversePairs; string oligosfile; int numBarcodes, numFPrimers; MothurOut* m; int indexPrimer; int indexBarcode; int indexPairedPrimer; int indexPairedBarcode; set uniquePrimers; set uniqueBarcodes; int readOligos(); string reverseOligo(string); void formatOligo(string&); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/datastructures/optiblastmatrix.cpp000066400000000000000000000663231424121717000234420ustar00rootroot00000000000000// // optiblastmatrix.cpp // Mothur // // Created by Sarah Westcott on 5/10/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
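//
// Illustrative note (not part of the original source): readBlast() below parses standard
// 12-column tabular blast output, reading each line as
//
//   queryName subjectName percentId alignLength mismatches gaps
//   queryStart queryEnd subjectStart subjectEnd evalue bitScore
//
// e.g. a made-up line:  seqA  seqB  98.7  253  3  0  1  253  1  253  1e-120  450
//
// Distances are then derived from the ratio of each pairwise bit score to the query's
// self-hit score (distance = 1.0 - score/refScore), and kept only if at or below the cutoff.
//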
// #include "optiblastmatrix.hpp" /***********************************************************************/ OptiBlastMatrix::OptiBlastMatrix(string d, string nc, string f, bool s, double c, int l, float p, bool min) : OptiData(c), length(l), penalty(p), minWanted(min) { m = MothurOut::getInstance(); distFile = d; format = f; sim = s; if (format == "name") { namefile = nc; countfile = ""; } else if (format == "count") { countfile = nc; namefile = ""; } else { countfile = ""; namefile = ""; } readBlast(); } /***********************************************************************/ string OptiBlastMatrix::getOverlapName(long long index) { try { if (index > blastOverlap.size()) { m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); return ""; } string name = overlapNameMap[index]; return name; } catch(exception& e) { m->errorOut(e, "OptiBlastMatrix", "getOverlapName"); exit(1); } } /***********************************************************************/ int OptiBlastMatrix::readBlast(){ try { Utils util; map nameAssignment; if (namefile != "") { util.readNames(namefile, nameAssignment); } else if (countfile != "") { CountTable ct; ct.readTable(countfile, false, true); map temp = ct.getNameMap(); for (map::iterator it = temp.begin(); it!= temp.end(); it++) { nameAssignment[it->first] = it->second; } } else { readBlastNames(nameAssignment); } int count = 0; for (map::iterator it = nameAssignment.begin(); it!= nameAssignment.end(); it++) { it->second = count; count++; nameMap.push_back(it->first); overlapNameMap.push_back(it->first); } m->mothurOut("Reading Blast File... "); cout.flush(); string firstName, secondName, eScore, currentRow; currentRow = ""; string repeatName = ""; float distance, thisoverlap, refScore; float percentId; float numBases, mismatch, gap, startQuery, endQuery, startRef, endRef, score, lengthThisSeq; map thisRowsBlastScores; ///////////////////// Read to eliminate singletons /////////////////////// ifstream fileHandle; util.openInputFile(distFile, fileHandle); map singletonIndexSwap; map blastSingletonIndexSwap; vector singleton; singleton.resize(nameAssignment.size(), true); vector overlapSingleton; overlapSingleton.resize(nameAssignment.size(), true); vector< map > dists; dists.resize(nameAssignment.size()); if (!fileHandle.eof()) { //read in line from file fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; gobble(fileHandle); currentRow = firstName; lengthThisSeq = numBases; repeatName = firstName + secondName; if (firstName == secondName) { refScore = score; } else{ thisRowsBlastScores[secondName] = score; //calc overlap score thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty); //if there is a valid overlap, add it if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) { //convert name to number map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } int indexA = (itA->second); int indexB = (itB->second); overlapSingleton[indexA] = false; overlapSingleton[indexB] = false; blastSingletonIndexSwap[indexA] = indexA; 
blastSingletonIndexSwap[indexB] = indexB; } } }else { m->mothurOut("Error in your blast file, cannot read.\n"); exit(1); } while(fileHandle){ //let's assume it's a triangular matrix... if (m->getControl_pressed()) { fileHandle.close(); return 0; } //read in line from file fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; gobble(fileHandle); string temp = firstName + secondName; //to check if this file has repeat lines, ie. is this a blast instead of a blscreen file //if this is a new pairing if (temp != repeatName) { repeatName = temp; if (currentRow == firstName) { if (firstName == secondName) { refScore = score; } else{ //save score thisRowsBlastScores[secondName] = score; //calc overlap score thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty); //if there is a valid overlap, add it if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) { //convert name to number map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } int indexA = (itA->second); int indexB = (itB->second); overlapSingleton[indexA] = false; overlapSingleton[indexB] = false; blastSingletonIndexSwap[indexA] = indexA; blastSingletonIndexSwap[indexB] = indexB; } } //end else }else { //end row //convert blast scores to distance and add cell to sparse matrix if we can map::iterator it; map::iterator itDist; for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) { distance = 1.0 - (it->second / refScore); //do we already have the distance calculated for b->a map::iterator itA = nameAssignment.find(currentRow); map::iterator itB = nameAssignment.find(it->first); itDist = dists[itB->second].find(itA->first); //if we have it then compare if (itDist != dists[itB->second].end()) { //if you want the minimum blast score ratio, then pick max distance if(minWanted) { distance = max(itDist->second, distance); } else{ distance = min(itDist->second, distance); } //is this distance below cutoff if (distance <= cutoff) { int indexA = (itA->second); int indexB = (itB->second); singleton[indexA] = false; singleton[indexB] = false; singletonIndexSwap[indexA] = indexA; singletonIndexSwap[indexB] = indexB; } //not going to need this again dists[itB->second].erase(itDist); }else { //save this value until we get the other ratio dists[itA->second][it->first] = distance; } } //clear out last rows info thisRowsBlastScores.clear(); currentRow = firstName; lengthThisSeq = numBases; //add this row to thisRowsBlastScores if (firstName == secondName) { refScore = score; } else{ //add this row to thisRowsBlastScores thisRowsBlastScores[secondName] = score; //calc overlap score thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty); //if there is a valid overlap, add it if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) { //convert name to number map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in 
the names file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } int indexA = (itA->second); int indexB = (itB->second); overlapSingleton[indexA] = false; overlapSingleton[indexB] = false; blastSingletonIndexSwap[indexA] = indexA; blastSingletonIndexSwap[indexB] = indexB; } } }//end if current row }//end if repeat } fileHandle.close(); //convert blast scores to distance and add cell to sparse matrix if we can map::iterator it; map::iterator itDist; for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) { distance = 1.0 - (it->second / refScore); //do we already have the distance calculated for b->a map::iterator itA = nameAssignment.find(currentRow); map::iterator itB = nameAssignment.find(it->first); itDist = dists[itB->second].find(itA->first); //if we have it then compare if (itDist != dists[itB->second].end()) { //if you want the minimum blast score ratio, then pick max distance if(minWanted) { distance = max(itDist->second, distance); } else{ distance = min(itDist->second, distance); } //is this distance below cutoff if (distance <= cutoff) { int indexA = (itA->second); int indexB = (itB->second); singleton[indexA] = false; singleton[indexB] = false; singletonIndexSwap[indexA] = indexA; singletonIndexSwap[indexB] = indexB; } //not going to need this again dists[itB->second].erase(itDist); }else { //save this value until we get the other ratio dists[itA->second][it->first] = distance; } } //clear out info thisRowsBlastScores.clear(); dists.clear(); ////////////////////////////////////////////////////////////////////////// int nonSingletonCount = 0; for (int i = 0; i < singleton.size(); i++) { if (!singleton[i]) { //if you are a singleton singletonIndexSwap[i] = nonSingletonCount; nonSingletonCount++; }else { singletons.push_back(nameMap[i]); } } singleton.clear(); int overlapNonSingletonCount = 0; for (int i = 0; i < overlapSingleton.size(); i++) { if (!overlapSingleton[i]) { //if you are a singleton blastSingletonIndexSwap[i] = overlapNonSingletonCount; overlapNonSingletonCount++; } } overlapSingleton.clear(); ifstream in; util.openInputFile(distFile, in); dists.resize(nameAssignment.size()); closeness.resize(nonSingletonCount); blastOverlap.resize(overlapNonSingletonCount); map names; if (namefile != "") { util.readNames(namefile, names); for (int i = 0; i < singletons.size(); i++) { singletons[i] = names[singletons[i]]; } } m->mothurOut(" halfway ... 
"); cout.flush(); if (!in.eof()) { //read in line from file in >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; gobble(fileHandle); currentRow = firstName; lengthThisSeq = numBases; repeatName = firstName + secondName; if (firstName == secondName) { refScore = score; } else{ //convert name to number map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } thisRowsBlastScores[secondName] = score; if (namefile != "") { firstName = names[firstName]; //redundant names secondName = names[secondName]; //redundant names } nameMap[singletonIndexSwap[itA->second]] = firstName; nameMap[singletonIndexSwap[itB->second]] = secondName; //calc overlap score thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty); //if there is a valid overlap, add it if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) { int indexA = (itA->second); int indexB = (itB->second); int newB = blastSingletonIndexSwap[indexB]; int newA = blastSingletonIndexSwap[indexA]; blastOverlap[newA].insert(newB); blastOverlap[newB].insert(newA); overlapNameMap[newA] = firstName; overlapNameMap[newB] = secondName; } } }else { m->mothurOut("Error in your blast file, cannot read.\n"); exit(1); } while(in){ //let's assume it's a triangular matrix... if (m->getControl_pressed()) { fileHandle.close(); return 0; } //read in line from file in >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; gobble(fileHandle); string temp = firstName + secondName; //to check if this file has repeat lines, ie. 
is this a blast instead of a blscreen file //if this is a new pairing if (temp != repeatName) { repeatName = temp; if (currentRow == firstName) { if (firstName == secondName) { refScore = score; } else{ //convert name to number map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } //save score thisRowsBlastScores[secondName] = score; if (namefile != "") { firstName = names[firstName]; //redundant names secondName = names[secondName]; //redundant names } nameMap[singletonIndexSwap[itA->second]] = firstName; nameMap[singletonIndexSwap[itB->second]] = secondName; //calc overlap score thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty); //if there is a valid overlap, add it if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) { int indexA = (itA->second); int indexB = (itB->second); int newB = blastSingletonIndexSwap[indexB]; int newA = blastSingletonIndexSwap[indexA]; blastOverlap[newA].insert(newB); blastOverlap[newB].insert(newA); overlapNameMap[newA] = firstName; overlapNameMap[newB] = secondName; } } //end else }else { //end row //convert blast scores to distance and add cell to sparse matrix if we can map::iterator it; map::iterator itDist; for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) { distance = 1.0 - (it->second / refScore); //do we already have the distance calculated for b->a map::iterator itA = nameAssignment.find(currentRow); map::iterator itB = nameAssignment.find(it->first); itDist = dists[itB->second].find(itA->first); //if we have it then compare if (itDist != dists[itB->second].end()) { //if you want the minimum blast score ratio, then pick max distance if(minWanted) { distance = max(itDist->second, distance); } else{ distance = min(itDist->second, distance); } //is this distance below cutoff if (distance <= cutoff) { int indexA = (itA->second); int indexB = (itB->second); int newB = singletonIndexSwap[indexB]; int newA = singletonIndexSwap[indexA]; closeness[newA].insert(newB); closeness[newB].insert(newA); } //not going to need this again dists[itB->second].erase(itDist); }else { //save this value until we get the other ratio dists[itA->second][it->first] = distance; } } //clear out last rows info thisRowsBlastScores.clear(); currentRow = firstName; lengthThisSeq = numBases; //add this row to thisRowsBlastScores if (firstName == secondName) { refScore = score; } else{ //add this row to thisRowsBlastScores //convert name to number map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("BError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } thisRowsBlastScores[secondName] = score; //calc overlap score thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty); //if there is a valid overlap, add it if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) { int indexA = 
(itA->second); int indexB = (itB->second);

                        int newB = blastSingletonIndexSwap[indexB];
                        int newA = blastSingletonIndexSwap[indexA];
                        blastOverlap[newA].insert(newB);
                        blastOverlap[newB].insert(newA);

                        overlapNameMap[newA] = firstName;
                        overlapNameMap[newB] = secondName;
                    }
                }
            }//end if current row
        }//end if repeat
    }
    in.close();

    //convert blast scores to distance and add cell to sparse matrix if we can
    for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) {
        distance = 1.0 - (it->second / refScore);

        //do we already have the distance calculated for b->a
        map<string, long long>::iterator itA = nameAssignment.find(currentRow);
        map<string, long long>::iterator itB = nameAssignment.find(it->first);
        itDist = dists[itB->second].find(itA->first);

        //if we have it then compare
        if (itDist != dists[itB->second].end()) {
            //if you want the minimum blast score ratio, then pick max distance
            if(minWanted) { distance = max(itDist->second, distance); }
            else{ distance = min(itDist->second, distance); }

            //is this distance below cutoff
            if (distance <= cutoff) {
                int indexA = (itA->second);
                int indexB = (itB->second);
                int newB = singletonIndexSwap[indexB];
                int newA = singletonIndexSwap[indexA];
                closeness[newA].insert(newB);
                closeness[newB].insert(newA);
            }
            //not going to need this again
            dists[itB->second].erase(itDist);
        }else { //save this value until we get the other ratio
            dists[itA->second][it->first] = distance;
        }
    }

    //clear out info
    thisRowsBlastScores.clear();
    dists.clear();
    nameAssignment.clear();

    m->mothurOut(" done.\n");

    return 1;
    }
    catch(exception& e) {
        m->errorOut(e, "OptiBlastMatrix", "readBlast");
        exit(1);
    }
}
/*********************************************************************************************/
int OptiBlastMatrix::readBlastNames(map<string, long long>& nameAssignment) {
    try {
        m->mothurOut("Reading names... "); cout.flush();

        string name, hold, prevName;
        int num = 0;

        ifstream in; Utils util; util.openInputFile(distFile, in);

        //read first line
        in >> prevName;

        for (int i = 0; i < 11; i++) {  in >> hold;  }
        gobble(in);

        //save name in nameMap
        nameAssignment[prevName] = num; num++;

        map<string, long long>::iterator it;
        while (!in.eof()) {
            if (m->getControl_pressed()) { in.close(); return 0; }

            //read line
            in >> name;

            for (int i = 0; i < 11; i++) {  in >> hold;  }
            gobble(in);

            //is this a new name?
            if (name != prevName) {
                prevName = name;

                it = nameAssignment.find(name);
                if (it != nameAssignment.end()) {
                    m->mothurOut("[ERROR]: trying to extract names from blast file, and I found dups. Are you sequence names unique? quitting.\n");
                    m->setControl_pressed(true);
                }
                else { nameAssignment[name] = num; num++; }
            }
        }
        in.close();

        if (m->getControl_pressed()) { return 0; }

        m->mothurOut(toString(num) + " names read.\n");

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "OptiBlastMatrix", "readBlastNames");
        exit(1);
    }
}
/***********************************************************************/
mothur-1.48.0/source/datastructures/optiblastmatrix.hpp000066400000000000000000000016351424121717000234420ustar00rootroot00000000000000
//
// optiblastmatrix.hpp
// Mothur
//
// Created by Sarah Westcott on 5/10/18.
// Copyright © 2018 Schloss Lab. All rights reserved.
// #ifndef optiblastmatrix_hpp #define optiblastmatrix_hpp #include "optidata.hpp" class OptiBlastMatrix : public OptiData { public: OptiBlastMatrix(string, string, string, bool, double c, int l, float p, bool min); ~OptiBlastMatrix(){ } vector< set > getBlastOverlap() { return blastOverlap; } string getOverlapName(long long); //name from nameMap index protected: vector overlapNameMap; vector< set > blastOverlap; //empty unless reading a blast file. string distFile, namefile, countfile, format; bool sim, minWanted; float penalty; int length; int readBlast(); int readBlastNames(map& nameAssignment); }; #endif /* optiblastmatrix_hpp */ mothur-1.48.0/source/datastructures/optidata.cpp000066400000000000000000000126611424121717000220150ustar00rootroot00000000000000// // optidata.cpp // Mothur // // Created by Sarah Westcott on 5/10/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #include "optidata.hpp" /***********************************************************************/ long long OptiData::print(ostream& out) { try { long long count = 0; for (long long i = 0; i < closeness.size(); i++) { out << i << '\t' << getName(i) << '\t'; for(set::iterator it = closeness[i].begin(); it != closeness[i].end(); it++){ out << *it << '\t'; count++; } out << endl; } out << endl; return count; } catch(exception& e) { m->errorOut(e, "OptiData", "print"); exit(1); } } /***********************************************************************/ long long OptiData::getNumClose(long long index) { try { if (index < 0) { return 0; } else if (index > closeness.size()) { m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); return 0; } else { return closeness[index].size(); } } catch(exception& e) { m->errorOut(e, "OptiData", "getNumClose"); exit(1); } } /***********************************************************************/ bool OptiData::isClose(long long i, long long toFind){ try { if (i < 0) { return false; } else if (i > closeness.size()) { m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); return false; } bool found = false; if (closeness[i].count(toFind) != 0) { found = true; } return found; } catch(exception& e) { m->errorOut(e, "OptiData", "isClose"); exit(1); } } /***********************************************************************/ set OptiData::getCloseSeqs(long long i){ try { if (i < 0) { set temp; return temp; } else if (i > closeness.size()) { m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); set temp; return temp; } return closeness[i]; } catch(exception& e) { m->errorOut(e, "OptiData", "getNumClose"); exit(1); } } /***********************************************************************/ //maps unique name to index in distance matrix //used by sensspec to get translate the list file name to the index name for closeness map OptiData::getNameIndexMap() { try { map nameIndexes; for (int i = 0; i < nameMap.size(); i++) { vector thisBinsSeqs; util.splitAtComma(nameMap[i], thisBinsSeqs); if (i < closeness.size()) { nameIndexes[thisBinsSeqs[0]] = i; } } return nameIndexes; } catch(exception& e) { m->errorOut(e, "OptiData", "getNameIndexMap"); exit(1); } } /***********************************************************************/ set OptiData::getIndexes(set seqs) { try { map nameIndexes = getNameIndexMap(); map::iterator it; set indexes; for (set::iterator itSeqs = seqs.begin(); itSeqs != seqs.end(); itSeqs++) { it = nameIndexes.find(*itSeqs); if (it != nameIndexes.end()) { indexes.insert(it->second); } } return 
indexes; } catch(exception& e) { m->errorOut(e, "OptiData", "getNameIndexMap"); exit(1); } } /***********************************************************************/ string OptiData::getName(long long index) { try { if (index < 0) { return ""; } else if (index > closeness.size()) { m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); return ""; } return nameMap[index]; } catch(exception& e) { m->errorOut(e, "OptiData", "getName"); exit(1); } } /***********************************************************************/ set OptiData::getNames(set indexes) { try { set names; for (set::iterator it = indexes.begin(); it != indexes.end(); it++) { if (m->getControl_pressed()) { break; } names.insert(getName(*it)); } return names; } catch(exception& e) { m->errorOut(e, "OptiData", "getNames"); exit(1); } } /***********************************************************************/ long long OptiData::getNumDists(){ try { long long foundDists = 0; for (int i = 0; i < closeness.size(); i++) { foundDists += closeness[i].size(); } return foundDists; } catch(exception& e) { m->errorOut(e, "OptiData", "getNumDists"); exit(1); } } /***********************************************************************/ ListVector* OptiData::getListSingle() { try { ListVector* singlelist = nullptr; if (singletons.size() == 0) { } else { singlelist = new ListVector(); for (int i = 0; i < singletons.size(); i++) { singlelist->push_back(singletons[i]); } } return singlelist; } catch(exception& e) { m->errorOut(e, "OptiData", "getListSingle"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/optidata.hpp000066400000000000000000000065431424121717000220240ustar00rootroot00000000000000// // optidata.hpp // Mothur // // Created by Sarah Westcott on 5/10/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
//

#ifndef optidata_hpp
#define optidata_hpp

#include "mothurout.h"
#include "listvector.hpp"
#include "sparsedistancematrix.h"
#include "counttable.h"

/*
 #ifdef UNIT_TEST
    friend class TestOptiMatrix;
    friend class FakeOptiMatrix;
 #endif
 */

class OptiData {
public:
    OptiData(double c)  { m = MothurOut::getInstance(); cutoff = c; }
    virtual ~OptiData(){}

    set<long long> getCloseSeqs(long long i);
    bool isClose(long long, long long);
    long long getNumClose(long long);
    map<string, long long> getNameIndexMap();
    string getName(long long); //name from nameMap index
    set<string> getNames(set<long long>); //name from nameMap index
    long long getNumSeqs() { return closeness.size(); }
    long long getNumSingletons() { return singletons.size(); }
    virtual long long getNumDists(); //number of distances under cutoff
    ListVector* getListSingle();

    //for mgcluster - reading blast files
    virtual vector< set<long long> > getBlastOverlap() { vector< set<long long> > blank; return blank; }
    virtual string getOverlapName(long long) { return ""; } //name from nameMap index

    virtual void randomizeRefs(){};
    virtual vector<string> getRefSingletonNames() { vector<string> temp; return temp; }
    virtual vector<long long> getTranslatedBins(vector< vector<string> >&, vector< vector<long long> >&) { vector<long long> temp; return temp; }
    virtual OptiData* extractRefMatrix() { OptiData* temp = nullptr; return temp; }
    virtual OptiData* extractMatrixSubset(set<string>&) { OptiData* temp = nullptr; return temp; }
    virtual OptiData* extractMatrixSubset(set<long long>&) { OptiData* temp = nullptr; return temp; }
    virtual long long getNumFitSingletons() { return 0; } //user singletons
    virtual long long getNumFitDists() { return 0; } //user distances under cutoff
    virtual long long getNumRefDists() { return 0; } //ref distances under cutoff
    virtual ListVector* getFitListSingle() { ListVector* list = nullptr; return list; }
    virtual long long getNumFitTrueSingletons() { return 0; }

    virtual vector<long long> getRefSeqs() { vector<long long> temp; return temp; }
    virtual vector<long long> getFitSeqs() { vector<long long> temp; return temp; }
    virtual long long getNumFitSeqs() { return 0; }
    virtual long long getNumFitClose(long long) { return 0; }
    virtual long long getNumRefClose(long long) { return 0; }
    virtual set<long long> getCloseFitSeqs(long long i) { set<long long> temp; return temp; }
    virtual set<long long> getCloseRefSeqs(long long i) { set<long long> temp; return temp; }
    virtual bool isCloseFit(long long j, long long i, bool&) { return false; }

    virtual long long print(ostream&);

protected:
    Utils util;
    MothurOut* m;

    vector< set<long long> > closeness;  //closeness[0] contains indexes of seqs "close" to seq 0.
    vector<string> singletons; //name of seqs with NO distances in matrix, if name file is given then it contains 2nd column of namefile
    vector<string> nameMap;    //name of seqs with distances in matrix, if name file is given then it contains 2nd column of namefile
    double cutoff;

    set<long long> getIndexes(set<string> seqs);
};

#endif /* optidata_hpp */
mothur-1.48.0/source/datastructures/optidb.cpp000066400000000000000000000236441424121717000214730ustar00rootroot00000000000000
//
// optidb.cpp
// Mothur
//
// Created by Sarah Westcott on 3/26/20.
// Copyright © 2020 Schloss Lab. All rights reserved.
// #include "optidb.hpp" /**************************************************************************************************/ OptiDB::OptiDB(string referenceFileName, string v) : SearchDatabase() { alignedLength = 0; baseMap['A'] = 0; baseMap['T'] = 1; baseMap['G'] = 2; baseMap['C'] = 3; baseMap['-'] = 4; baseMap['N'] = 5; numBases = baseMap.size(); //A,T,G,C,-,N version = v; optiDBName = referenceFileName.substr(0,referenceFileName.find_last_of(".")+1) + "optidb"; } /**************************************************************************************************/ vector< vector > OptiDB::get(int i, char& allSame) { try { vector< vector > thisDistribution; allSame = 'x'; vector thisColumn; if (alignedLength < i) { m->mothurOut("[ERROR]: The reference alignment length is " + toString(alignedLength) + ", but you are requesting column " + toString(i) + ", please correct.\n"); m->setControl_pressed(true); }else { thisColumn = reference.otuData[i]; } if (m->getControl_pressed()) { } //error in get column else { if (thisColumn.size() == 1) { //all sequences are the same in this column allSame = thisColumn[0]; }else { thisDistribution.resize(numBases); for (int i = 0; i < thisColumn.size(); i++) { thisDistribution[baseMap[thisColumn[i]]].push_back(i); } } } return thisDistribution; } catch(exception& e) { m->errorOut(e, "OptiDB", "get"); exit(1); } } /**************************************************************************************************/ //adds otu with seq as only reference included void OptiDB::addSequence(Sequence seq) { try { lengths.insert(seq.getAligned().length()); refs.push_back(seq); } catch(exception& e) { m->errorOut(e, "OptiDB", "addSequence"); exit(1); } } /**************************************************************************************************/ void OptiDB::convertSequences() { try { if (refs.size() == 0) { return; } //sanity check else { vector seqs; for (int i = 0; i < refs.size(); i++) { lengths.insert(refs[i].getAligned().length()); //convert '.' gaps to '-' string aligned = refs[i].getAligned(); for (int i = 0; i < aligned.length(); i++) { if (aligned[i] == '.') { aligned[i] = '-'; } } seqs.push_back(aligned); numSeqs++; } refs.clear(); reference.readSeqs(seqs); if (reference.numSeqs == 0) { m->mothurOut("[ERROR]: mothur expects the reference for opti_classifier to be aligned, please correct.\n"); m->setControl_pressed(true); alignedLength = 0; } } } catch(exception& e) { m->errorOut(e, "OptiDB", "convertSequences"); exit(1); } } /**************************************************************************************************/ void OptiDB::generateDB() { try { //check to make sure actually aligned if (lengths.size() == 1) { alignedLength = *lengths.begin(); longest = alignedLength-1; } //database stores longest for aligner (longest = longest+1) so remove one. 
else { m->mothurOut("[ERROR]: mothur expects the reference for opti_classifier to be aligned, please correct.\n"); m->setControl_pressed(true); return; } //creates reference convertSequences(); //finds columns in alignment with little noise that are included in query filter calcIndicatorColumns(); //create shortcut file for reading next time ofstream out; util.openOutputFile(optiDBName, out); //output version out << "#" << version << endl; //output reference aligned length out << alignedLength << endl; //output number of seqs in reference out << reference.numSeqs << endl; out << util.getStringFromVector(indicatorColumns, ",") << endl; for (int i = 0; i < reference.otuData.size(); i++) { //for each alignment location out << i << '\t' << reference.otuData[i].size() << '\t'; for (int j = 0; j < reference.otuData[i].size(); j++) { //for each reference, if all bases are the same in this location, size = 1; saves space out << reference.otuData[i][j]; } out << endl; } out.close(); } catch(exception& e) { m->errorOut(e, "OptiDB", "generateDB"); exit(1); } } /**************************************************************************************************/ void OptiDB::readDB(ifstream& optiDBFile){ try { optiDBFile.seekg(0); //read version string line = util.getline(optiDBFile); gobble(optiDBFile); //read alignedLength optiDBFile >> alignedLength; gobble(optiDBFile); longest = alignedLength-1; //read numSeqs in reference int numSeqs = 0; optiDBFile >> numSeqs; gobble(optiDBFile); line = util.getline(optiDBFile); gobble(optiDBFile); vector iCols; util.splitAtComma(line, iCols); for (int i = 0; i < iCols.size(); i++) { int temp; util.mothurConvert(iCols[i], temp); //this column is significant indicatorColumns.push_back(temp); } vector > refDistrib; refDistrib.resize(alignedLength); int location, size; string bases; for (int i = 0; i < alignedLength; i++) { //for each alignment location optiDBFile >> location >> size >> bases; gobble(optiDBFile); for (int j = 0; j < size; j++) { //for each reference, if all bases are the same in this location, size = 1; saves space refDistrib[location].push_back(bases[j]); } } optiDBFile.close(); reference.readSeqs(refDistrib, numSeqs); } catch(exception& e) { m->errorOut(e, "OptiDB", "readDB"); exit(1); } } /**************************************************************************************************/ //remove columns from list to process if they are not present in the query filter map OptiDB::filterIndicatorColumns(string filter, vector& filteredICols){ try { map colsMap; filteredICols.clear(); if (filter == "") { filter.resize(alignedLength, '1'); } //sanity check if (filter.length() != alignedLength) { m->mothurOut("[ERROR]: Your filter indicates your alignment length is " + toString(filter.length()) + ", but your reference files indicate an alignment length of " + toString(alignedLength) + ". 
Cannot continue.\n"); m->setControl_pressed(true); return colsMap; } //process filter information map colsPresentInQueryFiles; map::iterator it; int filterCount = 0; for (int i = 0; i < alignedLength; i++) { if (filter[i] == '1') { //cols to keep colsPresentInQueryFiles[i] = filterCount; filterCount++; } } set indicatorColsInTemplate; for (int i = 0; i < indicatorColumns.size(); i++) { indicatorColsInTemplate.insert(indicatorColumns[i]); } for (int i = 0; i < alignedLength; i++) { colsMap[i] = -1.0; //ignore col if (indicatorColsInTemplate.count(i) != 0) { //this is a template indicator column it = colsPresentInQueryFiles.find(i); if (it != colsPresentInQueryFiles.end()) { //this indicator column is present in the filtered query filteredICols.push_back(it->second); colsMap[i] = it->second; //use col } } } return colsMap; } catch(exception& e) { m->errorOut(e, "OptiDB", "filterIndicatorColumns"); exit(1); } } /**************************************************************************************************/ //an indicator column must have at least 50% of the bases the same void OptiDB::calcIndicatorColumns(){ try { for (int i = 0; i < reference.otuData.size(); i++) { //for each alignment location vector thisColumn = reference.otuData[i]; if (thisColumn.size() == 1) { } //all sequences are the same in this column, ignore else { vector counts; counts.resize(numBases, 0.0); //find occurances of each base for (int j = 0; j < thisColumn.size(); j++) { counts[baseMap[thisColumn[j]]]++; } //find percentages for (int k = 0; k < counts.size(); k++) { counts[k] /= numSeqs; if ((counts[k] > 0.50) && (counts[k] < 0.95)) { indicatorColumns.push_back(i); break; } } } } } catch(exception& e) { m->errorOut(e, "OptiDB", "calcIndicatorColumns"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/optidb.hpp000066400000000000000000000032541424121717000214740ustar00rootroot00000000000000// // optidb.hpp // Mothur // // Created by Sarah Westcott on 3/26/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #ifndef optidb_hpp #define optidb_hpp #include "sequence.hpp" #include "searchdatabase.hpp" #include "calculator.h" #include "optimatrix.h" class OptiDB : public SearchDatabase { public: OptiDB(string, string); //reference file name for shortcut file name generation, version ~OptiDB() = default; void addSequence(Sequence); void generateDB(); void readDB(ifstream&); vector getIndicatorColumns() { return indicatorColumns; } map getFilteredIndicatorColumns(string f, vector& results) { return filterIndicatorColumns(f, results); } vector< vector > get(int i, char& allSame); //A,T,G,C,-,N returns vector[6][numSeqsWithBase] -> vector[0] = vector of indexes of reference with A in location i, vector[1] = vector of indexes of reference with T in location i,ect. If allSame!='x', all characters are the same in this column, and will return blank vector. 
ie if allSame='A', every reference in this location is an A vector findClosestSequences(Sequence*, int, vector&) const { return nullIntVector; } private: string optiDBName, version; int alignedLength, numBases; classifierOTU reference; map baseMap; vector indicatorColumns; void convertSequences(); void calcIndicatorColumns(); map filterIndicatorColumns(string, vector&); //only used when generating db, not when reading shortcut files set lengths; vector refs; }; #endif /* optidb_hpp */ mothur-1.48.0/source/datastructures/optimatrix.cpp000077500000000000000000000313371424121717000224140ustar00rootroot00000000000000// // optimatrix.cpp // Mothur // // Created by Sarah Westcott on 4/20/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #include "optimatrix.h" #include "counttable.h" /***********************************************************************/ OptiMatrix::OptiMatrix(vector< set > close, vector name, vector single, double c) : OptiData(c) { closeness = close; nameMap = name; singletons = single; } /***********************************************************************/ OptiMatrix::OptiMatrix(string d, string nc, string f, string df, double c, bool s) : distFile(d), distFormat(df), format(f), sim(s), OptiData(c) { if (format == "name") { namefile = nc; countfile = ""; } else if (format == "count") { countfile = nc; namefile = ""; } else { countfile = ""; namefile = ""; } if (distFormat == "phylip") { readPhylip(); } else if (distFormat == "column") { readColumn(); } } /***********************************************************************/ int OptiMatrix::readPhylip(){ try { nameMap.clear(); float distance; long long nseqs; bool square = false; string name; map singletonIndexSwap; ifstream fileHandle; string numTest; Utils util; util.openInputFile(distFile, fileHandle); fileHandle >> numTest >> name; nameMap.push_back(name); singletonIndexSwap[0] = 0; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } //square test char d; while((d=fileHandle.get()) != EOF){ if(isalnum(d)){ square = true; fileHandle.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = false; break; } } vector singleton; singleton.resize(nseqs, true); ///////////////////// Read to eliminate singletons /////////////////////// if(square){ for(long long i=1;igetControl_pressed()) { fileHandle.close(); return 0; } fileHandle >> name; nameMap.push_back(name); singletonIndexSwap[i] = i; for(long long j=0;j> distance; if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff){ singleton[i] = false; singleton[j] = false; } } } }else{ for(long long i=1;igetControl_pressed()) { fileHandle.close(); return 0; } fileHandle >> name; nameMap.push_back(name); singletonIndexSwap[i] = i; for(long long j=0;j> distance; if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. 
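                    //NOTE (editorial comment, not in the original source): a stored distance of -1 marks a
                    //missing value, so it is replaced with a large sentinel (1000000) that can never pass the
                    //cutoff test below; when the user supplied a similarity matrix (sim == true) the value is
                    //first converted to a distance as 1 - similarity. A minimal sketch of the same rule, using
                    //a hypothetical helper name that is not part of mothur:
                    //
                    //    double toDistance(double value, bool isSimilarity) {
                    //        if (util.isEqual(value, -1)) { return 1000000; }   //missing -> never below cutoff
                    //        return isSimilarity ? (1.0 - value) : value;       //similarity -> distance
                    //    }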
if(distance <= cutoff && j < i){ singleton[i] = false; singleton[j] = false; } } } } fileHandle.close(); ////////////////////////////////////////////////////////////////////////// long long nonSingletonCount = 0; for (long long i = 0; i < singleton.size(); i++) { if (!singleton[i]) { //if you are not a singleton singletonIndexSwap[i] = nonSingletonCount; nonSingletonCount++; }else { singletons.push_back(nameMap[i]); } } singleton.clear(); closeness.resize(nonSingletonCount); map names; if (namefile != "") { util.readNames(namefile, names); for (long long i = 0; i < singletons.size(); i++) { singletons[i] = names[singletons[i]]; } } ifstream in; util.openInputFile(distFile, in); in >> nseqs >> name; if (namefile != "") { name = names[name]; } //redundant names nameMap[singletonIndexSwap[0]] = name; string line = ""; if(square){ int index = 0; for(long long i=1;igetControl_pressed()) { in.close(); return 0; } in >> name; gobble(in); if (namefile != "") { name = names[name]; } //redundant names nameMap[singletonIndexSwap[i]] = name; for(long long j=0;j> distance; gobble(in); if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff){ long long newB = singletonIndexSwap[j]; long long newA = singletonIndexSwap[i]; closeness[newA].insert(newB); closeness[newB].insert(newA); } index++; } } }else{ long long index = nseqs; for(long long i=0;i> distance; } gobble(in); for(long long i=1;igetControl_pressed()) { in.close(); return 0; } in >> name; gobble(in); if (namefile != "") { name = names[name]; } //redundant names nameMap[singletonIndexSwap[i]] = name; for(long long j=0;j> distance; gobble(in); if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff && j < i){ long long newB = singletonIndexSwap[j]; long long newA = singletonIndexSwap[i]; closeness[newA].insert(newB); closeness[newB].insert(newA); } index++; } } } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "OptiMatrix", "readPhylip"); exit(1); } } /***********************************************************************/ int OptiMatrix::readColumn(){ try { Utils util; map nameAssignment; if (namefile != "") { util.readNames(namefile, nameAssignment); } else { CountTable ct; ct.readTable(countfile, false, true); map temp = ct.getNameMap(); for (map::iterator it = temp.begin(); it!= temp.end(); it++) { nameAssignment[it->first] = it->second; } } long long count = 0; for (map::iterator it = nameAssignment.begin(); it!= nameAssignment.end(); it++) { it->second = count; count++; nameMap.push_back(it->first); } string firstName, secondName; float distance; ///////////////////// Read to eliminate singletons /////////////////////// ifstream fileHandle; util.openInputFile(distFile, fileHandle); vector singleton; singleton.resize(nameAssignment.size(), true); map singletonIndexSwap; while(fileHandle){ //let's assume it's a triangular matrix... 
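        //NOTE (editorial comment, not in the original source): each record in a column-formatted distance
        //file is three whitespace-separated fields: nameA nameB distance. A hypothetical example:
        //
        //    seqA    seqB    0.02
        //    seqA    seqC    0.15
        //    seqB    seqC    0.07
        //
        //This first pass only marks which sequences have at least one distance at or below the cutoff;
        //the qualifying pairs themselves are loaded into closeness on the second pass further down.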
fileHandle >> firstName; gobble(fileHandle); fileHandle >> secondName; gobble(fileHandle); fileHandle >> distance; gobble(fileHandle); // get the row and column names and distance if (m->getDebug()) { cout << firstName << '\t' << secondName << '\t' << distance << endl; } if (m->getControl_pressed()) { fileHandle.close(); return 0; } if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff){ map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the name or count file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the name or count file, please correct\n"); exit(1); } long long indexA = (itA->second); long long indexB = (itB->second); singleton[indexA] = false; singleton[indexB] = false; singletonIndexSwap[indexA] = indexA; singletonIndexSwap[indexB] = indexB; } } fileHandle.close(); ////////////////////////////////////////////////////////////////////////// long long nonSingletonCount = 0; for (long long i = 0; i < singleton.size(); i++) { if (!singleton[i]) { //if you are a singleton singletonIndexSwap[i] = nonSingletonCount; nonSingletonCount++; }else { singletons.push_back(nameMap[i]); } } singleton.clear(); closeness.resize(nonSingletonCount); map names; if (namefile != "") { util.readNames(namefile, names); for (long long i = 0; i < singletons.size(); i++) { singletons[i] = names[singletons[i]]; } } ifstream in; util.openInputFile(distFile, in); while(in){ //let's assume it's a triangular matrix... in >> firstName; gobble(in); in >> secondName; gobble(in); in >> distance; gobble(in); // get the row and column names and distance if (m->getDebug()) { cout << firstName << '\t' << secondName << '\t' << distance << endl; } if (m->getControl_pressed()) { in.close(); return 0; } if (util.isEqual(distance,-1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff){ map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the name or count file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the name or count file, please correct\n"); exit(1); } long long indexA = (itA->second); long long indexB = (itB->second); long long newB = singletonIndexSwap[indexB]; long long newA = singletonIndexSwap[indexA]; closeness[newA].insert(newB); closeness[newB].insert(newA); if (namefile != "") { firstName = names[firstName]; //redundant names secondName = names[secondName]; //redundant names } nameMap[newA] = firstName; nameMap[newB] = secondName; } } in.close(); nameAssignment.clear(); return 1; } catch(exception& e) { m->errorOut(e, "OptiMatrix", "readColumn"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/optimatrix.h000077500000000000000000000014311424121717000220510ustar00rootroot00000000000000// // optimatrix.h // Mothur // // Created by Sarah Westcott on 4/20/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. 
//

#ifndef __Mothur__optimatrix__
#define __Mothur__optimatrix__

#include "optidata.hpp"

/*
 #ifdef UNIT_TEST
    OptiMatrix() : OptiData(0.03) {};
 #endif
 */

class OptiMatrix : public OptiData {

public:
    OptiMatrix(vector< set<long long> >, vector<string>, vector<string>, double); //closeness, namemap, singleton, cutoff
    OptiMatrix(string, string, string, string, double, bool); //distfile, dupsFile, dupsFormat, distFormat, cutoff, sim
    ~OptiMatrix(){}

protected:
    string distFile, namefile, countfile, format, distFormat;
    bool sim;

    int readPhylip();
    int readColumn();
};

#endif /* defined(__Mothur__optimatrix__) */
mothur-1.48.0/source/datastructures/optirefmatrix.cpp000066400000000000000000001302441424121717000231030ustar00rootroot00000000000000
//
// optirefmatrix.cpp
// Mothur
//
// Created by Sarah Westcott on 5/3/18.
// Copyright © 2018 Schloss Lab. All rights reserved.
//

#include "optirefmatrix.hpp"
#include "counttable.h"

/***********************************************************************/
OptiRefMatrix::OptiRefMatrix(string distFile, string distFormat, string dupsFile, string dupsFormat, double c, float fP, string refWeight) : OptiData(c) {

    numFitSingletons = 0; numRefSingletons = 0; numSingletons = 0;
    numBetweenDists = 0; numFitDists = 0; numRefDists = 0; numFitSeqs = 0;

    refWeightMethod = refWeight;
    fitPercent = fP / 100.0;

    if (fitPercent < 0.001) { //minimum of 0.1%
        fitPercent = 0.10;
        m->mothurOut("[WARNING]: fit percentage must be between 0.001 (0.1%) and 1.0 (100%). Setting to 0.10 or 10%. \n");
    }
    else if (fitPercent > 100.0) { m->mothurOut("[ERROR]: fit percentage must be between 0.0001 and 100.0\n"); m->setControl_pressed(true); }

    square = false;

    unordered_set<string> noRefNamesSet;

    readFiles(distFile, distFormat, dupsFile, dupsFormat, noRefNamesSet);
}
/***********************************************************************/
OptiRefMatrix::OptiRefMatrix(string distFile, string distFormat, string dupsFile, string dupsFormat, double c, unordered_set<string> accnosRefFileNames) : OptiData(c) {

    numFitSingletons = 0; numRefSingletons = 0; numSingletons = 0;
    numBetweenDists = 0; numFitDists = 0; numRefDists = 0; numFitSeqs = 0;

    refWeightMethod = "accnos";
    square = false;

    readFiles(distFile, distFormat, dupsFile, dupsFormat, accnosRefFileNames);
}
/***********************************************************************/
OptiRefMatrix::OptiRefMatrix(string d, string nc, string f, string df, double c, string fit, string fitnc, string fitf, string fitdf, string betweend, string betweendf) : OptiData(c) {

    string refdistfile, refnamefile, refcountfile, refformat, refdistformat, fitdistfile, fitnamefile, fitcountfile, fitformat, fitdistformat, betweendistfile, betweendistformat;

    refdistfile = d; refdistformat = df; refformat = f;
    fitdistfile = fit; fitdistformat = fitdf; fitformat = fitf;
    betweendistfile = betweend; betweendistformat = betweendf;

    numFitSingletons = 0; numRefSingletons = 0; numSingletons = 0;
    numBetweenDists = 0; numFitDists = 0; numRefDists = 0; numFitSeqs = 0;

    fitPercent = 0;
    refWeightMethod = "none";
    square = false;

    if (refformat == "name")        { refnamefile = nc;  refcountfile = ""; }
    else if (refformat == "count")  { refcountfile = nc; refnamefile = "";  }
    else                            { refcountfile = ""; refnamefile = "";  }

    if (fitformat == "name")        { fitnamefile = fitnc;  fitcountfile = ""; }
    else if (fitformat == "count")  { fitcountfile = fitnc; fitnamefile = "";  }
    else                            { fitcountfile = "";    fitnamefile = "";  }

    readFiles(refdistfile, refnamefile, refcountfile, refformat, refdistformat, fitdistfile, fitnamefile, fitcountfile, fitformat,
fitdistformat, betweendistfile, betweendistformat); } /***********************************************************************/ //Since we are extracting a subset of the seqs some reads that may not have been singletons OptiData* OptiRefMatrix::extractRefMatrix() { try { set seqs; for (long long i = 0; i < isRef.size(); i++) { if (isRef[i]) { seqs.insert(i); } } vector subsetNameMap; vector subsetSingletons; vector< set > subsetCloseness; map thisNameMap; map nonSingletonNameMap; vector singleton; singleton.resize(seqs.size(), true); int count = 0; for (set::iterator it = seqs.begin(); it != seqs.end(); it++) { long long seqNum = *it; thisNameMap[seqNum] = count; nonSingletonNameMap[count] = seqNum; set thisSeqsCloseSeqs = getCloseSeqs(seqNum); for (set::iterator itClose = thisSeqsCloseSeqs.begin(); itClose != thisSeqsCloseSeqs.end(); itClose++) { if (m->getControl_pressed()) { break; } long long thisSeq = *itClose; //is this seq in the set of unfitted? if (seqs.count(thisSeq) != 0) { singleton[thisNameMap[seqNum]] = false; } } count++; } int nonSingletonCount = 0; for (long long i = 0; i < singleton.size(); i++) { if (!singleton[i]) { //if you are not a singleton nonSingletonNameMap[i] = nonSingletonCount; nonSingletonCount++; }else { seqs.erase(nonSingletonNameMap[i]); subsetSingletons.push_back(getName(nonSingletonNameMap[i])); } //remove from unfitted } singleton.clear(); subsetCloseness.resize(nonSingletonCount); for (set::iterator it = seqs.begin(); it != seqs.end(); it++) { if (m->getControl_pressed()) { break; } long long seqNum = *it; set thisSeqsCloseSeqs = getCloseSeqs(seqNum); set thisSeqsCloseUnFittedSeqs; for (set::iterator itClose = thisSeqsCloseSeqs.begin(); itClose != thisSeqsCloseSeqs.end(); itClose++) { if (m->getControl_pressed()) { break; } long long thisSeq = *itClose; //is this seq in the set of unfitted? 
if (seqs.count(thisSeq) != 0) { thisSeqsCloseUnFittedSeqs.insert(nonSingletonNameMap[thisNameMap[thisSeq]]); } } if (!thisSeqsCloseUnFittedSeqs.empty()) { subsetCloseness[nonSingletonNameMap[thisNameMap[seqNum]]] = thisSeqsCloseUnFittedSeqs; subsetNameMap.push_back(getName(seqNum)); } } for (int i = 0; i < isSingleRef.size(); i++) { if (isSingleRef[i]) { subsetSingletons.push_back(singletons[i]); } } OptiData* unfittedMatrix = new OptiMatrix(subsetCloseness, subsetNameMap, subsetSingletons, cutoff); return unfittedMatrix; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "extractRefMatrix"); exit(1); } } /***********************************************************************/ //given set of names, pull out their dists and create optimatrix OptiData* OptiRefMatrix::extractMatrixSubset(set& seqs) { try { set seqIndexes = getIndexes(seqs); return extractMatrixSubset(seqIndexes); } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "extractMatrixSubset"); exit(1); } } /***********************************************************************/ //given matrix indexes of seqs, pull out their dists and create optimatrix OptiData* OptiRefMatrix::extractMatrixSubset(set & seqs) { try { vector subsetNameMap; vector subsetSingletons; vector< set > subsetCloseness; map thisNameMap; map nonSingletonNameMap; vector singleton; singleton.resize(seqs.size(), true); int count = 0; for (set::iterator it = seqs.begin(); it != seqs.end(); it++) { long long seqNum = *it; thisNameMap[seqNum] = count; nonSingletonNameMap[count] = seqNum; set thisSeqsCloseSeqs = getCloseSeqs(seqNum); for (set::iterator itClose = thisSeqsCloseSeqs.begin(); itClose != thisSeqsCloseSeqs.end(); itClose++) { if (m->getControl_pressed()) { break; } long long thisSeq = *itClose; //is this seq in the set of unfitted? if (seqs.count(thisSeq) != 0) { singleton[thisNameMap[seqNum]] = false; } } count++; } int nonSingletonCount = 0; for (long long i = 0; i < singleton.size(); i++) { if (!singleton[i]) { //if you are a singleton nonSingletonNameMap[i] = nonSingletonCount; nonSingletonCount++; }else { seqs.erase(nonSingletonNameMap[i]); subsetSingletons.push_back(getName(nonSingletonNameMap[i])); } //remove from unfitted } singleton.clear(); subsetCloseness.resize(nonSingletonCount); for (set::iterator it = seqs.begin(); it != seqs.end(); it++) { if (m->getControl_pressed()) { break; } long long seqNum = *it; set thisSeqsCloseSeqs = getCloseSeqs(seqNum); set thisSeqsCloseUnFittedSeqs; for (set::iterator itClose = thisSeqsCloseSeqs.begin(); itClose != thisSeqsCloseSeqs.end(); itClose++) { if (m->getControl_pressed()) { break; } long long thisSeq = *itClose; //is this seq in the set of unfitted? if (seqs.count(thisSeq) != 0) { thisSeqsCloseUnFittedSeqs.insert(nonSingletonNameMap[thisNameMap[thisSeq]]); } } if (!thisSeqsCloseUnFittedSeqs.empty()) { subsetCloseness[nonSingletonNameMap[thisNameMap[seqNum]]] = thisSeqsCloseUnFittedSeqs; subsetNameMap.push_back(getName(seqNum)); } } OptiData* unfittedMatrix = new OptiMatrix(subsetCloseness, subsetNameMap, subsetSingletons, cutoff); return unfittedMatrix; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "extractMatrixSubset"); exit(1); } } /***********************************************************************/ vector OptiRefMatrix::getTranslatedBins(vector > & binNames, vector > & fixedBins) { try { fixedBins.clear(); map nameIndexes; set unique; for (long long i = 0; i < nameMap.size(); i++) { //vector of string representing the sequences in the matrix from the name file. 
vector thisSeqsReps; util.splitAtComma(nameMap[i], thisSeqsReps); //split redundant names if (i < closeness.size()) { nameIndexes[thisSeqsReps[0]] = i; } //this is a sequence with distances in the matrix if (thisSeqsReps.size() == 1) { //you are unique unique.insert(thisSeqsReps[0]); } } for (long long i = 0; i < singletons.size(); i++) { if (isSingleRef[i]) { vector thisSeqsReps; util.splitAtComma(singletons[i], thisSeqsReps); //split redundant names nameIndexes[thisSeqsReps[0]] = -1; if (thisSeqsReps.size() == 1) { unique.insert(thisSeqsReps[0]); } } } for (long long i = 0; i < binNames.size(); i++) { //for each OTU vector thisBinsSeqs; for (long long j = 0; j < binNames[i].size(); j++) { //for each sequence map::iterator it = nameIndexes.find(binNames[i][j]); if (it == nameIndexes.end()) { }//not in distance matrix, but needs a value in fixedBins. 2 reasons for making it here: you are a redundant name in the listfile, you do not have any distances else { thisBinsSeqs.push_back(it->second); } //"name" of sequence in matrix } fixedBins.push_back(thisBinsSeqs); } return (getFitSeqs()); } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "getTranslatedBins"); exit(1); } } /***********************************************************************/ //assumes that i is a fitSeq bool OptiRefMatrix::isCloseFit(long long i, long long toFind, bool& isFit){ try { if (i < 0) { return false; } else if (i > closeness.size()) { m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); return false; } bool found = false; if (!isRef[toFind]) { //are you a fit seq if (closeness[i].count(toFind) != 0) { //are you close found = true; } isFit = true; }else { isFit = false; } return found; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "isCloseFit"); exit(1); } } /***********************************************************************/ //does not include singletons, only reads in closeness vector OptiRefMatrix::getRefSeqs() { try { vector refSeqsIndexes; for (long long i = 0; i < isRef.size(); i++) { if (isRef[i]) { refSeqsIndexes.push_back(i); } } return refSeqsIndexes; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "getRefSeqs"); exit(1); } } /***********************************************************************/ vector OptiRefMatrix::getRefSingletonNames() { try { vector refSeqsNames; for (long long i = 0; i < isSingleRef.size(); i++) { if (isSingleRef[i]) { refSeqsNames.push_back(singletons[i]); } } return refSeqsNames; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "getRefSingletonNames"); exit(1); } } /***********************************************************************/ vector OptiRefMatrix::getFitSeqs() { try { vector fitSeqsIndexes; for (long long i = 0; i < isRef.size(); i++) { if (!isRef[i]) { fitSeqsIndexes.push_back(i); } } return fitSeqsIndexes; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "getFitSeqs"); exit(1); } } /***********************************************************************/ long long OptiRefMatrix::getNumFitTrueSingletons() { try { long long numFitTrueSingletons = 0; for (long long i = 0; i < isSingleRef.size(); i++) { if (!isSingleRef[i]) { numFitTrueSingletons++; } } return numFitTrueSingletons; } catch(exception& e) { m->errorOut(e, "OptiData", "getNumFitTrueSingletons"); exit(1); } } /***********************************************************************/ long long OptiRefMatrix::getNumFitClose(long long index) { try { long long numClose = 0; if (index < 0) { } else if (index > closeness.size()) { 
m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); } else { //reference seqs all have indexes less than refEnd for (set::iterator it = closeness[index].begin(); it != closeness[index].end(); it++) { if (!isRef[*it]) { numClose++; } //you are a fit seq } } return numClose; } catch(exception& e) { m->errorOut(e, "OptiData", "getNumClose"); exit(1); } } /***********************************************************************/ long long OptiRefMatrix::getNumRefClose(long long index) { try { long long numClose = 0; if (index < 0) { } else if (index > closeness.size()) { m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); } else { //reference seqs all have indexes less than refEnd for (set::iterator it = closeness[index].begin(); it != closeness[index].end(); it++) { if (isRef[*it]) { numClose++; } //you are a ref seq } } return numClose; } catch(exception& e) { m->errorOut(e, "OptiData", "getNumClose"); exit(1); } } /***********************************************************************/ set OptiRefMatrix::getCloseFitSeqs(long long index){ try { set closeSeqs; if (index < 0) { } else if (index > closeness.size()) { m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); } // else { //reference seqs all have indexes less than refEnd for (set::iterator it = closeness[index].begin(); it != closeness[index].end(); it++) { if (!isRef[*it]) { closeSeqs.insert(*it); } //you are a fit seq } } return closeSeqs; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "getCloseFitSeqs"); exit(1); } } /***********************************************************************/ set OptiRefMatrix::getCloseRefSeqs(long long index){ try { set closeSeqs; if (index < 0) { } else if (index > closeness.size()) { m->mothurOut("[ERROR]: index is not valid.\n"); m->setControl_pressed(true); } else { //reference seqs all have indexes less than refEnd for (set::iterator it = closeness[index].begin(); it != closeness[index].end(); it++) { if (isRef[*it]) { closeSeqs.insert(*it); } //you are a ref seq } } return closeSeqs; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "getCloseFitSeqs"); exit(1); } } /***********************************************************************/ //only used in open reference clustering ListVector* OptiRefMatrix::getFitListSingle() { try { ListVector* singlelist = nullptr; if (singletons.size() == 0) { } else { singlelist = new ListVector(); for (int i = 0; i < isSingleRef.size(); i++) { if (!isSingleRef[i]) { singlelist->push_back(singletons[i]); } } } return singlelist; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "getFitListSingle"); exit(1); } } /***********************************************************************/ void OptiRefMatrix::randomizeRefs() { try { long long totalSeqs = (isRef.size()+isSingleRef.size()); long long numToSelect = totalSeqs * fitPercent; long long refSingletonCutoff = isRef.size(); long long singleSize = isSingleRef.size(); //select sequences to be reference set fitSeqsIndexes; if (weights.size() != 0) { fitSeqsIndexes = subsample.getWeightedSample(weights, numToSelect); } //you have weighted selection else { long long numSelected = 0; while (numSelected < numToSelect) { if (m->getControl_pressed()) { break; } fitSeqsIndexes.insert(util.getRandomIndex(totalSeqs-1)); //no repeats numSelected = fitSeqsIndexes.size(); } } //initilize isRef to true isRef.clear(); isRef.resize(refSingletonCutoff, true); isSingleRef.clear(); isSingleRef.resize(singleSize, true); //set isRef values 
for (set::iterator it = fitSeqsIndexes.begin(); it != fitSeqsIndexes.end(); it++) { if (m->getControl_pressed()) { break; } long long thisSeq = *it; if (thisSeq < refSingletonCutoff) { //you are a non singleton seq in the closeness isRef[thisSeq] = false; }else { //thisSeq is a singleton isSingleRef[thisSeq-refSingletonCutoff] = false; } } //find number of fitDists, refDists and between dists calcCounts(); } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "randomizeRefs"); exit(1); } } /***********************************************************************/ //for denovo method int OptiRefMatrix::readFiles(string distFile, string distFormat, string dupsFile, string dupsFormat, unordered_set& optionalRefNames) { try { string namefile, countfile; if (dupsFormat == "name") { namefile = dupsFile; countfile = ""; } else if (dupsFormat == "count") { countfile = dupsFile; namefile = ""; } else { countfile = ""; namefile = ""; } map nameAssignment; if (namefile != "") { util.readNames(namefile, nameAssignment); } else { CountTable ct; ct.readTable(countfile, false, true); map temp = ct.getNameMap(); for (map::iterator it = temp.begin(); it!= temp.end(); it++) { nameAssignment[it->first] = it->second; } } //select sequences to be reference set fitSeqsIndexes; long long count = 0; for (map::iterator it = nameAssignment.begin(); it!= nameAssignment.end(); it++) { if (refWeightMethod == "abundance") { weights[count] = it->second; } else if (refWeightMethod == "connectivity") { weights[count] = 1; } //initialize else if (refWeightMethod == "accnos") { //fill fit indexes if (optionalRefNames.count(it->first) == 0) { //you are not a reference sequence fitSeqsIndexes.insert(count); //add as fit seq } } it->second = count; count++; nameMap.push_back(it->first); nameAssignment[it->first] = it->second; } //read file to find singletons vector singleton; singleton.resize(count, true); map singletonIndexSwap; if (distFormat == "column") { singletonIndexSwap = readColumnSingletons(singleton, distFile, nameAssignment); } else if (distFormat == "phylip") { singletonIndexSwap = readPhylipSingletons(singleton, distFile, count, nameAssignment); } int nonSingletonCount = 0; for (int i = 0; i < singleton.size(); i++) { if (!singleton[i]) { //if you are not a singleton singletonIndexSwap[i] = nonSingletonCount; nonSingletonCount++; }else { singletons.push_back(nameMap[i]); } } numSingletons = singletons.size(); closeness.resize(nonSingletonCount); map names; if (namefile != "") { //update names for reference util.readNames(namefile, names); for (int i = 0; i < numSingletons; i++) { singletons[i] = names[singletons[i]]; } } //read reference file distances bool hasName = false; if (namefile != "") { hasName = true; } if (distFormat == "column") { readColumn(distFile, hasName, names, nameAssignment, singletonIndexSwap); } else if (distFormat == "phylip") { readPhylip(distFile, hasName, names, nameAssignment, singletonIndexSwap); } //randomly select the "fit" seqs long long numToSelect = nameAssignment.size() * fitPercent; if (weights.size() != 0) { fitSeqsIndexes = subsample.getWeightedSample(weights, numToSelect); } //you have weighted selection else { if (refWeightMethod == "accnos") { } //fitIndexes are filled above else { //randomly select references long long numSelected = 0; long long totalSeqs = nameAssignment.size(); while (numSelected < numToSelect) { if (m->getControl_pressed()) { break; } fitSeqsIndexes.insert(util.getRandomIndex(totalSeqs-1)); //no repeats numSelected = fitSeqsIndexes.size(); } } } //flag 
reference seqs singleton or not for (int i = 0; i < singleton.size(); i++) { if (!singleton[i]) { //if you are not a singleton if (fitSeqsIndexes.count(i) != 0) { //you are a fit seq isRef.push_back(false); }else { isRef.push_back(true); } //its a reference }else { if (fitSeqsIndexes.count(i) != 0) { //you are a fit seq singleton isSingleRef.push_back(false); }else { isSingleRef.push_back(true); } //its a singleton reference } } singleton.clear(); //find number of fitDists, refDists and between dists calcCounts(); return 0; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "readFiles"); exit(1); } } /***********************************************************************/ //for reading reference and fit files separately, reference method int OptiRefMatrix::readFiles(string refdistfile, string refnamefile, string refcountfile, string refformat, string refdistformat, string fitdistfile, string fitnamefile, string fitcountfile, string fitformat, string fitdistformat, string betweendistfile, string betweendistformat){ try { map nameAssignment; if (refnamefile != "") { util.readNames(refnamefile, nameAssignment); } else { CountTable ct; ct.readTable(refcountfile, false, true); map temp = ct.getNameMap(); for (map::iterator it = temp.begin(); it!= temp.end(); it++) { nameAssignment[it->first] = it->second; } } long long count = 0; for (map::iterator it = nameAssignment.begin(); it!= nameAssignment.end(); it++) { it->second = count; count++; nameMap.push_back(it->first); nameAssignment[it->first] = it->second; } long long refCount = count; vector singleton; singleton.resize(count, true); //resize will only set new elements to true map refSingletonIndexSwap; //index into if (refdistformat == "column") { refSingletonIndexSwap = readColumnSingletons(singleton, refdistfile, nameAssignment); } else if (refdistformat == "phylip") { refSingletonIndexSwap = readPhylipSingletons(singleton, refdistfile, count, nameAssignment); } //read fit file to find singletons map fitSingletonIndexSwap; map fitnameAssignment; if (fitnamefile != "") { util.readNames(fitnamefile, fitnameAssignment); } else { CountTable ct; ct.readTable(fitcountfile, false, true); map temp = ct.getNameMap(); for (map::iterator it = temp.begin(); it!= temp.end(); it++) { fitnameAssignment[it->first] = it->second; } } for (map::iterator it = fitnameAssignment.begin(); it!= fitnameAssignment.end(); it++) { it->second = count; count++; nameMap.push_back(it->first); nameAssignment[it->first] = it->second; } singleton.resize(count, true); if (fitdistformat == "column") { fitSingletonIndexSwap = readColumnSingletons(singleton, fitdistfile, nameAssignment); } else if (fitdistformat == "phylip") { fitSingletonIndexSwap = readPhylipSingletons(singleton, fitdistfile, count, nameAssignment); } fitPercent = ((count-refCount) / (float) count); //read bewtween file to update singletons readColumnSingletons(singleton, betweendistfile, nameAssignment); long long nonSingletonCount = 0; map singletonIndexSwap; for (long long i = 0; i < refCount; i++) { if (!singleton[i]) { //if you are not a singleton singletonIndexSwap[i] = nonSingletonCount; isRef.push_back(true); nonSingletonCount++; }else { singletons.push_back(nameMap[i]); isSingleRef.push_back(true); } } refSingletonIndexSwap.clear(); for (long long i = refCount; i < singleton.size(); i++) { if (!singleton[i]) { //if you are not a singleton singletonIndexSwap[i] = nonSingletonCount; isRef.push_back(false); nonSingletonCount++; }else { singletons.push_back(nameMap[i]); 
isSingleRef.push_back(false); } } singleton.clear(); fitSingletonIndexSwap.clear(); numSingletons = singletons.size(); closeness.resize(nonSingletonCount); map names; if (refnamefile != "") { util.readNames(refnamefile, names); } if (fitnamefile != "") { map fitnames; util.readNames(fitnamefile, fitnames); names.insert(fitnames.begin(), fitnames.end()); //copy fit names into names } if ((fitnamefile != "") || (refnamefile != "")) { for (int i = 0; i < singletons.size(); i++) { map::iterator it = names.find(singletons[i]); if (it != names.end()) { //update singletons singletons[i] = it->second; } } } //read reference file distances bool refHasName = false; if (refnamefile != "") { refHasName = true; } if (refdistformat == "column") { readColumn(refdistfile, refHasName, names, nameAssignment, singletonIndexSwap); } else if (refdistformat == "phylip") { readPhylip(refdistfile, refHasName, names, nameAssignment, singletonIndexSwap); } //read fit distances bool fitHasName = false; if (fitnamefile != "") { fitHasName = true; } if (fitdistformat == "column") { readColumn(fitdistfile, fitHasName, names, nameAssignment, singletonIndexSwap); } else if (fitdistformat == "phylip") { readPhylip(fitdistfile, fitHasName, names, nameAssignment, singletonIndexSwap); } //read in between distances bool hasName = fitHasName; if (!hasName && refHasName) { hasName = true; } //if either the ref or fit has a name file then set hasName if (betweendistformat == "column") { readColumn(betweendistfile, hasName, names, nameAssignment, singletonIndexSwap); } else if (betweendistformat == "phylip") { readPhylip(betweendistfile, hasName, names, nameAssignment, singletonIndexSwap); } //find number of fitDists, refDists and between dists calcCounts(); return 0; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "readFiles"); exit(1); } } /***********************************************************************/ map OptiRefMatrix::readColumnSingletons(vector& singleton, string distFile, map& nameAssignment){ try { ifstream fileHandle; util.openInputFile(distFile, fileHandle); string firstName, secondName; double distance; map singletonIndexSwap; while(fileHandle){ //let's assume it's a triangular matrix... 
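        //NOTE (editorial comment, not in the original source): this scan uses the same three-column record
        //layout (nameA nameB distance) as OptiMatrix::readColumn, but only to flag which reference and fit
        //sequences have at least one distance at or below the cutoff; the distances themselves are re-read
        //and stored later by readColumn()/readPhylip() once the singleton index swap is known.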
fileHandle >> firstName; gobble(fileHandle); fileHandle >> secondName; gobble(fileHandle); fileHandle >> distance; gobble(fileHandle); // get the row and column names and distance if (m->getDebug()) { cout << firstName << '\t' << secondName << '\t' << distance << endl; } if (m->getControl_pressed()) { break; } if (util.isEqual(distance,-1)) { distance = 1000000; } if(distance <= cutoff){ map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the name or count file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the name or count file, please correct\n"); exit(1); } long long indexA = (itA->second); long long indexB = (itB->second); singleton[indexA] = false; singleton[indexB] = false; singletonIndexSwap[indexA] = indexA; singletonIndexSwap[indexB] = indexB; } } fileHandle.close(); return singletonIndexSwap; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "readColumnSingletons"); exit(1); } } /***********************************************************************/ map OptiRefMatrix::readPhylipSingletons(vector& singleton, string distFile, long long& count, map& nameAssignment){ try { float distance; long long nseqs; string name; map singletonIndexSwap; ifstream fileHandle; string numTest; util.openInputFile(distFile, fileHandle); fileHandle >> numTest >> name; nameMap.push_back(name); singletonIndexSwap[0] = 0; nameAssignment[name] = 0; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); m->setControl_pressed(true); return singletonIndexSwap; } else { convert(numTest, nseqs); } //square test char d; while((d=fileHandle.get()) != EOF){ if(isalnum(d)){ square = true; fileHandle.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = false; break; } } singleton.resize((count+nseqs), true); if(square == 0){ for(long long i=1;igetControl_pressed()) { break; } fileHandle >> name; nameMap.push_back(name); singletonIndexSwap[i] = i; nameAssignment[name] = i; for(long long j=0;j> distance; if (util.isEqual(distance,-1)) { distance = 1000000; } if(distance <= cutoff){ singleton[i] = false; singleton[j] = false; } } } }else{ for(long long i=1;igetControl_pressed()) { break; } fileHandle >> name; nameMap.push_back(name); singletonIndexSwap[i] = i; nameAssignment[name] = i; for(long long j=0;j> distance; if (util.isEqual(distance,-1)) { distance = 1000000; } if(distance <= cutoff && j < i){ singleton[i] = false; singleton[j] = false; } } } } fileHandle.close(); count += nseqs; return singletonIndexSwap; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "readPhylipSingletons"); exit(1); } } /***********************************************************************/ int OptiRefMatrix::readPhylip(string distFile, bool hasName, map& names, map& nameAssignment, map& singletonIndexSwap){ try { long long nseqs; string name; double distance; ifstream in; string numTest; util.openInputFile(distFile, in); in >> numTest >> name; if (hasName) { name = names[name]; } //redundant names nameMap[singletonIndexSwap[0]] = name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); m->setControl_pressed(true); return 0; } else { convert(numTest, nseqs); } //square test char d; while((d=in.get()) != EOF){ 
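//peek at the rest of the first line: an alphanumeric character before the newline means the first row already holds distances, so the matrix is square; reaching the newline first means the file is lower-triangular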
if(isalnum(d)){ square = true; in.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = false; break; } } string line = ""; if(!square){ for(long long i=1;igetControl_pressed()) { break; } in >> name; gobble(in); if (hasName) { name = names[name]; } //redundant names nameMap[singletonIndexSwap[i]] = name; for(long long j=0;j> distance; gobble(in); if (util.isEqual(distance,-1)) { distance = 1000000; } if(distance <= cutoff){ if (refWeightMethod == "connectivity") { //count dists weights[i]++; weights[j]++; } long long newB = singletonIndexSwap[j]; long long newA = singletonIndexSwap[i]; closeness[newA].insert(newB); closeness[newB].insert(newA); } } } }else{ for(long long i=0;i> distance; } gobble(in); for(long long i=1;igetControl_pressed()) { break; } in >> name; gobble(in); if (hasName) { name = names[name]; } //redundant names nameMap[singletonIndexSwap[i]] = name; for(long long j=0;j> distance; gobble(in); if (util.isEqual(distance,-1)) { distance = 1000000; } if(distance <= cutoff && j < i){ if (refWeightMethod == "connectivity") { //count dists weights[i]++; weights[j]++; } long long newB = singletonIndexSwap[j]; long long newA = singletonIndexSwap[i]; closeness[newA].insert(newB); closeness[newB].insert(newA); } } } } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "readPhylip"); exit(1); } } /***********************************************************************/ int OptiRefMatrix::readColumn(string distFile, bool hasName, map& names, map& nameAssignment, map& singletonIndexSwap){ try { string firstName, secondName; double distance; ifstream in; util.openInputFile(distFile, in); while(in){ //let's assume it's a triangular matrix... in >> firstName; gobble(in); in >> secondName; gobble(in); in >> distance; gobble(in); // get the row and column names and distance if (m->getDebug()) { cout << firstName << '\t' << secondName << '\t' << distance << endl; } if (m->getControl_pressed()) { in.close(); return 0; } if (util.isEqual(distance,-1)) { distance = 1000000; } if(distance <= cutoff){ map::iterator itA = nameAssignment.find(firstName); map::iterator itB = nameAssignment.find(secondName); if(itA == nameAssignment.end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the name or count file, please correct\n"); exit(1); } if(itB == nameAssignment.end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the name or count file, please correct\n"); exit(1); } long long indexA = (itA->second); long long indexB = (itB->second); if (refWeightMethod == "connectivity") { //count dists weights[indexA]++; weights[indexB]++; } long long newB = singletonIndexSwap[indexB]; long long newA = singletonIndexSwap[indexA]; closeness[newA].insert(newB); closeness[newB].insert(newA); if (hasName) { map::iterator itName1 = names.find(firstName); map::iterator itName2 = names.find(secondName); if (itName1 != names.end()) { firstName = itName1->second; } //redundant names if (itName2 != names.end()) { secondName = itName2->second; } //redundant names } nameMap[newA] = firstName; nameMap[newB] = secondName; } } in.close(); return 1; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "readColumn"); exit(1); } } /***********************************************************************/ void OptiRefMatrix::calcCounts(){ try { //find number of fitDists, refDists and between dists numRefDists = 0; numFitDists = 0; numBetweenDists = 0; numFitSingletons = 0; numFitSeqs = 0; numRefSingletons = 0; for (long long i = 0; i < 
closeness.size(); i++) { if (m->getControl_pressed()) { break; } bool thisSeqIsRef = isRef[i]; long long thisSeqsNumRefDists = 0; long long thisSeqsNumFitDists = 0; for (set::iterator it = closeness[i].begin(); it != closeness[i].end(); it++) { long long newB = *it; if ((thisSeqIsRef) && (isRef[newB])) { thisSeqsNumRefDists++; } //both refs else if ((thisSeqIsRef) && (!isRef[newB])) { numBetweenDists++; } // ref to fit dist else if ((!thisSeqIsRef) && (isRef[newB])) { numBetweenDists++; } // fit to ref dist else if ((!thisSeqIsRef) && (!isRef[newB])) { thisSeqsNumFitDists++; } // both fit } //a refSingleton or Fitsingleton may not be a true singleton (no valid dists in matrix), but may be a refSeq with no distances to other refs but distances to fitseqs. a fitsingleton may have dists to refs but no dists to other fitseqs. //you are a ref with no refdists, so you are a refsingleton if ((thisSeqIsRef) && (thisSeqsNumRefDists == 0)) { numRefSingletons++; } else if ((!thisSeqIsRef) && (thisSeqsNumFitDists == 0)) { numFitSingletons++; } else if ((!thisSeqIsRef) && (thisSeqsNumFitDists != 0)) { numFitSeqs++; } numRefDists += thisSeqsNumRefDists; numFitDists += thisSeqsNumFitDists; } //counted twice numRefDists /= 2; numFitDists /= 2; numBetweenDists /= 2; } catch(exception& e) { m->errorOut(e, "OptiRefMatrix", "calcCounts"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/optirefmatrix.hpp000066400000000000000000000103521424121717000231050ustar00rootroot00000000000000// // optirefmatrix.hpp // Mothur // // Created by Sarah Westcott on 5/3/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef optirefmatrix_hpp #define optirefmatrix_hpp #include "optidata.hpp" #include "optimatrix.h" #include "subsample.h" /* Looking to easily access ref, fit and combined information to compare OTU assignments for the references, the sequences to fit, and the merged reference fit OTUs */ class OptiRefMatrix : public OptiData { public: OptiRefMatrix(string, string, string, string, double, float, string); //distfile, distFormat, dupsFile, dupsFormat, cutoff, percentage to be fitseqs, refWeightMethod (options: abundance, none, connectivity) OptiRefMatrix(string, string, string, string, double, unordered_set); //distfile, distFormat, dupsFile, dupsFormat, cutoff, accnosfile refNames OptiRefMatrix(string, string, string, string, double, string, string, string, string, string, string); //refdistfile, refname or refcount, refformat, refdistformat, cutoff, fitdistfile, fitname or fitcount, fitformat, fitdistformat, betweendistfile, betweendistformat - files for reference ~OptiRefMatrix(){ } vector getTranslatedBins(vector >&, vector< vector >&); OptiData* extractMatrixSubset(set&); OptiData* extractMatrixSubset(set&); OptiData* extractRefMatrix(); void randomizeRefs(); vector getRefSingletonNames(); long long getNumFitTrueSingletons(); //reads that are true singletons (no valid dists in matrix) and are flagged as fit long long getNumFitSingletons() { return numFitSingletons; } //user singletons long long getNumDists() { return (numFitDists+numRefDists+numBetweenDists); } //all distances under cutoff long long getNumFitDists() { return numFitDists; } //user distances under cutoff long long getNumRefDists() { return numRefDists; } //ref distances under cutoff ListVector* getFitListSingle(); vector getRefSeqs(); //every ref seq in matrix. 
Includes some that would have been singletons if not for the betweendistfile vector getFitSeqs(); //every fit seq in matrix. Includes some that would have been singletons if not for the betweendistfile long long getNumFitSeqs() { return numFitSeqs; } //only Fit seqs that are in fitdistfile and not singletons long long getNumFitClose(long long); long long getNumRefClose(long long); set getCloseFitSeqs(long long); set getCloseRefSeqs(long long); bool isCloseFit(long long, long long, bool&); protected: SubSample subsample; map weights; //seqeunce index in matrix to weight in chosing as reference string method, refWeightMethod; bool square; //a refSingleton or Fitsingleton may not be a true singleton (no valid dists in matrix), but may be a refSeq with no distances to other refs but distances to fitseqs. a fitsingleton may have dists to refs but no dists to other fitseqs. long long numFitDists, numRefDists, numRefSingletons, numFitSingletons, numBetweenDists, numSingletons, numFitSeqs; float fitPercent; int readPhylip(string distFile, bool hasName, map& names, map& nameAssignment, map& singletonIndexSwap); int readColumn(string distFile, bool hasName, map& names, map& nameAssignment, map& singletonIndexSwap); int readFiles(string, string, string, string, string, string, string, string, string, string, string, string); int readFiles(string, string, string, string, unordered_set&); map readColumnSingletons(vector& singleton, string distFile, map&); map readPhylipSingletons(vector& singleton, string distFile, long long&, map& nameAssignment); vector isRef; //same size as closeness, this tells us whether a seq with distances in the matrix is a reference vector isSingleRef; ////same size as singletons, this tells us whether a seq WITHOUT distances in the matrix (singleton) is a reference void calcCounts(); }; #endif /* optirefmatrix_hpp */ mothur-1.48.0/source/datastructures/ordervector.cpp000077500000000000000000000120711424121717000225440ustar00rootroot00000000000000/* * order.cpp * * * Created by Pat Schloss on 8/8/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. 
* */ #include "ordervector.hpp" /***********************************************************************/ OrderVector::OrderVector() : DataVector() {}; /***********************************************************************/ //OrderVector::OrderVector(int ns) : DataVector(), data(ns, -1) {} /***********************************************************************/ OrderVector::OrderVector(string id, vector ov) : DataVector(id), data(ov) { updateStats(); } /***********************************************************************/ OrderVector::OrderVector(ifstream& f) : DataVector() { try { int hold; f >> label; f >> hold; data.assign(hold, -1); int inputData; for(int i=0;i> inputData; set(i, inputData); } updateStats(); } catch(exception& e) { m->errorOut(e, "OrderVector", "OrderVector"); exit(1); } } /***********************************************************************/ int OrderVector::getNumBins(){ if(needToUpdate == 1){ updateStats(); } return numBins; } /***********************************************************************/ int OrderVector::getNumSeqs(){ if(needToUpdate == 1){ updateStats(); } return numSeqs; } /***********************************************************************/ int OrderVector::getMaxRank(){ if(needToUpdate == 1){ updateStats(); } return maxRank; } /***********************************************************************/ void OrderVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; data.clear(); } /***********************************************************************/ void OrderVector::set(int index, int binNumber){ data[index] = binNumber; needToUpdate = 1; } /***********************************************************************/ void OrderVector::set(vector v){ data = v; updateStats(); } /***********************************************************************/ int OrderVector::get(int index){ return data[index]; } /***********************************************************************/ void OrderVector::push_back(int index){ data.push_back(index); needToUpdate = 1; } /***********************************************************************/ void OrderVector::print(ostream& output){ try { output << label << '\t' << numSeqs; for(int i=0;ierrorOut(e, "OrderVector", "print"); exit(1); } } /***********************************************************************/ void OrderVector::print(string prefix, ostream& output){ try { output << prefix << '\t' << numSeqs; for(int i=0;ierrorOut(e, "OrderVector", "print"); exit(1); } } /***********************************************************************/ void OrderVector::resize(int){ m->mothurOut("resize() did nothing in class OrderVector"); } /***********************************************************************/ int OrderVector::size(){ return data.size(); } /***********************************************************************/ vector::iterator OrderVector::begin(){ return data.begin(); } /***********************************************************************/ vector::iterator OrderVector::end(){ return data.end(); } /***********************************************************************/ RAbundVector OrderVector::getRAbundVector(){ try { RAbundVector rav(data.size()); for(int i=0;i=0;i--){ if(rav.get(i) == 0){ rav.pop_back(); } else{ break; } } rav.setLabel(label); return rav; } catch(exception& e) { m->errorOut(e, "OrderVector", "getRAbundVector"); exit(1); } } /***********************************************************************/ SAbundVector OrderVector::getSAbundVector(){ RAbundVector 
rav(this->getRAbundVector()); return rav.getSAbundVector(); } /***********************************************************************/ OrderVector OrderVector::getOrderVector(map* hold = 0){ return *this; } /***********************************************************************/ void OrderVector::updateStats(){ try { needToUpdate = 0; // int maxBinVectorLength = 0; numSeqs = 0; numBins = 0; maxRank = 0; for(int i=0;i hold(numSeqs); for(int i=0;i 0) { numBins++; } if(hold[i] > maxRank) { maxRank = hold[i]; } } } catch(exception& e) { m->errorOut(e, "OrderVector", "updateStats"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/ordervector.hpp000077500000000000000000000034011424121717000225460ustar00rootroot00000000000000#ifndef ORDER_H #define ORDER_H #include "datavector.hpp" #include "sabundvector.hpp" #include "rabundvector.hpp" /* This class is a child to datavector. It represents OTU information at a certain distance. A order vector can be converted into and listvector, rabundvector or sabundvector. Each member of the internal container "data" represents the OTU from which it came. So in the example below since there are 6 sequences in OTU 1 there are six 1's in the ordervector. and since there are 2 sequences in OTU 3 there are two 3's in the ordervector. example: listvector = a,b,c,d,e,f g,h,i j,k l m rabundvector = 6 3 2 1 1 sabundvector = 2 1 1 0 0 1 ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ class OrderVector : public DataVector { public: OrderVector(); OrderVector(int ns, int nb=0, int mr=0) : DataVector(), data(ns, -1), maxRank(0), numBins(0), numSeqs(0) {}; OrderVector(const OrderVector& ov) : DataVector(ov.label), data(ov.data), maxRank(ov.maxRank), numBins(ov.numBins), numSeqs(ov.numSeqs), needToUpdate(ov.needToUpdate) {if(needToUpdate == 1){ updateStats();}}; OrderVector(string, vector); OrderVector(ifstream&); ~OrderVector(){}; vector get() { return data; } void set(vector); void set(int, int); int get(int); void push_back(int); void resize(int); int size(); void clear(); void print(string, ostream&); vector::iterator begin(); vector::iterator end(); void print(ostream&); int getNumBins(); int getNumSeqs(); int getMaxRank(); RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); private: vector data; int maxRank; int numBins; int numSeqs; bool needToUpdate; void updateStats(); }; #endif mothur-1.48.0/source/datastructures/picrust.cpp000066400000000000000000000404071424121717000217000ustar00rootroot00000000000000// // picrust.cpp // Mothur // // Created by Sarah Westcott on 11/16/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #include "picrust.hpp" /**************************************************************************************************/ Picrust::Picrust(string ref, string otumapfile){ try { m = MothurOut::getInstance(); phyloTree = nullptr; read(ref, otumapfile); } catch(exception& e) { m->errorOut(e, "Picrust", "Picrust"); exit(1); } } /**************************************************************************************************/ Picrust::Picrust(){ try { m = MothurOut::getInstance(); phyloTree = nullptr; } catch(exception& e) { m->errorOut(e, "Picrust", "Picrust"); exit(1); } } /**************************************************************************************************/ Picrust::~Picrust(){ try { if (phyloTree != nullptr) { delete phyloTree; } } catch(exception& e) { m->errorOut(e, "Picrust", "Picrust"); exit(1); } } /**************************************************************************************************/ void Picrust::read(string ref, string otumapfile){ try { //read reftaxonomy phyloTree = new PhyloTree(ref); //read otu map file readGGOtuMap(otumapfile); //maps reference ID -> OTU ID } catch(exception& e) { m->errorOut(e, "Picrust", "read"); exit(1); } } //********************************************************************************************************************** void Picrust::setGGOTUIDs(map& labelTaxMap, SharedRAbundFloatVectors*& lookup){ try { map > ggOTUIDs; //loop through otu taxonomies for (map::iterator it = labelTaxMap.begin(); it != labelTaxMap.end(); it++) { //maps label -> consensus taxonomy if (m->getControl_pressed()) { break; } string OTUTaxonomy = it->second; //remove confidences util.removeConfidences(OTUTaxonomy); //remove unclassifieds to match template int thisPos = OTUTaxonomy.find("unclassified;"); //"Porphyromonadaceae"_unclassified; if (thisPos != string::npos) { OTUTaxonomy = OTUTaxonomy.substr(0, thisPos); thisPos = OTUTaxonomy.find_last_of(";"); //remove rest of parent taxon if (thisPos != string::npos) { OTUTaxonomy = OTUTaxonomy.substr(0, thisPos+1); } } //get list of reference ids that map to this taxonomy vector referenceIds = phyloTree->getSeqs(OTUTaxonomy); if (m->getControl_pressed()) { break; } //look for each one in otu map to find match string otuID = "not found"; string referenceString = ""; for (int i = 0; i < referenceIds.size(); i++) { referenceString += referenceIds[i] + " "; map::iterator itMap = otuMap.find(referenceIds[i]); if (itMap != otuMap.end()) { //found it otuID = itMap->second; i += referenceIds.size(); //stop looking } } //if found, add otu to ggOTUID list if (otuID != "not found") { map >::iterator itGG = ggOTUIDs.find(otuID); if (itGG == ggOTUIDs.end()) { vector temp; temp.push_back(it->first); //save mothur OTU label ggOTUIDs[otuID] = temp; }else { ggOTUIDs[otuID].push_back(it->first); } //add mothur OTU label to list }else { m->mothurOut("[ERROR]: could not find OTUId for " + it->second + ". 
Its reference sequences are " + referenceString + ".\n"); m->setControl_pressed(true); } } vector newLookup; vector namesOfGroups = lookup->getNamesGroups(); for (int i = 0; i < namesOfGroups.size(); i++) { SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(); temp->setLabel(lookup->getLabel()); temp->setGroup(namesOfGroups[i]); newLookup.push_back(temp); } map labelIndex; vector currentLabels = lookup->getOTUNames(); for (int i = 0; i < currentLabels.size(); i++) { labelIndex[util.getSimpleLabel(currentLabels[i])] = i; } vector newBinLabels; map newLabelTaxMap; //loop through ggOTUID list combining mothur otus and adjusting labels //ggOTUIDs = 16097 -> for (map >::iterator itMap = ggOTUIDs.begin(); itMap != ggOTUIDs.end(); itMap++) { if (m->getControl_pressed()) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return; } //set new gg otu id to taxonomy. OTU01 -> k__Bacteria becomes 16097 -> k__Bacteria //find taxonomy of this otu map::iterator it = labelTaxMap.find(util.getSimpleLabel(itMap->second[0])); vector scores; vector taxonomies = util.parseTax(it->second, scores); //merge/set OTU abundances vector abunds; abunds.resize(lookup->size(), 0.0); string mergeString = ""; vector boots; boots.resize(scores.size(), 0.0); bool scoresnullptr = false; for (int j = 0; j < itMap->second.size(); j++) { // if (scores[0] != "null") { //merge bootstrap scores vector scores; vector taxonomies = util.parseTax(it->second, scores); for (int i = 0; i < boots.size(); i++) { if (scores[i] == "null") { scoresnullptr = true; break; } else { float tempScore; util.mothurConvert(scores[i], tempScore); boots[i] += tempScore; } } }else { scoresnullptr = true; } //merge abunds mergeString += (itMap->second)[j] + " "; for (int i = 0; i < lookup->size(); i++) { abunds[i] += lookup->get(labelIndex[util.getSimpleLabel((itMap->second)[j])], namesOfGroups[i]); } } if (m->getDebug()) { m->mothurOut("[DEBUG]: merging " + mergeString + " for ggOTUid = " + itMap->first + ".\n"); } //average scores //add merged otu to new lookup string newTaxString = ""; if (!scoresnullptr) { for (int j = 0; j < boots.size(); j++) { boots[j] /= (float) itMap->second.size(); } //assemble new taxomoy for (int j = 0; j < boots.size(); j++) { newTaxString += taxonomies[j] + "(" + toString(boots[j]) + ");"; } }else { //assemble new taxomoy for (int j = 0; j < taxonomies.size(); j++) { newTaxString += taxonomies[j] + ";"; } } //set new gg otu id to taxonomy. 
OTU01 -> k__Bacteria becomes 16097 -> k__Bacteria //find taxonomy of this otu newLabelTaxMap[itMap->first] = newTaxString; //add merged otu to new lookup for (int j = 0; j < abunds.size(); j++) { newLookup[j]->push_back(abunds[j]); } //saved otu label newBinLabels.push_back(itMap->first); } lookup->clear(); for (int i = 0; i < newLookup.size(); i++) { lookup->push_back(newLookup[i]); } lookup->eliminateZeroOTUS(); lookup->setOTUNames(newBinLabels); labelTaxMap = newLabelTaxMap; return; } catch(exception& e) { m->errorOut(e, "Picrust", "setGGOTUIDs"); exit(1); } } //********************************************************************************************************************** void Picrust::setGGOTUIDs(map& labelTaxMap, SharedRAbundVectors*& lookup){ try { map > ggOTUIDs; //loop through otu taxonomies for (map::iterator it = labelTaxMap.begin(); it != labelTaxMap.end(); it++) { //maps label -> consensus taxonomy if (m->getControl_pressed()) { break; } string OTUTaxonomy = it->second; //remove confidences util.removeConfidences(OTUTaxonomy); //remove unclassifieds to match template int thisPos = OTUTaxonomy.find("unclassified;"); //"Porphyromonadaceae"_unclassified; if (thisPos != string::npos) { OTUTaxonomy = OTUTaxonomy.substr(0, thisPos); thisPos = OTUTaxonomy.find_last_of(";"); //remove rest of parent taxon if (thisPos != string::npos) { OTUTaxonomy = OTUTaxonomy.substr(0, thisPos+1); } } //get list of reference ids that map to this taxonomy vector referenceIds = phyloTree->getSeqs(OTUTaxonomy); if (m->getControl_pressed()) { break; } //look for each one in otu map to find match string otuID = "not found"; string referenceString = ""; for (int i = 0; i < referenceIds.size(); i++) { referenceString += referenceIds[i] + " "; map::iterator itMap = otuMap.find(referenceIds[i]); if (itMap != otuMap.end()) { //found it otuID = itMap->second; i += referenceIds.size(); //stop looking } } //if found, add otu to ggOTUID list if (otuID != "not found") { map >::iterator itGG = ggOTUIDs.find(otuID); if (itGG == ggOTUIDs.end()) { vector temp; temp.push_back(it->first); //save mothur OTU label ggOTUIDs[otuID] = temp; }else { ggOTUIDs[otuID].push_back(it->first); } //add mothur OTU label to list }else { m->mothurOut("[ERROR]: could not find OTUId for " + it->second + ". Its reference sequences are " + referenceString + ".\n"); m->setControl_pressed(true); } } vector newLookup; vector namesOfGroups = lookup->getNamesGroups(); for (int i = 0; i < namesOfGroups.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(); temp->setLabel(lookup->getLabel()); temp->setGroup(namesOfGroups[i]); newLookup.push_back(temp); } map labelIndex; vector currentLabels = lookup->getOTUNames(); for (int i = 0; i < currentLabels.size(); i++) { labelIndex[util.getSimpleLabel(currentLabels[i])] = i; } vector newBinLabels; map newLabelTaxMap; //loop through ggOTUID list combining mothur otus and adjusting labels //ggOTUIDs = 16097 -> for (map >::iterator itMap = ggOTUIDs.begin(); itMap != ggOTUIDs.end(); itMap++) { if (m->getControl_pressed()) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return; } //set new gg otu id to taxonomy. 
OTU01 -> k__Bacteria becomes 16097 -> k__Bacteria //find taxonomy of this otu map::iterator it = labelTaxMap.find(util.getSimpleLabel(itMap->second[0])); vector scores; vector taxonomies = util.parseTax(it->second, scores); //merge/set OTU abundances vector abunds; abunds.resize(lookup->size(), 0.0); string mergeString = ""; vector boots; boots.resize(scores.size(), 0.0); bool scoresnullptr = false; for (int j = 0; j < itMap->second.size(); j++) { // if (scores[0] != "null") { //merge bootstrap scores vector scores; vector taxonomies = util.parseTax(it->second, scores); for (int i = 0; i < boots.size(); i++) { if (scores[i] == "null") { scoresnullptr = true; break; } else { float tempScore; util.mothurConvert(scores[i], tempScore); boots[i] += tempScore; } } }else { scoresnullptr = true; } //merge abunds mergeString += (itMap->second)[j] + " "; for (int i = 0; i < lookup->size(); i++) { abunds[i] += lookup->get(labelIndex[util.getSimpleLabel((itMap->second)[j])], namesOfGroups[i]); } } if (m->getDebug()) { m->mothurOut("[DEBUG]: merging " + mergeString + " for ggOTUid = " + itMap->first + ".\n"); } //average scores //add merged otu to new lookup string newTaxString = ""; if (!scoresnullptr) { for (int j = 0; j < boots.size(); j++) { boots[j] /= (float) itMap->second.size(); } //assemble new taxomoy for (int j = 0; j < boots.size(); j++) { newTaxString += taxonomies[j] + "(" + toString(boots[j]) + ");"; } }else { //assemble new taxomoy for (int j = 0; j < taxonomies.size(); j++) { newTaxString += taxonomies[j] + ";"; } } //set new gg otu id to taxonomy. OTU01 -> k__Bacteria becomes 16097 -> k__Bacteria //find taxonomy of this otu newLabelTaxMap[itMap->first] = newTaxString; //add merged otu to new lookup for (int j = 0; j < abunds.size(); j++) { newLookup[j]->push_back(abunds[j]); } //saved otu label newBinLabels.push_back(itMap->first); } lookup->clear(); for (int i = 0; i < newLookup.size(); i++) { lookup->push_back(newLookup[i]); } lookup->eliminateZeroOTUS(); lookup->setOTUNames(newBinLabels); labelTaxMap = newLabelTaxMap; return; } catch(exception& e) { m->errorOut(e, "Picrust", "setGGOTUIDs"); exit(1); } } //********************************************************************************************************************** void Picrust::readGGOtuMap(string otumapfile){ try { ifstream in; util.openInputFile(otumapfile, in); //map referenceIDs -> otuIDs //lines look like: //16097 671376 616121 533566 683683 4332909 4434717 772666 611808 695209 while(!in.eof()) { if (m->getControl_pressed()) { break; } string line = util.getline(in); gobble(in); vector pieces = util.splitWhiteSpace(line); if (pieces.size() != 0) { string otuID = pieces[1]; for (int i = 1; i < pieces.size(); i++) { otuMap[pieces[i]] = otuID; } } } in.close(); } catch(exception& e) { m->errorOut(e, "Picrust", "readGGOtuMap"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/picrust.hpp000066400000000000000000000020021424121717000216720ustar00rootroot00000000000000// // picrust.hpp // Mothur // // Created by Sarah Westcott on 11/16/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #ifndef picrust_hpp #define picrust_hpp #include "mothurout.h" #include "utils.hpp" #include "phylotree.h" #include "sharedrabundvectors.hpp" #include "sharedrabundfloatvectors.hpp" /**************************************************************************************************/ class Picrust { public: Picrust(string, string); //reference, otumap Picrust(); ~Picrust(); void read(string, string); void setGGOTUIDs(map&, SharedRAbundFloatVectors*&); void setGGOTUIDs(map&, SharedRAbundVectors*&); protected: MothurOut* m; Utils util; PhyloTree* phyloTree; map otuMap; void readGGOtuMap(string); //fills otuMap }; /**************************************************************************************************/ #endif /* picrust_hpp */ mothur-1.48.0/source/datastructures/protein.cpp000066400000000000000000000561321424121717000216710ustar00rootroot00000000000000// // protein.cpp // Mothur // // Created by Sarah Westcott on 5/24/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #include "protein.hpp" /***********************************************************************/ Protein::Protein(){ m = MothurOut::getInstance(); initialize(); } /***********************************************************************/ Protein::Protein(string newName, vector sequence) { try { m = MothurOut::getInstance(); initialize(); name = newName; util.checkName(name); setUnaligned(sequence); //setUnaligned removes any gap characters for us setAligned(sequence); } catch(exception& e) { m->errorOut(e, "Protein", "Protein"); exit(1); } } /***********************************************************************/ Protein::Protein(string newName, string seq) { try { m = MothurOut::getInstance(); initialize(); name = newName; util.checkName(name); vector sequence; for (int i = 0; i < seq.size(); i++) { AminoAcid temp(seq[i]); sequence.push_back(temp); } setUnaligned(sequence); //setUnaligned removes any gap characters for us setAligned(sequence); } catch(exception& e) { m->errorOut(e, "Protein", "Protein"); exit(1); } } //******************************************************************************************************************** //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq Protein::Protein(istringstream& fastaString){ try { m = MothurOut::getInstance(); initialize(); name = getProteinName(fastaString); if (!m->getControl_pressed()) { string proteinComment; //read comments while ((name[0] == '#') && fastaString) { while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there proteinComment = getCommentString(fastaString); if (fastaString) { fastaString >> name; name = name.substr(1); }else { name = ""; break; } } comment = getCommentString(fastaString); vector proteinSeq = getProtein(fastaString); setAligned(proteinSeq); setUnaligned(proteinSeq); //setUnaligned removes any gap characters for us } } catch(exception& e) { m->errorOut(e, "Protein", "Protein"); exit(1); } } //******************************************************************************************************************** //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq Protein::Protein(ifstream& fastaFile){ try { m = MothurOut::getInstance(); initialize(); name = getProteinName(fastaFile); if (!m->getControl_pressed()) { string proteinComment; //read comments while ((name[0] == '#') && fastaFile) { 
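//a name starting with '#' marks a commented-out record; discard the rest of that record and keep reading names until a real sequence (or the end of the file) is found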
while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there proteinComment = getCommentString(fastaFile); if (fastaFile) { fastaFile >> name; name = name.substr(1); }else { name = ""; break; } } //while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there comment = getCommentString(fastaFile); vector proteinSeq = getProtein(fastaFile); setAligned(proteinSeq); setUnaligned(proteinSeq); //setUnaligned removes any gap characters for us } } catch(exception& e) { m->errorOut(e, "Protein", "Protein"); exit(1); } } //******************************************************************************************************************** //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq #ifdef USE_BOOST Protein::Protein(boost::iostreams::filtering_istream& fastaFile){ try { m = MothurOut::getInstance(); initialize(); name = getSequenceName(fastaFile); if (!m->getControl_pressed()) { string sequence; //read comments while ((name[0] == '#') && fastaFile) { while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there sequence = getCommentString(fastaFile); if (fastaFile) { fastaFile >> name; name = name.substr(1); }else { name = ""; break; } } //while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there comment = getCommentString(fastaFile); vector proteinSeq = getProtein(fastaFile); setAligned(proteinSeq); setUnaligned(proteinSeq); //setUnaligned removes any gap characters for us } } catch(exception& e) { m->errorOut(e, "Protein", "Protein"); exit(1); } } #endif //******************************************************************************************************************** //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq Protein::Protein(ifstream& fastaFile, string& extraInfo, bool getInfo){ try { m = MothurOut::getInstance(); initialize(); extraInfo = ""; name = getProteinName(fastaFile); if (!m->getControl_pressed()) { string sequence; //read comments while ((name[0] == '#') && fastaFile) { while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there sequence = getCommentString(fastaFile); if (fastaFile) { fastaFile >> name; name = name.substr(1); }else { name = ""; break; } } //read info after sequence name while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13 || c == -1){ break; } extraInfo += c; } comment = extraInfo; vector proteinSeq = getProtein(fastaFile); setAligned(proteinSeq); setUnaligned(proteinSeq); //setUnaligned removes any gap characters for us } } catch(exception& e) { m->errorOut(e, "Protein", "Protein"); exit(1); } } //******************************************************************************************************************** string Protein::getProteinName(ifstream& fastaFile) { try { string name = ""; fastaFile >> name; if (name.length() != 0) { name = name.substr(1); util.checkName(name); }else{ if (!fastaFile.eof()) { m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". 
Blank name.\n"); m->setControl_pressed(true); } } return name; } catch(exception& e) { m->errorOut(e, "Protein", "getProteinName"); exit(1); } } //******************************************************************************************************************** #ifdef USE_BOOST string Protein::getSequenceName(boost::iostreams::filtering_istream& fastaFile) { try { string name = ""; fastaFile >> name; if (name.length() != 0) { name = name.substr(1); util.checkName(name); }else{ if (!fastaFile.eof()) { m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name.\n"); m->setControl_pressed(true); } } return name; } catch(exception& e) { m->errorOut(e, "Protein", "getSequenceName"); exit(1); } } #endif //******************************************************************************************************************** string Protein::getProteinName(istringstream& fastaFile) { try { string name = ""; fastaFile >> name; if (name.length() != 0) { name = name.substr(1); util.checkName(name); }else{ if (!fastaFile.eof()) { m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name.\n"); m->setControl_pressed(true); } } return name; } catch(exception& e) { m->errorOut(e, "Protein", "getProteinName"); exit(1); } } //******************************************************************************************************************** vector Protein::getProtein(ifstream& fastaFile) { try { char letter; vector protein; while(!fastaFile.eof()){ letter= fastaFile.get(); if(letter == '>'){ fastaFile.putback(letter); break; }else if (letter == ' ') {;} else if(isprint(letter)){ letter = toupper(letter); if(letter == 'U'){letter = 'T';} AminoAcid amino(letter); protein.push_back(amino); } } return protein; } catch(exception& e) { m->errorOut(e, "Protein", "getProtein"); exit(1); } } //******************************************************************************************************************** #ifdef USE_BOOST vector Protein::getProtein(boost::iostreams::filtering_istream& fastaFile) { try { char letter; vector protein; while(fastaFile){ letter= fastaFile.get(); if(letter == '>'){ fastaFile.putback(letter); break; }else if (letter == ' ') {;} else if(isprint(letter)){ letter = toupper(letter); if(letter == 'U'){letter = 'T';} AminoAcid amino(letter); protein.push_back(amino); } } return protein; } catch(exception& e) { m->errorOut(e, "Protein", "getProtein"); exit(1); } } #endif //******************************************************************************************************************** //comment can contain '>' so we need to account for that string Protein::getCommentString(ifstream& fastaFile) { try { char letter; string temp = ""; while(fastaFile){ letter=fastaFile.get(); if((letter == '\r') || (letter == '\n') || letter == -1){ gobble(fastaFile); //in case its a \r\n situation break; }else { temp += letter; } } return temp; } catch(exception& e) { m->errorOut(e, "Protein", "getCommentString"); exit(1); } } //******************************************************************************************************************** #ifdef USE_BOOST //comment can contain '>' so we need to account for that string Protein::getCommentString(boost::iostreams::filtering_istream& fastaFile) { try { char letter; string temp = ""; while(fastaFile){ letter=fastaFile.get(); if((letter == '\r') || (letter == '\n') || letter == -1){ gobble(fastaFile); //in case its a \r\n situation break; }else { temp += letter; } } 
return temp; } catch(exception& e) { m->errorOut(e, "Protein", "getCommentString"); exit(1); } } #endif //******************************************************************************************************************** vector Protein::getProtein(istringstream& fastaFile) { try { char letter; vector protein; while(!fastaFile.eof()){ letter= fastaFile.get(); if(letter == '>'){ fastaFile.putback(letter); break; }else if (letter == ' ') {;} else if(isprint(letter)){ letter = toupper(letter); if(letter == 'U'){letter = 'T';} AminoAcid amino(letter); protein.push_back(amino); } } return protein; } catch(exception& e) { m->errorOut(e, "Protein", "getProtein"); exit(1); } } //******************************************************************************************************************** //comment can contain '>' so we need to account for that string Protein::getCommentString(istringstream& fastaFile) { try { char letter; string temp = ""; while(fastaFile){ letter=fastaFile.get(); if((letter == '\r') || (letter == '\n') || letter == -1){ gobble(fastaFile); //in case its a \r\n situation break; }else { temp += letter; } } return temp; } catch(exception& e) { m->errorOut(e, "Protein", "getCommentString"); exit(1); } } //******************************************************************************************************************** void Protein::initialize(){ name = ""; unaligned.clear(); aligned.clear(); pairwise.clear(); comment = ""; numBases = 0; alignmentLength = 0; startPos = -1; endPos = -1; } //******************************************************************************************************************** void Protein::setName(string seqName) { if(seqName[0] == '>') { name = seqName.substr(1); } else { name = seqName; } } //******************************************************************************************************************** void Protein::setUnaligned(vector protein){ unaligned.clear(); for(int j=0;j sequence; for (int i = 0; i < seq.size(); i++) { AminoAcid temp(seq[i]); sequence.push_back(temp); } setAligned(sequence); } //******************************************************************************************************************** void Protein::setAligned(vector sequence){ //if the alignment starts or ends with a gap, replace it with a period to indicate missing data aligned.clear(); aligned = sequence; alignmentLength = aligned.size(); setUnaligned(sequence); if(aligned[0].getAmino() == '-'){ //convert ending gaps for(int i=0;i=0;i--){ if(aligned[i].getAmino() == '-'){ aligned[i].setAmino('.'); } else{ break; } } } } //******************************************************************************************************************** bool Protein::isAligned(){ for (int i = 0; i < aligned.size(); i++) { if ((aligned[i].getAmino() == '.') || (aligned[i].getAmino() == '-')) { return true; } } return false; } //******************************************************************************************************************** void Protein::setPairwise(vector sequence){ pairwise = sequence; } //******************************************************************************************************************** string Protein::getName(){ return name; } //******************************************************************************************************************** vector Protein::getAligned(){ return aligned; } //******************************************************************************************************************** string 
Protein::getProteinString(vector prot){ string inlinePro = ""; for (int i = 0; i < prot.size(); i++) { inlinePro += prot[i].getAmino(); } return inlinePro; } //******************************************************************************************************************** string Protein::getInlineProtein(){ string inlinePro = name + '\t'; inlinePro += getProteinString(aligned); return inlinePro; } //******************************************************************************************************************** vector Protein::getPairwise(){ return pairwise; } //******************************************************************************************************************** vector Protein::getUnaligned(){ return unaligned; } //******************************************************************************************************************** string Protein::getComment(){ return comment; } //******************************************************************************************************************** int Protein::getNumBases(){ return numBases; } //******************************************************************************************************************** void Protein::printProtein(OutputWriter* out){ string seqOutput = ">"; seqOutput += name + comment + '\n' + getProteinString(aligned) + '\n'; out->write(seqOutput); } //******************************************************************************************************************** void Protein::printProtein(ostream& out){ out << ">" << name << comment << endl; out << getProteinString(aligned) << endl; } //******************************************************************************************************************** void Protein::printUnAlignedProtein(ostream& out){ out << ">" << name << comment << endl; out << getProteinString(unaligned) << endl; } //******************************************************************************************************************** int Protein::getAlignLength(){ return alignmentLength; } //******************************************************************************************************************** int Protein::getStartPos(){ bool isAligned = false; if(startPos == -1){ for(int j = 0; j < alignmentLength; j++) { if((aligned[j].getAmino() != '.')&&(aligned[j].getAmino() != '-')){ startPos = j + 1; break; }else { isAligned = true; } } } if(!isAligned){ startPos = 1; } return startPos; } //******************************************************************************************************************** void Protein::filterToPos(int start){ if (start > aligned.size()) { start = aligned.size(); m->mothurOut("[ERROR]: start to large.\n"); } for(int j = 0; j < start; j++) { aligned[j].setAmino('.'); } //things like ......----------AT become ................AT for(int j = start; j < aligned.size(); j++) { if (isalpha(aligned[j].getAmino())) { break; } else { aligned[j].setAmino('.'); } } setUnaligned(aligned); } //******************************************************************************************************************** void Protein::filterFromPos(int end){ if (end > aligned.size()) { end = aligned.size(); m->mothurOut("[ERROR]: end to large.\n"); } for(int j = end; j < aligned.size(); j++) { aligned[j].setAmino('.'); } for(int j = aligned.size()-1; j < 0; j--) { if (isalpha(aligned[j].getAmino())) { break; } else { aligned[j].setAmino('.'); } } setUnaligned(aligned); } 
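//********************************************************************************************************************
//A minimal usage sketch, not part of mothur's API: the sequence name and alignment below are hypothetical.
//It shows how filterToPos/filterFromPos mask alignment columns outside a window with '.', the same
//convention used above for leading and trailing gaps.
static void proteinFilterExample(){
    Protein prot("exampleSeq", "..MKV-LL..");    //hypothetical aligned amino acid record
    prot.filterToPos(3);                         //columns before position 3 become '.'
    prot.filterFromPos(8);                       //columns from position 8 onward become '.'
    cout << prot.getAlignedString() << endl;     //prints the masked alignment
}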
//******************************************************************************************************************** int Protein::getEndPos(){ bool isAligned = false; if (alignmentLength != numBases) { isAligned = true; } if(endPos == -1){ for(int j=alignmentLength-1;j>=0;j--){ if((aligned[j].getAmino() != '.')&&(aligned[j].getAmino() != '-')){ endPos = j + 1; break; }else { isAligned = true; } } } if(!isAligned){ endPos = numBases; } return endPos; } //******************************************************************************************************************** void Protein::padToPos(int start){ for(int j = getStartPos()-1; j < start-1; j++) { aligned[j].setAmino('.'); } startPos = start; } //******************************************************************************************************************** void Protein::padFromPos(int end){ for(int j = end; j < getEndPos(); j++) { aligned[j].setAmino('.'); } endPos = end; } //******************************************************************************************************************** void Protein::setComment(string c){ comment = c; } //******************************************************************************************************************** void Protein::trim(int length){ if(numBases > length){ unaligned.resize(length); numBases = length; setAligned(unaligned); } } ///**************************************************************************************************/ mothur-1.48.0/source/datastructures/protein.hpp000066400000000000000000000054511424121717000216740ustar00rootroot00000000000000// // protein.hpp // Mothur // // Created by Sarah Westcott on 5/24/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #ifndef protein_hpp #define protein_hpp #include "mothurout.h" #include "utils.hpp" #include "writer.h" #include "aminoacid.hpp" class Sequence; /**************************************************************************************************/ class Protein { #ifdef UNIT_TEST friend class TestProtein; #endif public: Protein(); Protein(string, string); Protein(string, vector); Protein(ifstream&); Protein(ifstream&, string&, bool); Protein(istringstream&); #ifdef USE_BOOST Protein(boost::iostreams::filtering_istream&); #endif ~Protein() = default; void setName(string); string getName(); void setUnaligned(vector); vector getUnaligned(); string getUnalignedString() { return getProteinString(unaligned); } void setAligned(vector); void setAligned(string); vector getAligned(); string getAlignedString() { return getProteinString(aligned); } void setComment(string); string getComment(); string getInlineProtein(); void setPairwise(vector); vector getPairwise(); string getCompressedDNA(); bool isAligned(); int getNumBases(); int getStartPos(); int getEndPos(); void trim(int); void padToPos(int); void padFromPos(int); void filterToPos(int); //any character before the pos is changed to . and aligned and unaligned strings changed void filterFromPos(int); //any character after the pos is changed to . 
and aligned and unaligned strings changed int getAlignLength(); void printProtein(ostream&); void printProtein(OutputWriter*); void printUnAlignedProtein(ostream&); protected: MothurOut* m; Utils util; void initialize(); vector getProtein(ifstream&); vector getProtein(istringstream&); string getCommentString(ifstream&); string getCommentString(istringstream&); string getProteinName(ifstream&); string getProteinName(istringstream&); string getProteinString(vector); #ifdef USE_BOOST string getCommentString(boost::iostreams::filtering_istream&); vector getProtein(boost::iostreams::filtering_istream&); string getSequenceName(boost::iostreams::filtering_istream&); #endif string name; vector unaligned; vector aligned; string comment; int numBases; int alignmentLength; int startPos, endPos; vector pairwise; }; /**************************************************************************************************/ #endif /* protein_hpp */ mothur-1.48.0/source/datastructures/proteindb.cpp000066400000000000000000000053451424121717000221770ustar00rootroot00000000000000// // proteindb.cpp // Mothur // // Created by Sarah Westcott on 6/3/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #include "proteindb.hpp" /***********************************************************************/ ProteinDB::ProteinDB() : StorageDatabase() { } /***********************************************************************/ //the clear function free's the memory ProteinDB::~ProteinDB() { data.clear(); } /***********************************************************************/ ProteinDB::ProteinDB(int newSize) : StorageDatabase() { data.resize(newSize, Protein()); } /***********************************************************************/ ProteinDB::ProteinDB(ifstream& filehandle) : StorageDatabase() { try{ //read through file while (!filehandle.eof()) { Protein newProteinSequence(filehandle); gobble(filehandle); if (newProteinSequence.getName() != "") { if (length == 0) { length = newProteinSequence.getAligned().size(); } if (length != newProteinSequence.getAligned().size()) { samelength = false; } data.push_back(newProteinSequence); } } filehandle.close(); } catch(exception& e) { m->errorOut(e, "ProteinDB", "ProteinDB"); exit(1); } } /***********************************************************************/ int ProteinDB::getNumSeqs() { return data.size(); } /***********************************************************************/ Protein ProteinDB::getProt(int index) { if ((index >= 0) && (index < data.size()) ) { return data[index]; } else { m->mothurOut("[ERROR]: invalid database index, please correct.\n"); m->setControl_pressed(true); Protein p; return p; } } /***********************************************************************/ void ProteinDB::push_back(Protein newProteinSequence) { try { if (length == 0) { length = newProteinSequence.getAligned().size(); } if (length != newProteinSequence.getAligned().size()) { samelength = false; } data.push_back(newProteinSequence); } catch(exception& e) { m->errorOut(e, "ProteinDB", "push_back"); exit(1); } } /***********************************************************************/ void ProteinDB::print(string outputFileName) { try { ofstream out; util.openOutputFile(outputFileName, out); for (int i = 0; i < data.size(); i++) { data[i].printProtein(out); } out.close(); } catch(exception& e) { m->errorOut(e, "ProteinDB", "print"); exit(1); } } /***********************************************************************/ 
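//A minimal usage sketch, not part of mothur's API: "proteins.fasta" and the output name are hypothetical.
//It shows the intended flow for this class: build the database from an open file handle, query a record,
//then write the whole set back out as fasta.
static void proteinDBExample(){
    ifstream in; Utils util;
    util.openInputFile("proteins.fasta", in);    //hypothetical amino acid fasta file
    ProteinDB db(in);                            //reads every record; the constructor closes the handle
    if (db.getNumSeqs() > 0) {
        Protein first = db.getProt(0);           //records keep their order from the fasta file
        cout << first.getName() << '\t' << first.getNumBases() << endl;
    }
    db.print("proteins.copy.fasta");             //hypothetical output file
}
/***********************************************************************/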
mothur-1.48.0/source/datastructures/proteindb.hpp000066400000000000000000000015571424121717000222050ustar00rootroot00000000000000// // proteindb.hpp // Mothur // // Created by Sarah Westcott on 6/3/21. // Copyright © 2021 Schloss Lab. All rights reserved. // #ifndef proteindb_hpp #define proteindb_hpp #include "protein.hpp" #include "storagedatabase.hpp" class ProteinDB : public StorageDatabase { public: ProteinDB(); ProteinDB(int); //makes data that size ProteinDB(ifstream&); //reads file to fill data ProteinDB(const ProteinDB& sdb) : data(sdb.data) {}; ~ProteinDB(); //loops through data and delete each protein sequence Protein getProt(int); //returns sequence name at that location void push_back(Protein); //adds unaligned sequence void print(string); //prints fasta file containing sequences in this db int getNumSeqs(); private: vector data; }; #endif /* proteindb_hpp */ mothur-1.48.0/source/datastructures/qualityscores.cpp000077500000000000000000000452401424121717000231210ustar00rootroot00000000000000/* * qualityscores.cpp * Mothur * * Created by Pat Schloss on 7/12/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "qualityscores.h" /**************************************************************************************************/ QualityScores::QualityScores(){ try { m = MothurOut::getInstance(); seqName = ""; seqLength = -1; } catch(exception& e) { m->errorOut(e, "QualityScores", "QualityScores"); exit(1); } } /**************************************************************************************************/ QualityScores::QualityScores(string n, vector s){ try { m = MothurOut::getInstance(); setName(n); setScores(s); } catch(exception& e) { m->errorOut(e, "QualityScores", "QualityScores"); exit(1); } } /**************************************************************************************************/ QualityScores::QualityScores(ifstream& qFile){ try { m = MothurOut::getInstance(); int score; seqName = getSequenceName(qFile); gobble(qFile); getCommentString(qFile); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n."); } if (!m->getControl_pressed()) { string qScoreString = util.getline(qFile); gobble(qFile); if (m->getDebug()) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n."); } while(qFile.peek() != '>' && qFile.peek() != EOF){ if (m->getControl_pressed()) { break; } string temp = util.getline(qFile); gobble(qFile); qScoreString += ' ' + temp; } istringstream qScoreStringStream(qScoreString); int count = 0; while(!qScoreStringStream.eof()){ if (m->getControl_pressed()) { break; } string temp; qScoreStringStream >> temp; gobble(qScoreStringStream); //check temp to make sure its a number if (!util.isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0.\n"); temp = "0"; } convert(temp, score); if (score > 40) { score = 40; } qScores.push_back(score); count++; } } seqLength = qScores.size(); } catch(exception& e) { m->errorOut(e, "QualityScores", "QualityScores"); exit(1); } } /**************************************************************************************************/ #ifdef USE_BOOST QualityScores::QualityScores(boost::iostreams::filtering_istream& qFile){ try { m = MothurOut::getInstance(); int score; seqName = getSequenceName(qFile); gobble(qFile); getCommentString(qFile); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n."); } if (!m->getControl_pressed()) { string qScoreString = 
util.getline(qFile); gobble(qFile); if (m->getDebug()) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n."); } while(qFile.peek() != '>' && qFile.peek() != EOF){ if (m->getControl_pressed()) { break; } string temp = util.getline(qFile); gobble(qFile); qScoreString += ' ' + temp; } istringstream qScoreStringStream(qScoreString); int count = 0; while(!qScoreStringStream.eof()){ if (m->getControl_pressed()) { break; } string temp; qScoreStringStream >> temp; gobble(qScoreStringStream); //check temp to make sure its a number if (!util.isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0.\n"); temp = "0"; } convert(temp, score); if (score > 40) { score = 40; } qScores.push_back(score); count++; } } seqLength = qScores.size(); } catch(exception& e) { m->errorOut(e, "QualityScores", "QualityScores"); exit(1); } } #endif /**************************************************************************************************/ int QualityScores::read(ifstream& qFile){ try { int score; seqName = getSequenceName(qFile); gobble(qFile); getCommentString(qFile); if (m->getDebug()) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n."); } if (!m->getControl_pressed()) { string qScoreString = util.getline(qFile); gobble(qFile); if (m->getDebug()) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n."); } while(qFile.peek() != '>' && qFile.peek() != EOF){ if (m->getControl_pressed()) { break; } string temp = util.getline(qFile); gobble(qFile); qScoreString += ' ' + temp; } istringstream qScoreStringStream(qScoreString); int count = 0; while(!qScoreStringStream.eof()){ if (m->getControl_pressed()) { break; } string temp; qScoreStringStream >> temp; gobble(qScoreStringStream); //check temp to make sure its a number if (!util.isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0.\n"); temp = "0"; } convert(temp, score); if (score > 40) { score = 40; } qScores.push_back(score); count++; } } seqLength = qScores.size(); return seqLength; } catch(exception& e) { m->errorOut(e, "QualityScores", "read"); exit(1); } } //******************************************************************************************************************** string QualityScores::getSequenceName(ifstream& qFile) { try { string name = ""; qFile >> name; if (name.length() != 0) { name = name.substr(1); util.checkName(name); }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name.\n"); m->setControl_pressed(true); } return name; } catch(exception& e) { m->errorOut(e, "QualityScores", "getSequenceName"); exit(1); } } //******************************************************************************************************************** #ifdef USE_BOOST string QualityScores::getSequenceName(boost::iostreams::filtering_istream& qFile) { try { string name = ""; qFile >> name; string temp; if (name.length() != 0) { name = name.substr(1); util.checkName(name); }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". 
Blank name.\n"); m->setControl_pressed(true); } return name; } catch(exception& e) { m->errorOut(e, "QualityScores", "getSequenceName"); exit(1); } } #endif //******************************************************************************************************************** //comment can contain '>' so we need to account for that string QualityScores::getCommentString(ifstream& fastaFile) { try { char letter; string temp = ""; while(fastaFile){ letter=fastaFile.get(); if((letter == '\r') || (letter == '\n') || letter == -1){ gobble(fastaFile); //in case its a \r\n situation break; }else { temp += letter; } } return temp; } catch(exception& e) { m->errorOut(e, "QualityScores", "getCommentString"); exit(1); } } //******************************************************************************************************************** #ifdef USE_BOOST //comment can contain '>' so we need to account for that string QualityScores::getCommentString(boost::iostreams::filtering_istream& fastaFile) { try { char letter; string temp = ""; while(fastaFile){ letter=fastaFile.get(); if((letter == '\r') || (letter == '\n') || letter == -1){ gobble(fastaFile); //in case its a \r\n situation break; }else { temp += letter; } } return temp; } catch(exception& e) { m->errorOut(e, "QualityScores", "getCommentString"); exit(1); } } #endif //******************************************************************************************************************** void QualityScores::setName(string name) { try { util.checkName(name); seqName = name; } catch(exception& e) { m->errorOut(e, "QualityScores", "setName"); exit(1); } } /**************************************************************************************************/ string QualityScores::getName(){ try { return seqName; } catch(exception& e) { m->errorOut(e, "QualityScores", "getName"); exit(1); } } /**************************************************************************************************/ void QualityScores::printQScores(OutputWriter* qFile){ try { double expected_errors = calculateExpectedErrors(); string outputQualString = ">"; outputQualString += seqName + '\t' + toString(expected_errors) + '\n'; for(int i=0;iwrite(outputQualString); } catch(exception& e) { m->errorOut(e, "QualityScores", "printQScores"); exit(1); } } /**************************************************************************************************/ void QualityScores::printQScores(ofstream& qFile){ try { double expected_errors = calculateExpectedErrors(); qFile << '>' << seqName << '\t' << expected_errors << endl; for(int i=0;ierrorOut(e, "QualityScores", "printQScores"); exit(1); } } /**************************************************************************************************/ void QualityScores::printQScores(ostream& qFile){ try { double expected_errors = calculateExpectedErrors(); qFile << '>' << seqName << '\t' << expected_errors << endl; for(int i=0;ierrorOut(e, "QualityScores", "printQScores"); exit(1); } } /**************************************************************************************************/ void QualityScores::trimQScores(int start, int end){ try { vector hold; if(end == -1){ hold = vector(qScores.begin()+start, qScores.end()); qScores = hold; } if(start == -1){ if(qScores.size() > end){ hold = vector(qScores.begin(), qScores.begin()+end); qScores = hold; } } seqLength = qScores.size(); } catch(exception& e) { m->errorOut(e, "QualityScores", "trimQScores"); exit(1); } } 
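/**************************************************************************************************/
// Illustrative sketch (hypothetical, not taken from the mothur sources): the expected-error
// value that printQScores writes next to each sequence name is presumably the sum of per-base
// error probabilities. This standalone helper assumes the standard Phred relation
// P(error) = 10^(-Q/10); the name expectedErrorsSketch is an assumption for illustration only.
static double expectedErrorsSketch(const vector<int>& scores) {
    double expectedErrors = 0.0;
    for (size_t i = 0; i < scores.size(); i++) {
        expectedErrors += pow(10.0, -scores[i] / 10.0);   // Phred quality -> error probability
    }
    return expectedErrors;   // e.g. three bases at Q40 give 3 * 0.0001 = 0.0003 expected errors
}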
/**************************************************************************************************/ void QualityScores::flipQScores(){ try { vector temp = qScores; for(int i=0;ierrorOut(e, "QualityScores", "flipQScores"); exit(1); } } /**************************************************************************************************/ bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){ try { string rawSequence = sequence.getUnaligned(); int seqLength = sequence.getNumBases(); if(seqName != sequence.getName()){ m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName); m->mothurOutEndLine(); m->setControl_pressed(true); } int end = 0; for(int i=0;ierrorOut(e, "QualityScores", "flipQScores"); exit(1); } } /**************************************************************************************************/ bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold, bool logTransform){ try { string rawSequence = sequence.getUnaligned(); int seqLength = sequence.getNumBases(); if(seqName != sequence.getName()){ m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName); m->mothurOutEndLine(); } int end = -1; double rollingSum = 0.0000; double value = 0.0; for(int i=0;ierrorOut(e, "QualityScores", "flipQScores"); exit(1); } } /**************************************************************************************************/ bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold, bool logTransform){ try { string rawSequence = sequence.getUnaligned(); int seqLength = sequence.getNumBases(); if(seqName != sequence.getName()){ m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName); m->mothurOutEndLine(); } int end = windowSize; int start = 0; if(seqLength < windowSize) { return 0; } while((start+windowSize) < seqLength){ double windowSum = 0.0000; for(int i=start;i= seqLength){ end = seqLength; } } if(end == -1){ end = seqLength; } //failed first window if (end < windowSize) { return 0; } sequence.setUnaligned(rawSequence.substr(0,end)); trimQScores(-1, end); return 1; } catch(exception& e) { m->errorOut(e, "QualityScores", "stripQualWindowAverage"); exit(1); } } /**************************************************************************************************/ double QualityScores::calculateExpectedErrors(void){ double expected_errors = 0.0000; for(int i=0;imothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName); m->mothurOutEndLine(); } double aveQScore = calculateAverage(logTransform); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + sequence.getName() + " average = " + toString(aveQScore) + "\n"); } if(aveQScore >= qAverage) { success = 1; } else { success = 0; } return success; } catch(exception& e) { m->errorOut(e, "QualityScores", "cullQualAverage"); exit(1); } } /**************************************************************************************************/ void QualityScores::updateQScoreErrorMap(map >& qualErrorMap, string errorSeq, int start, int stop, int weight){ try { int seqLength = errorSeq.size(); int qIndex = start - 1; for(int i=0;i stop){ break; } } } catch(exception& e) { m->errorOut(e, "QualityScores", "updateQScoreErrorMap"); exit(1); } } /**************************************************************************************************/ void QualityScores::updateForwardMap(vector >& 
forwardMap, int start, int stop, int weight){ try { int index = 0; for(int i=start-1;ierrorOut(e, "QualityScores", "updateForwardMap"); exit(1); } } /**************************************************************************************************/ void QualityScores::updateReverseMap(vector >& reverseMap, int start, int stop, int weight){ try { int index = 0; for(int i=stop-1;i>=start-1;i--){ reverseMap[index++][qScores[i]] += weight; } } catch(exception& e) { m->errorOut(e, "QualityScores", "updateReverseMap"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/qualityscores.h000077500000000000000000000040371424121717000225650ustar00rootroot00000000000000#ifndef QUALITYSCORES #define QUALITYSCORES /* * qualityscores.h * Mothur * * Created by Pat Schloss on 7/12/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ //DataStructure for a quality file. #include "mothur.h" #include "mothurout.h" #include "sequence.hpp" #include "utils.hpp" #include "writer.h" /**************************************************************************************************/ class QualityScores { public: QualityScores(); ~QualityScores() = default; QualityScores(string n, vector qs); QualityScores(ifstream&); #ifdef USE_BOOST QualityScores(boost::iostreams::filtering_istream&); #endif int read(ifstream&); string getName(); int getLength(){ return (int)qScores.size(); } //vector getQualityScores() { return qScores; } void printQScores(ofstream&); void printQScores(ostream&); void printQScores(OutputWriter*); void trimQScores(int, int); void flipQScores(); bool stripQualThreshold(Sequence&, double); bool stripQualRollingAverage(Sequence&, double, bool); bool stripQualWindowAverage(Sequence&, int, int, double, bool); bool cullQualAverage(Sequence&, double, bool); void updateQScoreErrorMap(map >&, string, int, int, int); void updateForwardMap(vector >&, int, int, int); void updateReverseMap(vector >&, int, int, int); void setName(string n); void setScores(vector qs) { qScores = qs; seqLength = (int)qScores.size(); } vector getScores() { return qScores; } private: double calculateAverage(bool); double calculateExpectedErrors(void); MothurOut* m; vector qScores; Utils util; string seqName; int seqLength; string getSequenceName(ifstream&); string getCommentString(ifstream&); #ifdef USE_BOOST string getCommentString(boost::iostreams::filtering_istream&); string getSequenceName(boost::iostreams::filtering_istream&); #endif }; /**************************************************************************************************/ #endif mothur-1.48.0/source/datastructures/rabundfloatvector.cpp000077500000000000000000000222451424121717000237360ustar00rootroot00000000000000// // rabundfloatvector.cpp // Mothur // // Created by Sarah Westcott on 5/15/17. // Copyright © 2017 Schloss Lab. All rights reserved. 
// #include "rabundfloatvector.hpp" #include "rabundvector.hpp" #include "sabundvector.hpp" #include "ordervector.hpp" /***********************************************************************/ RAbundFloatVector::RAbundFloatVector() : DataVector(), maxRank(0.0), numBins(0), numSeqs(0.0), group("") {} /***********************************************************************/ RAbundFloatVector::RAbundFloatVector(int n) : DataVector(), data(n,0) , maxRank(0), numBins(0), numSeqs(0), group("") {} /***********************************************************************/ //RAbundVector::RAbundVector(const RAbundVector& rav) : DataVector(rav), data(rav.data), (rav.label), (rav.maxRank), (rav.numBins), (rav.numSeqs){} /***********************************************************************/ RAbundFloatVector::RAbundFloatVector(string id, vector rav) : DataVector(id), data(rav), group("") { try { numBins = 0; maxRank = 0; numSeqs = 0; for(int i=0;i maxRank) { maxRank = data[i]; } numSeqs += data[i]; } } catch(exception& e) { m->errorOut(e, "RAbundFloatVector", "RAbundFloatVector"); exit(1); } } /***********************************************************************/ RAbundFloatVector::RAbundFloatVector(vector rav, float mr, int nb, float ns) : group(""){ try { numBins = nb; maxRank = mr; numSeqs = ns; data = rav; } catch(exception& e) { m->errorOut(e, "RAbundFloatVector", "RAbundFloatVector"); exit(1); } } /***********************************************************************/ RAbundFloatVector::RAbundFloatVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0), group("") { try { int hold; f >> label >> hold; data.assign(hold, 0); float inputData; for(int i=0;i> inputData; set(i, inputData); } } catch(exception& e) { m->errorOut(e, "RAbundFloatVector", "RAbundFloatVector"); exit(1); } } /***********************************************************************/ RAbundFloatVector::RAbundFloatVector(ifstream& f, string l, string g) : DataVector(), maxRank(0), numBins(0), numSeqs(0), group(g) { try { int hold; label = l; f >> hold; float inputData; for(int i=0;i> inputData; push_back(inputData); } } catch(exception& e) { m->errorOut(e, "RAbundFloatVector", "RAbundFloatVector"); exit(1); } } /***********************************************************************/ RAbundFloatVector::~RAbundFloatVector() = default; /***********************************************************************/ void RAbundFloatVector::set(int binNumber, float newBinSize){ try { float oldBinSize = data[binNumber]; data[binNumber] = newBinSize; if(util.isEqual(oldBinSize, 0)) { numBins++; } if(util.isEqual(newBinSize, 0)) { numBins--; } if(newBinSize > maxRank) { maxRank = newBinSize; } numSeqs += (newBinSize - oldBinSize); } catch(exception& e) { m->errorOut(e, "RAbundFloatVector", "set"); exit(1); } } /***********************************************************************/ float RAbundFloatVector::get(int index){ return data[index]; } /***********************************************************************/ void RAbundFloatVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; data.clear(); } /***********************************************************************/ void RAbundFloatVector::push_back(float binSize){ try { data.push_back(binSize); numBins++; if(binSize > maxRank){ maxRank = binSize; } numSeqs += binSize; } catch(exception& e) { m->errorOut(e, "RAbundFloatVector", "push_back"); exit(1); } } /***********************************************************************/ void RAbundFloatVector::pop_back(){ 
return data.pop_back(); } /***********************************************************************/ void RAbundFloatVector::resize(int size){ data.resize(size); } /***********************************************************************/ int RAbundFloatVector::size(){ return data.size(); } /***********************************************************************/ void RAbundFloatVector::quicksort(){ sort(data.rbegin(), data.rend()); } /***********************************************************************/ float RAbundFloatVector::sum(){ return sum(0); } /***********************************************************************/ float RAbundFloatVector::sum(int index){ float sum = 0; for(int i = index; i < data.size(); i++) { sum += data[i]; } return sum; } /***********************************************************************/ float RAbundFloatVector::remove(int bin){ try { float abund = data[bin]; data.erase(data.begin()+bin); numBins--; if(util.isEqual(abund, maxRank)){ maxRank = util.max(data); } numSeqs -= abund; return abund; } catch(exception& e) { m->errorOut(e, "RAbundVector", "remove"); exit(1); } } /***********************************************************************/ int RAbundFloatVector::numNZ(){ int numNZ = 0; for(int i = 0; i < data.size(); i++) { if(!util.isEqual(data[i], 0)) { numNZ++; } } return numNZ; } /***********************************************************************/ vector::reverse_iterator RAbundFloatVector::rbegin(){ return data.rbegin(); } /***********************************************************************/ vector::reverse_iterator RAbundFloatVector::rend(){ return data.rend(); } /***********************************************************************/ void RAbundFloatVector::nonSortedPrint(ostream& output){ try { output << label; if (group != "") { output << '\t' << group; } output << '\t' << numBins; for(int i=0;ierrorOut(e, "RAbundFloatVector", "nonSortedPrint"); exit(1); } } /***********************************************************************/ void RAbundFloatVector::print(ostream& output){ try { output << label; if (group != "") { output << '\t' << group; } output << '\t' << numBins; vector hold = data; sort(hold.rbegin(), hold.rend()); for(int i=0;ierrorOut(e, "RAbundFloatVector", "print"); exit(1); } } /***********************************************************************/ int RAbundFloatVector::getNumBins(){ return numBins; } /***********************************************************************/ float RAbundFloatVector::getNumSeqs(){ return numSeqs; } /***********************************************************************/ float RAbundFloatVector::getMaxRank(){ return maxRank; } /***********************************************************************/ RAbundFloatVector RAbundFloatVector::getRAbundFloatVector(){ return *this; } /***********************************************************************/ RAbundVector RAbundFloatVector::getRAbundVector(){ RAbundVector rav; rav.setLabel(label); for (int i = 0; i < data.size(); i++) { rav.push_back((int)data[i]); } return rav; } /***********************************************************************/ SAbundVector RAbundFloatVector::getSAbundVector() { try { SAbundVector sav(maxRank+1); for(int i=0;ierrorOut(e, "RAbundFloatVector", "getSAbundVector"); exit(1); } } /***********************************************************************/ OrderVector RAbundFloatVector::getOrderVector(map* nameMap) { try { OrderVector ov; for(int i=0;ierrorOut(e, "RAbundFloatVector", "getOrderVector"); 
exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/rabundfloatvector.hpp000077500000000000000000000043261424121717000237430ustar00rootroot00000000000000// // rabundfloatvector.hpp // Mothur // // Created by Sarah Westcott on 5/15/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef rabundfloatvector_hpp #define rabundfloatvector_hpp #include "datavector.hpp" /* Data Structure for a rabund file. This class is a child to datavector. It represents OTU information at a certain distance. A rabundvector can be converted into and ordervector, listvector or sabundvector. Each member of the internal container "data" represents an individual OTU. So data[0] = 6, because there are six member in that OTU. example: listvector = a,b,c,d,e,f g,h,i j,k l m rabundvector = 6 3 2 1 1 sabundvector = 2 1 1 0 0 1 ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ class RAbundFloatVector : public DataVector { public: RAbundFloatVector(); RAbundFloatVector(int); RAbundFloatVector(vector, float, int, float); RAbundFloatVector(string, vector); RAbundFloatVector(const RAbundFloatVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs), group(bv.group) {}; RAbundFloatVector(ifstream&); RAbundFloatVector(ifstream& f, string l, string g); //label, group ~RAbundFloatVector(); int getNumBins(); float getNumSeqs(); float getMaxRank(); void set(int, float); float get(int); vector get() { return data; } void push_back(float); float remove(int); void pop_back(); void resize(int); int size(); void quicksort(); float sum(); float sum(int); int numNZ(); void clear(); vector::reverse_iterator rbegin(); vector::reverse_iterator rend(); void print(ostream&); void nonSortedPrint(ostream&); RAbundFloatVector getRAbundFloatVector(); RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map* hold = nullptr); string getGroup() { return group; } //group = "" for rabunds without groupInfo void setGroup(string g) { group = g; } private: vector data; float maxRank; int numBins; float numSeqs; string group; }; #endif /* rabundfloatvector_hpp */ mothur-1.48.0/source/datastructures/rabundvector.cpp000077500000000000000000000213431424121717000227060ustar00rootroot00000000000000/* * rabundvector.cpp * * * Created by Pat Schloss on 8/8/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. 
* */ #include "rabundvector.hpp" #include "sabundvector.hpp" #include "ordervector.hpp" #include "rabundfloatvector.hpp" /***********************************************************************/ RAbundVector::RAbundVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0) {} /***********************************************************************/ RAbundVector::RAbundVector(int n) : DataVector(), data(n,0) , maxRank(0), numBins(0), numSeqs(0) {} /***********************************************************************/ //RAbundVector::RAbundVector(const RAbundVector& rav) : DataVector(rav), data(rav.data), (rav.label), (rav.maxRank), (rav.numBins), (rav.numSeqs){} /***********************************************************************/ RAbundVector::RAbundVector(string id, vector rav) : DataVector(id), data(rav) { try { numBins = 0; maxRank = 0; numSeqs = 0; for(int i=0;i maxRank) { maxRank = data[i]; } numSeqs += data[i]; } } catch(exception& e) { m->errorOut(e, "RAbundVector", "RAbundVector"); exit(1); } } /***********************************************************************/ RAbundVector::RAbundVector(vector rav) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { for(int i=0;ierrorOut(e, "RAbundVector", "RAbundVector"); exit(1); } } /***********************************************************************/ RAbundVector::RAbundVector(vector rav, int mr, int nb, int ns) { try { numBins = nb; maxRank = mr; numSeqs = ns; data = rav; } catch(exception& e) { m->errorOut(e, "RAbundVector", "RAbundVector"); exit(1); } } /***********************************************************************/ RAbundVector::RAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { int hold; f >> label >> hold; data.assign(hold, 0); int inputData; for(int i=0;i> inputData; set(i, inputData); } } catch(exception& e) { m->errorOut(e, "RAbundVector", "RAbundVector"); exit(1); } } /***********************************************************************/ RAbundVector::RAbundVector(ifstream& f, string l) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { label = l; f >> numBins; data.assign(numBins, 0); int inputData; for(int i=0;i> inputData; set(i, inputData); } } catch(exception& e) { m->errorOut(e, "RAbundVector", "RAbundVector"); exit(1); } } /***********************************************************************/ RAbundVector::~RAbundVector() { } /***********************************************************************/ void RAbundVector::set(int binNumber, int newBinSize){ try { int oldBinSize = data[binNumber]; data[binNumber] = newBinSize; if(oldBinSize == 0) { numBins++; } if(newBinSize == 0) { numBins--; } if(newBinSize > maxRank) { maxRank = newBinSize; } numSeqs += (newBinSize - oldBinSize); } catch(exception& e) { m->errorOut(e, "RAbundVector", "set"); exit(1); } } /***********************************************************************/ int RAbundVector::get(int index){ return data[index]; } /***********************************************************************/ void RAbundVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; data.clear(); } /***********************************************************************/ void RAbundVector::push_back(int binSize){ try { data.push_back(binSize); numBins++; if(binSize > maxRank){ maxRank = binSize; } numSeqs += binSize; } catch(exception& e) { m->errorOut(e, "RAbundVector", "push_back"); exit(1); } } /***********************************************************************/ int RAbundVector::remove(int bin){ try { 
int abund = data[bin]; data.erase(data.begin()+bin); numBins--; if(abund == maxRank){ vector::iterator it = max_element(data.begin(), data.end()); maxRank = *it; } numSeqs -= abund; return abund; } catch(exception& e) { m->errorOut(e, "RAbundVector", "remove"); exit(1); } } /***********************************************************************/ void RAbundVector::pop_back(){ return data.pop_back(); } /***********************************************************************/ void RAbundVector::resize(int size){ data.resize(size); } /***********************************************************************/ int RAbundVector::size(){ return data.size(); } /***********************************************************************/ void RAbundVector::quicksort(){ sort(data.rbegin(), data.rend()); } /***********************************************************************/ int RAbundVector::sum(){ Utils util; return util.sum(data); } /***********************************************************************/ int RAbundVector::sum(int index){ int sum = 0; for(int i = index; i < data.size(); i++) { sum += data[i]; } return sum; } /***********************************************************************/ int RAbundVector::numNZ(){ int numNZ = 0; for(int i = 0; i < data.size(); i++) { if(data[i] != 0) { numNZ++; } } return numNZ; } /***********************************************************************/ vector RAbundVector::getSortedD(){ vector temp; temp = data; sort(temp.begin()+1, temp.end()); return temp; } /***********************************************************************/ vector::reverse_iterator RAbundVector::rbegin(){ return data.rbegin(); } /***********************************************************************/ vector::reverse_iterator RAbundVector::rend(){ return data.rend(); } /***********************************************************************/ void RAbundVector::nonSortedPrint(ostream& output){ try { output << label; output << '\t' << numBins; for(int i=0;ierrorOut(e, "RAbundVector", "nonSortedPrint"); exit(1); } } /***********************************************************************/ void RAbundVector::print(ostream& output){ try { output << label; output << '\t' << numBins; vector hold = data; sort(hold.rbegin(), hold.rend()); for(int i=0;ierrorOut(e, "RAbundVector", "print"); exit(1); } } /***********************************************************************/ int RAbundVector::getNumBins(){ return numBins; } /***********************************************************************/ int RAbundVector::getNumSeqs(){ return numSeqs; } /***********************************************************************/ int RAbundVector::getMaxRank(){ return maxRank; } /***********************************************************************/ RAbundVector RAbundVector::getRAbundVector(){ return *this; } /***********************************************************************/ RAbundFloatVector RAbundVector::getRAbundFloatVector(){ RAbundFloatVector rav; rav.setLabel(label); for(int i=0;ierrorOut(e, "RAbundVector", "getSAbundVector"); exit(1); } } /***********************************************************************/ OrderVector RAbundVector::getOrderVector(map* nameMap = nullptr) { try { vector ovData; for(int i=0;ierrorOut(e, "RAbundVector", "getOrderVector"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/rabundvector.hpp000077500000000000000000000034471424121717000227200ustar00rootroot00000000000000#ifndef 
RABUND_H #define RABUND_H #include "datavector.hpp" /* Data Structure for a rabund file. This class is a child to datavector. It represents OTU information at a certain distance. A rabundvector can be converted into and ordervector, listvector or sabundvector. Each member of the internal container "data" represents an individual OTU. So data[0] = 6, because there are six member in that OTU. example: listvector = a,b,c,d,e,f g,h,i j,k l m rabundvector = 6 3 2 1 1 sabundvector = 2 1 1 0 0 1 ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ class RAbundFloatVector; class OrderVector; class RAbundVector : public DataVector { public: RAbundVector(); RAbundVector(int); RAbundVector(vector, int, int, int); RAbundVector(vector); RAbundVector(string, vector); RAbundVector(const RAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs) {}; RAbundVector(ifstream&); RAbundVector(ifstream& f, string l); //filehandle, label ~RAbundVector(); int getNumBins(); int getNumSeqs(); int getMaxRank(); int remove(int); void set(int, int); int get(int); vector get() { return data; } void push_back(int); void pop_back(); void resize(int); int size(); void quicksort(); int sum(); int sum(int); int numNZ(); vector getSortedD(); void clear(); vector::reverse_iterator rbegin(); vector::reverse_iterator rend(); void print(ostream&); //sorted void nonSortedPrint(ostream&); //nonsorted RAbundVector getRAbundVector(); RAbundFloatVector getRAbundFloatVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); private: vector data; int maxRank; int numBins; int numSeqs; }; #endif mothur-1.48.0/source/datastructures/report.cpp000066400000000000000000000023611424121717000215170ustar00rootroot00000000000000// // report.cpp // Mothur // // Created by Sarah Westcott on 7/15/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "report.hpp" /**************************************************************************************************/ vector Report::readHeaders(ifstream& repFile){ try { string headers = util.getline(repFile); reportHeaders = util.splitWhiteSpace(headers); return reportHeaders; } catch(exception& e) { m->errorOut(e, "Report", "readHeaders"); exit(1); } } /**************************************************************************************************/ void Report::printHeaders(ofstream& repFile){ try { for (int i = 0; i < reportHeaders.size(); i++) { if (m->getControl_pressed()) { break; } repFile << reportHeaders[i] << '\t'; } repFile << endl; } catch(exception& e) { m->errorOut(e, "Report", "printHeaders"); exit(1); } } /**************************************************************************************************/ /**************************************************************************************************/ mothur-1.48.0/source/datastructures/report.hpp000066400000000000000000000015701424121717000215250ustar00rootroot00000000000000// // report.hpp // Mothur // // Created by Sarah Westcott on 7/15/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #ifndef report_hpp #define report_hpp #include "utils.hpp" #include "mothurout.h" /**************************************************************************************************/ class Report { public: Report() { m = MothurOut::getInstance(); } virtual ~Report() = default; virtual void read(ifstream&) = 0; vector getHeaders() { return reportHeaders; } vector readHeaders(ifstream&); void printHeaders(ofstream&); protected: virtual void fillHeaders() = 0; MothurOut* m; Utils util; vector reportHeaders; }; /**************************************************************************************************/ #endif /* report_hpp */ mothur-1.48.0/source/datastructures/sabundvector.cpp000077500000000000000000000123441424121717000227100ustar00rootroot00000000000000/* * sabund.cpp * * * Created by Pat Schloss on 8/8/08. * Copyright 2008 Patrick D. Schloss. All rights resesaved. * */ #include "sabundvector.hpp" /***********************************************************************/ SAbundVector::SAbundVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0){} /***********************************************************************/ SAbundVector::SAbundVector(int size) : DataVector(), data(size, 0), maxRank(0), numBins(0), numSeqs(0) {} /***********************************************************************/ SAbundVector::SAbundVector(string id, vector sav) : DataVector(id), data(sav) { try { for(int i=0;ierrorOut(e, "SAbundVector", "SAbundVector"); exit(1); } } /***********************************************************************/ SAbundVector::SAbundVector(vector dataVec, int mr, int nb, int ns) { try { data = dataVec; maxRank = mr; numBins = nb; numSeqs = ns; } catch(exception& e) { m->errorOut(e, "SAbundVector", "SAbundVector"); exit(1); } } /***********************************************************************/ SAbundVector::SAbundVector(ifstream& f): DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { int hold; f >> label >> hold; data.assign(hold+1, 0); int inputData; for(int i=1;i<=hold;i++){ f >> inputData; set(i, inputData); } } catch(exception& e) { m->errorOut(e, "SAbundVector", "SAbundVector"); exit(1); } } /***********************************************************************/ void SAbundVector::set(int sabund, int abundance){ try { int initSize = data[sabund]; data[sabund] = abundance; if(sabund != 0){ numBins += (abundance - initSize); } numSeqs += sabund * (abundance - initSize); if(sabund > maxRank) { maxRank = sabund; } } catch(exception& e) { m->errorOut(e, "SAbundVector", "set"); exit(1); } } /***********************************************************************/ int SAbundVector::get(int index){ return data[index]; } /***********************************************************************/ void SAbundVector::push_back(int abundance){ try { data.push_back(abundance); maxRank++; numBins += abundance; numSeqs += (maxRank * abundance); } catch(exception& e) { m->errorOut(e, "SAbundVector", "push_back"); exit(1); } } /***********************************************************************/ void SAbundVector::quicksort(){ sort(data.rbegin(), data.rend()); } /***********************************************************************/ int SAbundVector::sum(){ return util.sum(data); } /***********************************************************************/ void SAbundVector::resize(int size){ data.resize(size); } /***********************************************************************/ int SAbundVector::size(){ return data.size(); } 
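/***********************************************************************/
// Illustrative note (the example line below is hypothetical, not taken from the mothur sources):
// the ifstream constructor above parses a single sabund line of the form
//     label  maxRank  data[1] ... data[maxRank]
// Using the example from the rabund/sabund class descriptions (sabundvector = 2 1 1 0 0 1),
// a line "0.03 6 2 1 1 0 0 1" would give label = "0.03", maxRank = 6,
// numBins = 2+1+1+0+0+1 = 5 and numSeqs = 1*2 + 2*1 + 3*1 + 6*1 = 13, matching the
// five OTUs and thirteen sequences of the corresponding rabundvector (6 3 2 1 1).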
/***********************************************************************/ void SAbundVector::print(string prefix, ostream& output){ output << prefix << '\t' << maxRank; for(int i=1;i<=maxRank;i++){ output << '\t' << data[i]; } output << endl; } /***********************************************************************/ void SAbundVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; data.clear(); } /***********************************************************************/ void SAbundVector::print(ostream& output){ try { output << label << '\t' << maxRank; for(int i=1;i<=maxRank;i++){ output << '\t' << data[i]; } output << endl; } catch(exception& e) { m->errorOut(e, "SAbundVector", "print"); exit(1); } } /**********************************************************************/ int SAbundVector::getNumBins(){ return numBins; } /***********************************************************************/ int SAbundVector::getNumSeqs(){ return numSeqs; } /***********************************************************************/ int SAbundVector::getMaxRank(){ return maxRank; } /***********************************************************************/ RAbundVector SAbundVector::getRAbundVector(){ try { RAbundVector rav; for(int i=1;i < data.size();i++){ for(int j=0;jerrorOut(e, "SAbundVector", "getRAbundVector"); exit(1); } } /***********************************************************************/ SAbundVector SAbundVector::getSAbundVector(){ return *this; } /***********************************************************************/ OrderVector SAbundVector::getOrderVector(map* hold){ try { OrderVector ov; int binIndex = 0; for(int i=1;ierrorOut(e, "SAbundVector", "getOrderVector"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sabundvector.hpp000066400000000000000000000031311424121717000227040ustar00rootroot00000000000000#ifndef SABUND_H #define SABUND_H #include "datavector.hpp" #include "rabundvector.hpp" #include "ordervector.hpp" #include "calculator.h" /* Data Structure for a sabund file. This class is a child to datavector. It represents OTU information at a certain distance. A sabundvector can be converted into and ordervector, listvector or rabundvector. Each member of the internal container "data" represents the number of OTU's with that many members, but staring at 1. So data[1] = 2, because there are two OTUs with 1 member. 
example: listvector = a,b,c,d,e,f g,h,i j,k l m rabundvector = 6 3 2 1 1 sabundvector = 2 1 1 0 0 1 ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ class SAbundVector : public DataVector { public: SAbundVector(); SAbundVector(int); // SAbundVector(const SAbundVector&); SAbundVector(vector, int, int, int); SAbundVector(string, vector); SAbundVector(const SAbundVector& rv) : DataVector(rv.label), data(rv.data), maxRank(rv.maxRank), numBins(rv.numBins), numSeqs(rv.numSeqs){}; SAbundVector(ifstream&); ~SAbundVector(){}; int getNumBins(); int getNumSeqs(); int getMaxRank(); void set(int, int); int get(int); void push_back(int); void quicksort(); int sum(); void resize(int); int size(); void clear(); void print(ostream&); void print(string, ostream&); RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map* hold = nullptr); private: vector data; // bool needToUpdate; // void updateStats(); int maxRank; int numBins; int numSeqs; }; #endif mothur-1.48.0/source/datastructures/searchdatabase.hpp000077500000000000000000000050661424121717000231530ustar00rootroot00000000000000#ifndef DATABASE_HPP #define DATABASE_HPP /* * searchdatabase.hpp * * * Created by Pat Schloss on 12/16/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ /* This class is a parent to kmerdb, suffixdb. */ #include "mothur.h" #include "sequence.hpp" #include "currentfile.h" #include "utils.hpp" /**************************************************************************************************/ struct seqMatch { //used to select top n matches int seq; int match; seqMatch()=default; seqMatch(int s, int m) : seq(s), match(m) {} }; /**************************************************************************************************/ inline bool compareSeqMatches (seqMatch member, seqMatch member2){ //sorts largest to smallest if(member.match > member2.match){ return true; } else{ return false; } } /**************************************************************************************************/ inline bool compareSeqMatchesReverse (seqMatch member, seqMatch member2){ //sorts largest to smallest if(member.match < member2.match){ return true; } else{ return false; } } /**************************************************************************************************/ class SearchDatabase { public: SearchDatabase(){ longest = 0; numSeqs = 0; m = MothurOut::getInstance(); } virtual ~SearchDatabase(){}; virtual void generateDB() = 0; virtual void readDB(ifstream&){}; virtual void addSequence(Sequence) = 0; //add sequence to search engine virtual void addSequences(vector seqs) { for (int i = 0; i < seqs.size(); i++) { addSequence(seqs[i]); } } virtual void setNumSeqs(int i) { numSeqs = i; } virtual vector findClosestSequences(Sequence*, int, vector&) const = 0; // returns indexes of n closest sequences to query virtual vector< vector > get(int i, char& s) { s='x'; vector< vector > blank; return blank; } virtual vector getIndicatorColumns() { return nullIntVector; } virtual map getFilteredIndicatorColumns(string, vector&) { return nullIntMap; } virtual int getLongestBase() { return longest+1; } virtual vector getSequencesWithKmer(int){ vector filler; return filler; }; virtual int getReversed(int) { return 0; } virtual int getMaxKmer(){ return 1; } virtual string getName(int) { return ""; } protected: MothurOut* m; int numSeqs, longest; Utils util; }; /**************************************************************************************************/ #endif 
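/**************************************************************************************************/
// Illustrative usage sketch (hypothetical, not taken from the mothur sources): ranking candidate
// template sequences with the seqMatch comparator declared above. The match counts and the helper
// name exampleRankMatches are assumptions made for illustration only.
//
//  static void exampleRankMatches() {
//      vector<seqMatch> matches;
//      matches.push_back(seqMatch(0, 12));   // template 0 shares 12 kmers with the query
//      matches.push_back(seqMatch(1, 37));
//      matches.push_back(seqMatch(2, 25));
//      sort(matches.begin(), matches.end(), compareSeqMatches);   // largest match count first
//      // matches[0].seq is now 1, the template with the most shared kmers
//  }
/**************************************************************************************************/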
mothur-1.48.0/source/datastructures/sequence.cpp000077500000000000000000000633551424121717000220310ustar00rootroot00000000000000/* * sequence.cpp * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "sequence.hpp" #include "protein.hpp" /***********************************************************************/ Sequence::Sequence(){ m = MothurOut::getInstance(); initialize(); } /***********************************************************************/ Sequence::Sequence(string newName, string sequence) { try { m = MothurOut::getInstance(); initialize(); name = newName; util.checkName(name); //setUnaligned removes any gap characters for us setUnaligned(sequence); setAligned(sequence); } catch(exception& e) { m->errorOut(e, "Sequence", "Sequence"); exit(1); } } //******************************************************************************************************************** //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq Sequence::Sequence(istringstream& fastaString){ try { m = MothurOut::getInstance(); initialize(); name = getSequenceName(fastaString); if (!m->getControl_pressed()) { string sequence; //read comments while ((name[0] == '#') && fastaString) { while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there sequence = getCommentString(fastaString); if (fastaString) { fastaString >> name; name = name.substr(1); }else { name = ""; break; } } //while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there comment = getCommentString(fastaString); int numAmbig = 0; sequence = getSequenceString(fastaString, numAmbig); setAligned(sequence); //setUnaligned removes any gap characters for us setUnaligned(sequence); if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences.\n"); } } } catch(exception& e) { m->errorOut(e, "Sequence", "Sequence"); exit(1); } } //******************************************************************************************************************** //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq Sequence::Sequence(ifstream& fastaFile){ try { m = MothurOut::getInstance(); initialize(); name = getSequenceName(fastaFile); if (!m->getControl_pressed()) { string sequence; //read comments while ((name[0] == '#') && fastaFile) { while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there sequence = getCommentString(fastaFile); if (fastaFile) { fastaFile >> name; name = name.substr(1); }else { name = ""; break; } } //while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there comment = getCommentString(fastaFile); int numAmbig = 0; sequence = getSequenceString(fastaFile, numAmbig); setAligned(sequence); //setUnaligned removes any gap characters for us setUnaligned(sequence); if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. 
Mothur is not setup to process protein sequences.\n"); } } } catch(exception& e) { m->errorOut(e, "Sequence", "Sequence"); exit(1); } } //******************************************************************************************************************** //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq #ifdef USE_BOOST Sequence::Sequence(boost::iostreams::filtering_istream& fastaFile){ try { m = MothurOut::getInstance(); initialize(); name = getSequenceName(fastaFile); if (!m->getControl_pressed()) { string sequence; //read comments while ((name[0] == '#') && fastaFile) { while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there sequence = getCommentString(fastaFile); if (fastaFile) { fastaFile >> name; name = name.substr(1); }else { name = ""; break; } } //while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there comment = getCommentString(fastaFile); int numAmbig = 0; sequence = getSequenceString(fastaFile, numAmbig); setAligned(sequence); //setUnaligned removes any gap characters for us setUnaligned(sequence); if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences.\n"); } } } catch(exception& e) { m->errorOut(e, "Sequence", "Sequence"); exit(1); } } #endif //******************************************************************************************************************** //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq Sequence::Sequence(ifstream& fastaFile, string& extraInfo, bool getInfo){ try { m = MothurOut::getInstance(); initialize(); extraInfo = ""; name = getSequenceName(fastaFile); if (!m->getControl_pressed()) { string sequence; //read comments while ((name[0] == '#') && fastaFile) { while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there sequence = getCommentString(fastaFile); if (fastaFile) { fastaFile >> name; name = name.substr(1); }else { name = ""; break; } } //read info after sequence name while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13 || c == -1){ break; } extraInfo += c; } comment = extraInfo; int numAmbig = 0; sequence = getSequenceString(fastaFile, numAmbig); setAligned(sequence); //setUnaligned removes any gap characters for us setUnaligned(sequence); if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences.\n"); } } } catch(exception& e) { m->errorOut(e, "Sequence", "Sequence"); exit(1); } } /***********************************************************************/ Protein Sequence::getProtein() { try { Protein thisProtein = getProtein(1, false); return thisProtein; } catch(exception& e) { m->errorOut(e, "Sequence", "getProtein"); exit(1); } } /***********************************************************************/ //startFrame options: 1,2,3,-1,-2,-3. 1 -> start at 0, 2 start at 1, 3 start at 2. 
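//Illustrative note (worked example, not taken from the mothur sources): for the negative frames the
//code below computes startFrame = (length + (sf + 1)) % 3 on the unaligned length, so for a
//hypothetical 30-base read sf = -1 starts at index 0, sf = -2 at index 2, and sf = -3 at index 1.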
Protein Sequence::getProtein(int sf, bool trim) { try { vector aa; int startFrame = sf; int length = unaligned.length(); if (sf < 1) { //-1,-2,-3 startFrame = (length+(sf+1)) % 3; }else { startFrame--; } for (int i = startFrame; i <= length-3;) { if (m->getControl_pressed()) { break; } string codon = ""; codon += unaligned[i]; i++; codon += unaligned[i]; i++; codon += unaligned[i]; i++; AminoAcid thisAA(codon); if (thisAA.getNum() == stop) { if (trim) { break; } else { thisAA.setAmino('*'); } } aa.push_back(thisAA); } Protein thisProtein(name, aa); return thisProtein; } catch(exception& e) { m->errorOut(e, "Protein", "getSequence"); exit(1); } } //******************************************************************************************************************** string Sequence::getSequenceName(ifstream& fastaFile) { try { string name = ""; fastaFile >> name; if (name.length() != 0) { name = name.substr(1); util.checkName(name); }else{ if (!fastaFile.eof()) { m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name.\n"); m->setControl_pressed(true); } } return name; } catch(exception& e) { m->errorOut(e, "Sequence", "getSequenceName"); exit(1); } } //******************************************************************************************************************** #ifdef USE_BOOST string Sequence::getSequenceName(boost::iostreams::filtering_istream& fastaFile) { try { string name = ""; fastaFile >> name; if (name.length() != 0) { name = name.substr(1); util.checkName(name); }else{ if (!fastaFile.eof()) { m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name.\n"); m->setControl_pressed(true); } } return name; } catch(exception& e) { m->errorOut(e, "Sequence", "getSequenceName"); exit(1); } } #endif //******************************************************************************************************************** string Sequence::getSequenceName(istringstream& fastaFile) { try { string name = ""; fastaFile >> name; if (name.length() != 0) { name = name.substr(1); util.checkName(name); }else{ if (!fastaFile.eof()) { m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name.\n"); m->setControl_pressed(true); } } return name; } catch(exception& e) { m->errorOut(e, "Sequence", "getSequenceName"); exit(1); } } //******************************************************************************************************************** string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) { try { string sequence = ""; numAmbig = 0; while(fastaFile.peek() != '>' && fastaFile.peek() != EOF){ if (m->getControl_pressed()) { break; } string line = util.getline(fastaFile); //iterate through string for_each(line.begin(), line.end(), [&numAmbig](char & c) { c = ::toupper(c); if(c != '.' 
&& c != '-' && c != 'A' && c != 'T' && c != 'G' && c != 'C' && c != 'N'){ c = 'N'; numAmbig++; } }); sequence += line; } return sequence; } catch(exception& e) { m->errorOut(e, "Sequence", "getSequenceString"); exit(1); } } //******************************************************************************************************************** #ifdef USE_BOOST string Sequence::getSequenceString(boost::iostreams::filtering_istream& fastaFile, int& numAmbig) { try { char letter; string sequence = ""; numAmbig = 0; while(fastaFile){ letter= fastaFile.get(); if(letter == '>'){ fastaFile.putback(letter); break; }else if (letter == ' ') {;} else if(isprint(letter)){ letter = toupper(letter); if(letter == 'U'){letter = 'T';} if(letter != '.' && letter != '-' && letter != 'A' && letter != 'T' && letter != 'G' && letter != 'C' && letter != 'N'){ letter = 'N'; numAmbig++; } sequence += letter; } } return sequence; } catch(exception& e) { m->errorOut(e, "Sequence", "getSequenceString"); exit(1); } } #endif //******************************************************************************************************************** //comment can contain '>' so we need to account for that string Sequence::getCommentString(ifstream& fastaFile) { try { char letter; string temp = ""; while(fastaFile){ letter=fastaFile.get(); if((letter == '\r') || (letter == '\n') || letter == -1){ gobble(fastaFile); //in case its a \r\n situation break; }else { temp += letter; } } return temp; } catch(exception& e) { m->errorOut(e, "Sequence", "getCommentString"); exit(1); } } //******************************************************************************************************************** #ifdef USE_BOOST //comment can contain '>' so we need to account for that string Sequence::getCommentString(boost::iostreams::filtering_istream& fastaFile) { try { char letter; string temp = ""; while(fastaFile){ letter=fastaFile.get(); if((letter == '\r') || (letter == '\n') || letter == -1){ gobble(fastaFile); //in case its a \r\n situation break; }else { temp += letter; } } return temp; } catch(exception& e) { m->errorOut(e, "Sequence", "getCommentString"); exit(1); } } #endif //******************************************************************************************************************** string Sequence::getSequenceString(istringstream& fastaFile, int& numAmbig) { try { string sequence = ""; numAmbig = 0; while(fastaFile.peek() != '>' && fastaFile.peek() != EOF){ if (m->getControl_pressed()) { break; } string line = util.getline(fastaFile); //iterate through string for_each(line.begin(), line.end(), [&numAmbig](char & c) { c = ::toupper(c); if(c != '.' 
&& c != '-' && c != 'A' && c != 'T' && c != 'G' && c != 'C' && c != 'N'){ c = 'N'; numAmbig++; } }); sequence += line; } return sequence; } catch(exception& e) { m->errorOut(e, "Sequence", "getSequenceString"); exit(1); } } //******************************************************************************************************************** //comment can contain '>' so we need to account for that string Sequence::getCommentString(istringstream& fastaFile) { try { char letter; string temp = ""; while(fastaFile){ letter=fastaFile.get(); if((letter == '\r') || (letter == '\n') || letter == -1){ gobble(fastaFile); //in case its a \r\n situation break; }else { temp += letter; } } return temp; } catch(exception& e) { m->errorOut(e, "Sequence", "getCommentString"); exit(1); } } //******************************************************************************************************************** void Sequence::initialize(){ name = ""; unaligned = ""; aligned = ""; pairwise = ""; comment = ""; numBases = 0; alignmentLength = 0; startPos = -1; endPos = -1; longHomoPolymer = -1; ambigBases = -1; } //******************************************************************************************************************** void Sequence::setName(string seqName) { if(seqName[0] == '>') { name = seqName.substr(1); } else { name = seqName; } } //******************************************************************************************************************** void Sequence::setUnaligned(string sequence){ if(sequence.find_first_of('.') != string::npos || sequence.find_first_of('-') != string::npos) { string temp = ""; for(int j=0;j=0;i--){ if(aligned[i] == '-'){ aligned[i] = '.'; } else{ break; } } } } //******************************************************************************************************************** void Sequence::setPairwise(string sequence){ pairwise = sequence; } //******************************************************************************************************************** bool Sequence::isAligned(){ for (int i = 0; i < aligned.length(); i++) { if ((aligned[i] == '.') || (aligned[i] == '-')) { return true; } } return false; } //******************************************************************************************************************** string Sequence::convert2ints() { if(unaligned == "") { /* need to throw an error */ } string processed = unaligned; //iterate through string - replace bases with ints for_each(processed.begin(), processed.end(), [](char & c) { if(c == 'A') { c = '0'; } else if(c == 'C') { c = '1'; } else if(c == 'G') { c = '2'; } else if(c == 'T') { c = '3'; } else if(c == 'U') { c = '3'; } else { c = '4'; } }); return processed; } //******************************************************************************************************************** string Sequence::getName(){ return name; } //******************************************************************************************************************** string Sequence::getAligned(){ return aligned; } //******************************************************************************************************************** string Sequence::getInlineSeq(){ return name + '\t' + aligned; } //******************************************************************************************************************** string Sequence::getPairwise(){ return pairwise; } //******************************************************************************************************************** string Sequence::getUnaligned(){ return 
unaligned; } //******************************************************************************************************************** string Sequence::getComment(){ return comment; } //******************************************************************************************************************** int Sequence::getNumBases(){ return numBases; } //******************************************************************************************************************** int Sequence::getNumNs(){ int numNs = 0; for (int i = 0; i < unaligned.length(); i++) { if(unaligned[i] == 'N') { numNs++; } } return numNs; } //******************************************************************************************************************** void Sequence::printSequence(OutputWriter* out){ const string seqOutput = '>' + name + comment + '\n' + aligned + '\n'; out->write(seqOutput); } //******************************************************************************************************************** void Sequence::printSequence(ostream& out){ out << ">" << name << comment << endl; out << aligned << endl; } //******************************************************************************************************************** void Sequence::printUnAlignedSequence(ostream& out){ out << ">" << name << comment << endl; out << unaligned << endl; } //******************************************************************************************************************** int Sequence::getAlignLength(){ return alignmentLength; } //******************************************************************************************************************** int Sequence::getAmbigBases(){ if(ambigBases == -1){ ambigBases = 0; for(int j=0;j longHomoPolymer){ longHomoPolymer = homoPolymer; } homoPolymer = 1; } } if(homoPolymer > longHomoPolymer){ longHomoPolymer = homoPolymer; } } return longHomoPolymer; } //******************************************************************************************************************** int Sequence::getStartPos(){ bool isAligned = false; if(startPos == -1){ for(int j = 0; j < alignmentLength; j++) { if((aligned[j] != '.')&&(aligned[j] != '-')){ startPos = j + 1; break; }else { isAligned = true; } } } if(!isAligned){ startPos = 1; } return startPos; } //******************************************************************************************************************** int Sequence::filterToPos(int start){ if (start > aligned.length()) { start = aligned.length(); m->mothurOut("[ERROR]: start to large.\n"); } for(int j = 0; j < start; j++) { aligned[j] = '.'; } //things like ......----------AT become ................AT for(int j = start; j < aligned.length(); j++) { if (isalpha(aligned[j])) { break; } else { aligned[j] = '.'; } } setUnaligned(aligned); return 0; } //******************************************************************************************************************** int Sequence::filterFromPos(int end){ if (end > aligned.length()) { end = aligned.length(); m->mothurOut("[ERROR]: end to large.\n"); } for(int j = end; j < aligned.length(); j++) { aligned[j] = '.'; } for(int j = aligned.length()-1; j < 0; j--) { if (isalpha(aligned[j])) { break; } else { aligned[j] = '.'; } } setUnaligned(aligned); return 0; } //******************************************************************************************************************** int Sequence::getEndPos(){ bool isAligned = false; if (alignmentLength != numBases) { isAligned = true; } if(endPos == -1){ for(int j=alignmentLength-1;j>=0;j--){ 
if((aligned[j] != '.')&&(aligned[j] != '-')){ endPos = j + 1; break; }else { isAligned = true; } } } if(!isAligned){ endPos = numBases; } return endPos; } //******************************************************************************************************************** void Sequence::padToPos(int start){ for(int j = getStartPos()-1; j < start-1; j++) { aligned[j] = '.'; } startPos = start; } //******************************************************************************************************************** void Sequence::padFromPos(int end){ for(int j = end; j < getEndPos(); j++) { aligned[j] = '.'; } endPos = end; } //******************************************************************************************************************** void Sequence::setComment(string c){ comment = c; } //******************************************************************************************************************** void Sequence::reverseComplement(){ string temp; for(int i=numBases-1;i>=0;i--){ if(unaligned[i] == 'A') { temp += 'T'; } else if(unaligned[i] == 'T'){ temp += 'A'; } else if(unaligned[i] == 'G'){ temp += 'C'; } else if(unaligned[i] == 'C'){ temp += 'G'; } else { temp += 'N'; } } setAligned(temp); } //******************************************************************************************************************** void Sequence::trim(int length){ if(numBases > length){ unaligned = unaligned.substr(0,length); numBases = length; setAligned(unaligned); } } ///**************************************************************************************************/ mothur-1.48.0/source/datastructures/sequence.hpp000077500000000000000000000056701424121717000220320ustar00rootroot00000000000000#ifndef SEQUENCE_H #define SEQUENCE_H /* * sequence.h * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * A sequence object has three components: i) an accession number / name, ii) the unaligned primary sequence, iii) a * pairwise aligned sequence, and iv) a sequence that is aligned to a reference alignment. This class has methods * to set and get these values for the other classes where they are needed. * * */ //Data Structure for a fasta file. #include "mothurout.h" #include "utils.hpp" #include "writer.h" class Protein; /**************************************************************************************************/ class Sequence { #ifdef UNIT_TEST friend class TestSequence; #endif public: Sequence(); Sequence(string, string); Sequence(ifstream&); Sequence(ifstream&, string&, bool); Sequence(istringstream&); #ifdef USE_BOOST Sequence(boost::iostreams::filtering_istream&); #endif ~Sequence() = default; void setName(string); string getName(); void setUnaligned(string); string getUnaligned(); void setAligned(string); string getAligned(); void setComment(string); string getComment(); void setPairwise(string); string getPairwise(); Protein getProtein(int, bool); //starting frame, trim Protein getProtein(); //assumes starting frame 1, trim=false bool isAligned(); string getInlineSeq(); int getNumNs(); int getNumBases(); int getStartPos(); int getEndPos(); void reverseComplement(); void trim(int); void padToPos(int); void padFromPos(int); int filterToPos(int); //any character before the pos is changed to . and aligned and unaligned strings changed int filterFromPos(int); //any character after the pos is changed to . 
and aligned and unaligned strings changed int getAlignLength(); int getAmbigBases(); void removeAmbigBases(); int getLongHomoPolymer(); string convert2ints(); void printSequence(ostream&); void printSequence(OutputWriter*); void printUnAlignedSequence(ostream&); protected: MothurOut* m; void initialize(); string getSequenceString(ifstream&, int&); string getCommentString(ifstream&); string getSequenceString(istringstream&, int&); string getCommentString(istringstream&); string getSequenceName(ifstream&); #ifdef USE_BOOST string getCommentString(boost::iostreams::filtering_istream&); string getSequenceString(boost::iostreams::filtering_istream&, int&); string getSequenceName(boost::iostreams::filtering_istream&); #endif string getSequenceName(istringstream&); string name; string unaligned; string aligned; string pairwise; string comment; int numBases; int alignmentLength; int longHomoPolymer; int ambigBases; int startPos, endPos; Utils util; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/datastructures/sequencecountparser.cpp000066400000000000000000000066711424121717000243120ustar00rootroot00000000000000// // sequencecountparser.cpp // Mothur // // Created by Sarah Westcott on 8/7/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "sequencecountparser.h" #include "splitgroupscommand.h" /************************************************************/ SequenceCountParser::SequenceCountParser(string countfile, string fastafile, vector groupsSelected) { try { m = MothurOut::getInstance(); //run splitGroups command to parse files string inputString = ""; if (groupsSelected.size() == 0) { CountTable ct; ct.testGroups(countfile, groupsSelected); //fills groupsSelected with groups in count table } m->mothurOut("\n/******************************************/\n"); m->mothurOut("Splitting by sample: \n"); time_t start = time(nullptr); SplitGroupCommand* splitCommand = new SplitGroupCommand(groupsSelected, fastafile, countfile, ""); //type -> files in groups order. fasta -> vector. 
fastaFileForGroup1 stored in filenames["fasta"][1] map > filenames = splitCommand->getOutputFiles(); delete splitCommand; m->mothurOut("\nIt took " + toString(time(nullptr) - start) + " seconds to split the dataset by sample.\n"); m->mothurOut("/******************************************/\n"); vector parsedFastaFiles = filenames["fasta"]; //sorted in groups order vector parsedCountFiles = filenames["count"]; //sorted in groups order if (parsedCountFiles.size() != groupsSelected.size()) { cout << "should never get here, quitting\n\n"; m->setControl_pressed(true); } namesOfGroups = groupsSelected; for (int i = 0; i < groupsSelected.size(); i++) { vector thisSamplesFiles; thisSamplesFiles.push_back(parsedFastaFiles[i]); thisSamplesFiles.push_back(parsedCountFiles[i]); groupToFiles[groupsSelected[i]] = thisSamplesFiles; } //reset current files changed by split.groups CurrentFile* current; current = CurrentFile::getInstance(); current->setCountFile(countfile); current->setFastaFile(fastafile); } catch(exception& e) { m->errorOut(e, "SequenceCountParser", "SequenceCountParser"); exit(1); } } /************************************************************/ SequenceCountParser::~SequenceCountParser(){ } /************************************************************/ int SequenceCountParser::getNumGroups(){ return namesOfGroups.size(); } /************************************************************/ vector SequenceCountParser::getNamesOfGroups(){ return namesOfGroups; } /************************************************************/ vector SequenceCountParser::getFiles(string group){ try { map >::iterator it; it = groupToFiles.find(group); if (it != groupToFiles.end()) { return it->second; }else { m->mothurOut("[ERROR]: cannot find files for group " + group + ", quitting.\n"); m->setControl_pressed(true); } return nullVector; } catch(exception& e) { m->errorOut(e, "SequenceCountParser", "getFiles"); exit(1); } } /************************************************************/ mothur-1.48.0/source/datastructures/sequencecountparser.h000077500000000000000000000023731424121717000237550ustar00rootroot00000000000000#ifndef Mothur_sequencecountparser_h #define Mothur_sequencecountparser_h // // sequencecountparser.h // Mothur // // Created by Sarah Westcott on 8/7/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "mothur.h" #include "mothurout.h" #include "sequence.hpp" #include "counttable.h" #include "utils.hpp" /* This class reads a fasta and count file and parses the data by group. The countfile must contain group information. Note: THIS CODE IS NOT THREAD SAFE. ONLY CALL WHEN A SINGLE THREAD IS RUNNING */ class SequenceCountParser { public: SequenceCountParser(string, string, vector); //count, fasta, groups - file mismatches will set m->setControl_pressed(true) ~SequenceCountParser(); //general operations int getNumGroups(); vector getNamesOfGroups(); vector getFiles(string); //returns fasta and count file a specific group. map > getFiles() { return groupToFiles; } //returns all files groupName - > vector of groups files (fasta, count); private: MothurOut* m; Utils util; map > groupToFiles; vector namesOfGroups; //namesOfGroups in same order as groupToSeqs }; #endif mothur-1.48.0/source/datastructures/sequencedb.cpp000077500000000000000000000152331424121717000223270ustar00rootroot00000000000000/* * sequencedb.cpp * Mothur * * Created by Thomas Ryabin on 4/13/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sequencedb.h" #include "sequence.hpp" #include "mothur.h" #include "calculator.h" #include "kmer.hpp" /***********************************************************************/ SequenceDB::SequenceDB() : StorageDatabase(){}; /***********************************************************************/ //the clear function free's the memory SequenceDB::~SequenceDB() { data.clear(); } /***********************************************************************/ SequenceDB::SequenceDB(int newSize) : StorageDatabase() { data.resize(newSize, Sequence()); } /***********************************************************************/ //kmerDB[0] = vector maxKmers long, contains kmer counts SequenceDB::SequenceDB(ifstream& filehandle, int kmerSize, vector< vector< int > >& kmerDB, vector< int >& lengths) { try{ Utils util; length = 0; samelength = true; lengths.clear(); kmerDB.clear(); int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; int maxKmer = power4s[kmerSize]; Kmer kmer(kmerSize); while (!filehandle.eof()) { //input sequence info into sequencedb Sequence newSequence(filehandle); gobble(filehandle); if (newSequence.getName() != "") { if (length == 0) { length = newSequence.getAligned().length(); } if (length != newSequence.getAligned().length()) { samelength = false; } data.push_back(newSequence); vector kmerLocations; kmerLocations.resize(maxKmer+1, 0); int numKmers = newSequence.getNumBases() - kmerSize + 1; for(int i=0;ierrorOut(e, "SequenceDB", "SequenceDB"); exit(1); } } /***********************************************************************/ SequenceDB::SequenceDB(ifstream& filehandle) : StorageDatabase() { try{ //read through file while (!filehandle.eof()) { //input sequence info into sequencedb Sequence newSequence(filehandle); if (newSequence.getName() != "") { if (length == 0) { length = newSequence.getAligned().length(); } if (length != newSequence.getAligned().length()) { samelength = false; } data.push_back(newSequence); } //takes care of white space gobble(filehandle); } filehandle.close(); } catch(exception& e) { m->errorOut(e, "SequenceDB", "SequenceDB"); exit(1); } } /***********************************************************************/ SequenceDB::SequenceDB(const SequenceDB& sdb, unordered_set names) : StorageDatabase() { try{ int numSeqs = sdb.data.size(); for (int i = 0; i < numSeqs; i++) { Sequence seqI = sdb.data[i]; if (names.count(seqI.getName()) != 0) { if (length == 0) { length = seqI.getAligned().length(); } if (length != seqI.getAligned().length()) { samelength = false; } data.push_back(seqI); } } } catch(exception& e) { m->errorOut(e, "SequenceDB", "SequenceDB"); exit(1); } } /***********************************************************************/ SequenceDB::SequenceDB(const SequenceDB& sdb, unordered_set names, int kmerSize, vector< vector< int > >& kmerDB, vector< int >& lengths) : StorageDatabase() { try{ int numSeqs = sdb.data.size(); Utils util; length = 0; samelength = true; lengths.clear(); kmerDB.clear(); int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; int maxKmer = power4s[kmerSize]; Kmer kmer(kmerSize); for (int i = 0; i < numSeqs; i++) { Sequence newSequence = sdb.data[i]; if (names.count(newSequence.getName()) != 0) { if (length == 0) { length = newSequence.getAligned().length(); } if (length != newSequence.getAligned().length()) { samelength = false; } data.push_back(newSequence); vector kmerLocations; 
kmerLocations.resize(maxKmer+1, 0); int numKmers = newSequence.getNumBases() - kmerSize + 1; for(int i=0;ierrorOut(e, "SequenceDB", "SequenceDB"); exit(1); } } /***********************************************************************/ int SequenceDB::getNumSeqs() { return data.size(); } /***********************************************************************/ Sequence SequenceDB::getSeq(int index) { return data[index]; } /***********************************************************************/ void SequenceDB::push_back(Sequence newSequence) { try { if (length == 0) { length = newSequence.getAligned().length(); } if (length != newSequence.getAligned().length()) { samelength = false; } data.push_back(newSequence); } catch(exception& e) { m->errorOut(e, "SequenceDB", "push_back"); exit(1); } } /***********************************************************************/ void SequenceDB::print(string outputFileName) { try { ofstream out; util.openOutputFile(outputFileName, out); for (int i = 0; i < data.size(); i++) { data[i].printSequence(out); } out.close(); } catch(exception& e) { m->errorOut(e, "SequenceDB", "print"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sequencedb.h000077500000000000000000000024341424121717000217730ustar00rootroot00000000000000#ifndef SEQUENCEDB_H #define SEQUENCEDB_H /* * sequencedb.h * Mothur * * Created by Thomas Ryabin on 4/13/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class is a container to store the sequences. */ #include "storagedatabase.hpp" #include "sequence.hpp" class SequenceDB : public StorageDatabase { public: SequenceDB(); SequenceDB(int); //makes data that size SequenceDB(ifstream&); //reads file to fill data SequenceDB(ifstream&, int, vector< vector< int > >&, vector< int >&); //filehandle, kmersize, kmerdb, lengths SequenceDB(const SequenceDB& sdb) : data(sdb.data) {}; SequenceDB(const SequenceDB& sdb, unordered_set names); //creates a new sequenceDB containing only the reads in names SequenceDB(const SequenceDB& sdb, unordered_set names, int kmerSize, vector< vector< int > >& kmerDB, vector< int >& lengths); ~SequenceDB(); //loops through data and delete each sequence int getNumSeqs(); Sequence getSeq(int); //returns sequence at that location void push_back(Sequence); //adds unaligned sequence bool sameLength() { return samelength; } void print(string); //prints fasta file containing sequences in this db private: vector data; }; #endif mothur-1.48.0/source/datastructures/sequenceparser.cpp000077500000000000000000000154721424121717000232430ustar00rootroot00000000000000/* * sequenceParser.cpp * Mothur * * Created by westcott on 9/9/11. * Copyright 2011 Schloss Lab. All rights reserved. 
* */ #include "sequenceparser.h" /************************************************************/ SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFile, vector groupsSelected) { try { m = MothurOut::getInstance(); hasName = true; //read group file GroupMap groupMap; int error = groupMap.readMap(groupFile, groupsSelected); //only store info for groups selected if (error == 1) { m->setControl_pressed(true); } //initialize maps namesOfGroups = groupMap.getNamesOfGroups(); //run splitGroups command to parse files string inputString = ""; if (groupsSelected.size() != 0) { sort(groupsSelected.begin(), groupsSelected.end()); } else { groupsSelected = namesOfGroups; } for (int i = 0; i < groupsSelected.size(); i++) { util.checkGroupName(groupsSelected[i]); } inputString += "processors=1, groups=" + util.getStringFromVector(groupsSelected, "-"); //split.groups is paraplellized, we don't want the thread spinning up threads. inputString += ", fasta=" + fastaFile; inputString += ", name=" + nameFile; inputString += ", group=" + groupFile; m->mothurOut("\n/******************************************/\n"); m->mothurOut("Running command: split.groups(" + inputString + ")\n"); Command* splitCommand = new SplitGroupCommand(inputString); splitCommand->execute(); //type -> files in groups order. fasta -> vector. fastaFileForGroup1 stored in filenames["fasta"][1] map > filenames = splitCommand->getOutputFiles(); delete splitCommand; m->mothurOut("/******************************************/\n"); vector parsedFastaFiles = filenames["fasta"]; //sorted in groups order vector parsedNameFiles = filenames["name"]; //sorted in groups order vector parsedGroupFiles = filenames["group"]; //sorted in groups order if (parsedNameFiles.size() != groupsSelected.size()) { cout << "should never get here, quitting\n\n"; m->setControl_pressed(true); } for (int i = 0; i < groupsSelected.size(); i++) { vector thisSamplesFiles; thisSamplesFiles.push_back(parsedFastaFiles[i]); thisSamplesFiles.push_back(parsedNameFiles[i]); thisSamplesFiles.push_back(parsedGroupFiles[i]); groupToFiles[groupsSelected[i]] = thisSamplesFiles; } //reset current files changed by split.groups CurrentFile* current; current = CurrentFile::getInstance(); current->setNameFile(nameFile); current->setFastaFile(fastaFile); current->setGroupFile(groupFile); } catch(exception& e) { m->errorOut(e, "SequenceParser", "SequenceParser"); exit(1); } } /************************************************************/ //leaves all seqs map blank to be filled when asked for SequenceParser::SequenceParser(string groupFile, string fastaFile, vector groupsSelected) { try { m = MothurOut::getInstance(); hasName = false; //read group file GroupMap groupMap; int error = groupMap.readMap(groupFile, groupsSelected); //only store info for groups selected if (error == 1) { m->setControl_pressed(true); } //initialize maps namesOfGroups = groupMap.getNamesOfGroups(); //run splitGroups command to parse files string inputString = ""; if (groupsSelected.size() != 0) { sort(groupsSelected.begin(), groupsSelected.end()); for (int i = 0; i < groupsSelected.size(); i++) { util.checkGroupName(groupsSelected[i]); } inputString += "groups=" + util.getStringFromVector(groupsSelected, "-"); }else { groupsSelected = namesOfGroups; for (int i = 0; i < groupsSelected.size(); i++) { util.checkGroupName(groupsSelected[i]); } } inputString += ", fasta=" + fastaFile; inputString += ", group=" + groupFile; m->mothurOut("\n/******************************************/\n"); 
m->mothurOut("Running command: split.groups(" + inputString + ")\n"); Command* splitCommand = new SplitGroupCommand(inputString); splitCommand->execute(); //type -> files in groups order. fasta -> vector. fastaFileForGroup1 stored in filenames["fasta"][1] map > filenames = splitCommand->getOutputFiles(); delete splitCommand; m->mothurOut("/******************************************/\n"); vector parsedFastaFiles = filenames["fasta"]; //sorted in groups order vector parsedGroupFiles = filenames["group"]; //sorted in groups order if (parsedFastaFiles.size() != groupsSelected.size()) { cout << "should never get here, quitting\n\n"; m->setControl_pressed(true); } for (int i = 0; i < groupsSelected.size(); i++) { vector thisSamplesFiles; thisSamplesFiles.push_back(parsedFastaFiles[i]); thisSamplesFiles.push_back(parsedGroupFiles[i]); groupToFiles[groupsSelected[i]] = thisSamplesFiles; } //reset current files changed by split.groups CurrentFile* current; current = CurrentFile::getInstance(); current->setFastaFile(fastaFile); current->setGroupFile(groupFile); } catch(exception& e) { m->errorOut(e, "SequenceParser", "SequenceParser"); exit(1); } } /************************************************************/ SequenceParser::~SequenceParser(){ } /************************************************************/ int SequenceParser::getNumGroups(){ return namesOfGroups.size(); } /************************************************************/ vector SequenceParser::getNamesOfGroups(){ return namesOfGroups; } /************************************************************/ vector SequenceParser::getFiles(string group){ try { map >::iterator it; it = groupToFiles.find(group); if (it != groupToFiles.end()) { return it->second; }else { m->mothurOut("[ERROR]: cannot find files for group " + group + ", quitting.\n"); m->setControl_pressed(true); } return nullVector; } catch(exception& e) { m->errorOut(e, "SequenceParser", "getFiles"); exit(1); } } /************************************************************/ mothur-1.48.0/source/datastructures/sequenceparser.h000077500000000000000000000031631424121717000227020ustar00rootroot00000000000000#ifndef SEQUENCEPARSER_H #define SEQUENCEPARSER_H /* * sequenceParser.h * Mothur * * Created by westcott on 9/9/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "utils.hpp" #include "mothurout.h" #include "sequence.hpp" #include "groupmap.h" #include "splitgroupscommand.h" /* This class reads a fasta and group file with a namesfile as optional and parses the data by group. Note: The sum of all the groups unique sequences will be larger than the original number of unique sequences. This is because when we parse the name file we make a unique for each group instead of 1 unique for all groups. */ class SequenceParser { public: SequenceParser(string, string, vector); //group, fasta, groups (if blanks then all) - file mismatches will set m->setControl_pressed(true) SequenceParser(string, string, string, vector); //group, fasta, name, groups (if blanks then all) - file mismatches will set m->setControl_pressed(true) ~SequenceParser(); //general operations int getNumGroups(); vector getNamesOfGroups(); vector getFiles(string); //returns fasta and count file a specific group. 
map > getFiles() { return groupToFiles; } //returns all files groupName - > vector of groups files (fasta, optionalName, group); private: MothurOut* m; Utils util; bool hasName; map > groupToFiles; //groupName -> fasta, name, group or groupName -> fasta, group vector namesOfGroups; //namesOfGroups in same order as groupToSeqs; }; #endif mothur-1.48.0/source/datastructures/sffheader.cpp000066400000000000000000000206251424121717000221360ustar00rootroot00000000000000// // sffheader.cpp // Mothur // // Created by Sarah Westcott on 6/10/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "sffheader.hpp" //*************************************************************************************** SffCommonHeader::~SffCommonHeader(){ if (entireHeader.size() != 0) { for (int i = 0; i < entireHeader.size(); i++) { delete[] entireHeader[i]; } entireHeader.clear(); } } //*************************************************************************************** SffCommonHeader::SffCommonHeader(){ try { m = MothurOut::getInstance(); padSize = 0; magicNumber=0; indexOffset=0; indexLength=0; numReads=0; headerLength=0; keyLength=0; numFlows=0; flogramFormatCode='s'; } catch(exception& e) { m->errorOut(e, "SffCommonHeader", "SffCommonHeader"); exit(1); } } //*************************************************************************************** SffCommonHeader::SffCommonHeader(ifstream& in){ try { m = MothurOut::getInstance(); read(in); padSize = 0; magicNumber=0; indexOffset=0; indexLength=0; numReads=0; headerLength=0; keyLength=0; numFlows=0; flogramFormatCode='s'; } catch(exception& e) { m->errorOut(e, "SffCommonHeader", "SffCommonHeader"); exit(1); } } //********************************************************************************** bool SffCommonHeader::read(ifstream& in){ try { bool goodHeader = true; if (!in.eof()) { //read magic number char* magic = new char[4]; in.read(&(*magic), 4); magicNumber = be_int4(*(unsigned int *)(magic)); entireHeader.push_back(magic); //read version char* cversion = new char[4]; in.read(&(*cversion), 4); entireHeader.push_back(cversion); version = ""; for (int i = 0; i < 4; i++) { version += toString((int)(cversion[i])); } //read offset - ignored in print char buffer2 [8]; in.read(buffer2, 8); indexOffset = be_int8(*(unsigned long long *)(&buffer2)); //read index length - ignored in print char buffer3 [4]; in.read(buffer3, 4); indexLength = be_int4(*(unsigned int *)(&buffer3)); //read num reads - ignored in print and set to samples numReads char rnumReads[4]; in.read(rnumReads, 4); numReads = be_int4(*(unsigned int *)(&rnumReads)); if (m->getDebug()) { m->mothurOut("[DEBUG]: numReads = " + toString(numReads) + "\n"); } //read header length char* hlength = new char [2]; in.read(&(*hlength), 2); entireHeader.push_back(hlength); headerLength = be_int2(*(unsigned short *)(hlength)); //read key length char* klength = new char [2]; in.read(&(*klength), 2); entireHeader.push_back(klength); keyLength = be_int2(*(unsigned short *)(klength)); //read number of flow reads char* nflows = new char [2]; in.read(&(*nflows), 2); entireHeader.push_back(nflows); numFlows = be_int2(*(unsigned short *)(nflows)); //read format code char* fcode = new char[1]; in.read(&(*fcode), 1); entireHeader.push_back(fcode); flogramFormatCode = (int)(fcode[0]); //read flow chars char* tempBuffer = new char[numFlows]; in.read(&(*tempBuffer), numFlows); flowChars = tempBuffer; if (flowChars.length() > numFlows) { flowChars = flowChars.substr(0, numFlows); } 
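            // Added explanatory comment (not in the original source): tempBuffer is a raw,
            // non null-terminated buffer, so assigning it to a std::string can pull in bytes
            // past numFlows; the substr() above trims flowChars back to exactly numFlows
            // characters. The key sequence read below uses the same pattern with keyLength.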
entireHeader.push_back(tempBuffer); //read key char* tempBuffer2 = new char[keyLength]; in.read(&(*tempBuffer2), keyLength); keySequence = tempBuffer2; if (keySequence.length() > keyLength) { keySequence = keySequence.substr(0, keyLength); } entireHeader.push_back(tempBuffer2); /* Pad to 8 chars */ unsigned long long spotInFile = in.tellg(); unsigned long long spot = (spotInFile + 7)& ~7; // ~ inverts char* padding = new char[spot-spotInFile]; entireHeader.push_back(padding); padSize = spot-spotInFile; //ensure good reset in.seekg(spot); //check magic number and version if (magicNumber != 779314790) { m->mothurOut("[ERROR]: Magic Number is not correct, not a valid .sff file\n"); goodHeader = false; } if (version != "0001") { m->mothurOut("[ERROR]: Version is not supported, only support version 0001.\n"); goodHeader = false; } }else{ m->mothurOut("Error reading sff common header.\n"); goodHeader = false; } return goodHeader; } catch(exception& e) { m->errorOut(e, "SffCommonHeader", "read"); exit(1); } } //**************************************************************************************** void SffCommonHeader::printSampleCommonHeader(ofstream& out, int numReads){ try { //magic number out.write(entireHeader[0], 4); //version out.write(entireHeader[1], 4); //offset - read and discard, we will set it to 0 long long offset = 0; char offsetBuffer[8]; offsetBuffer[0] = (offset >> 56) & 0xFF; offsetBuffer[1] = (offset >> 48) & 0xFF; offsetBuffer[2] = (offset >> 40) & 0xFF; offsetBuffer[3] = (offset >> 32) & 0xFF; offsetBuffer[4] = (offset >> 24) & 0xFF; offsetBuffer[5] = (offset >> 16) & 0xFF; offsetBuffer[6] = (offset >> 8) & 0xFF; offsetBuffer[7] = offset & 0xFF; //index = 15 out.write(offsetBuffer, 8); offset = 0; char readIndexLength[4]; readIndexLength[0] = (offset >> 24) & 0xFF; readIndexLength[1] = (offset >> 16) & 0xFF; readIndexLength[2] = (offset >> 8) & 0xFF; readIndexLength[3] = offset & 0xFF; //index = 19 out.write(readIndexLength, 4); //change num reads char numSampleReads[4]; numSampleReads[0] = (numReads >> 24) & 0xFF; numSampleReads[1] = (numReads >> 16) & 0xFF; numSampleReads[2] = (numReads >> 8) & 0xFF; numSampleReads[3] = numReads & 0xFF; //index = 23 out.write(numSampleReads, 4); //read header length out.write(entireHeader[2], 2); //read key length out.write(entireHeader[3], 2); //read number of flow reads out.write(entireHeader[4], 2); //read format code out.write(entireHeader[5], 1); //read flow chars out.write(entireHeader[6], numFlows); //read key out.write(entireHeader[7], keyLength); /* Pad to 8 chars */ out.write(entireHeader[8], padSize); } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "printSampleCommonHeader"); exit(1); } } //*********************************************************************************** void SffCommonHeader::printSFFTxt(ofstream& out) { try { out << "Common Header:\nMagic Number: " << magicNumber << endl; out << "Version: " << version << endl; out << "Index Offset: " << indexOffset << endl; out << "Index Length: " << indexLength << endl; out << "Number of Reads: " << numReads << endl; out << "Header Length: " << headerLength << endl; out << "Key Length: " << keyLength << endl; out << "Number of Flows: " << numFlows << endl; out << "Format Code: " << flogramFormatCode << endl; out << "Flow Chars: " << flowChars << endl; out << "Key Sequence: " << keySequence << endl << endl; } catch(exception& e) { m->errorOut(e, "SffCommonHeader", "printSFFTxt"); exit(1); } } 
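// Added illustrative note (comment only, not in the original source): the sff format
// pads the common header (and each read record) out to an 8-byte boundary, and the
// rounding used above, spot = (spotInFile + 7) & ~7, rounds a file offset up to the
// next multiple of 8. Worked example with made-up offsets: spotInFile = 437 gives
// (437 + 7) & ~7 = 444 & ~7 = 440, i.e. 3 padding bytes; an already aligned offset
// such as 440 gives (440 + 7) & ~7 = 440, i.e. no padding.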
//*********************************************************************************** mothur-1.48.0/source/datastructures/sffheader.hpp000066400000000000000000000107421424121717000221420ustar00rootroot00000000000000// // sffheader.hpp // Mothur // // Created by Sarah Westcott on 6/10/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #ifndef sffheader_hpp #define sffheader_hpp #include "mothurout.h" #include "sequence.hpp" #include "qualityscores.h" #include "endiannessmacros.h" /* This class is a representation of a sff common header. https://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format#sff magic_number uint32_t version char[4] index_offset uint64_t index_length uint32_t number_of_reads uint32_t header_length uint16_t key_length uint16_t number_of_flows_per_read uint16_t flowgram_format_code uint8_t flow_chars char[number_of_flows_per_read] key_sequence char[key_length] eight_byte_padding uint8_t[*] The magic_number field value is 0x2E736666, the uint32_t encoding of the string ".sff" The version number corresponding to this proposal is 0001, or the byte array "\0\0\0\1". The index_offset and index_length fields are the offset and length of an optional index of the reads in the SFF file. If no index is included in the file, both fields must be 0. The number_of_reads field should be set to the number of reads stored in the file. The header_length field should be the total number of bytes required by this set of header fields, and should be equal to "31 + number_of_flows_per_read + key_length" rounded up to the next value divisible by 8. The key_length and key_sequence fields should be set to the length and nucleotide bases of the key sequence used for these reads. Note: The key_sequence field is not null-terminated. The number_of_flows_per_read should be set to the number of flows for each of the reads in the file. The flowgram_format_code should be set to the format used to encode each of the flowgram values for each read. Note: Currently, only one flowgram format has been adopted, so this value should be set to 1. The flowgram format code 1 stores each value as a uint16_t, where the floating point flowgram value is encoded as "(int) round(value * 100.0)", and decoded as "(storedvalue * 1.0 / 100.0)". The flow_chars should be set to the array of nucleotide bases ('A', 'C', 'G' or 'T') that correspond to the nucleotides used for each flow of each read. The length of the array should equal number_of_flows_per_read. Note: The flow_chars field is not null-terminated. If any eight_byte_padding bytes exist in the section, they should have a byte value of 0. If an index is included in the file, the index_offset and index_length values in the common header should point to the section of the file containing the index. 
To support different indexing methods, the index section should begin with the following two fields: */ /**********************************************************/ class SffCommonHeader { public: SffCommonHeader(); SffCommonHeader(ifstream&); ~SffCommonHeader(); bool read(ifstream& in); void printSFFTxt(ofstream&); void printSampleCommonHeader(ofstream& out, int numReads); unsigned short getHeaderLength() { return headerLength; } unsigned short getKeyLength() { return keyLength; } unsigned short getNumFlows() { return numFlows; } unsigned int getMagicNumber() { return magicNumber; } unsigned long long getIndexLength() { return indexLength; } unsigned short getIndexOffset() { return indexOffset; } unsigned int getNumReads() { return numReads; } string getVersion() { return version; } int getFlowgramFormat() { return flogramFormatCode; } string getFlows() { return flowChars; } string getKeySequence() { return keySequence; } void print(ofstream&); private: MothurOut* m; vector entireHeader; int padSize; unsigned int magicNumber; string version; unsigned long long indexOffset; unsigned int indexLength; unsigned int numReads; unsigned short headerLength; unsigned short keyLength; unsigned short numFlows; int flogramFormatCode; string flowChars; //length depends on number flow reads string keySequence; //length depends on key length }; #endif /* sffheader_hpp */ mothur-1.48.0/source/datastructures/sffread.cpp000066400000000000000000000417301424121717000216210ustar00rootroot00000000000000// // sffread.cpp // Mothur // // Created by Sarah Westcott on 6/9/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "sffread.hpp" //*************************************************************************************** SffRead::~SffRead(){ if (entireRead.size() != 0) { for (int i = 0; i < entireRead.size(); i++) { delete[] entireRead[i]; } entireRead.clear(); } } //*************************************************************************************** SffRead::SffRead(int num){ try { m = MothurOut::getInstance(); numFlows = num; padSize1 = 0; padSize2 = 0; headerLength=0; nameLength=0; numBases=0; clipQualLeft=0; clipQualRight=0; clipAdapterLeft=0; clipAdapterRight=0; bases = ""; name = ""; good = false; } catch(exception& e) { m->errorOut(e, "SffRead", "SffRead"); exit(1); } } //*************************************************************************************** SffRead::SffRead(ifstream& in, int num){ try { m = MothurOut::getInstance(); numFlows = num; padSize1 = 0; padSize2 = 0; headerLength=0; nameLength=0; numBases=0; clipQualLeft=0; clipQualRight=0; clipAdapterLeft=0; clipAdapterRight=0; bases = ""; name = ""; good = readSff(in); } catch(exception& e) { m->errorOut(e, "SffRead", "SffRead"); exit(1); } } //*************************************************************************************** bool SffRead::readSff(ifstream& in) { try { bool goodRead = true; if (!in.eof()) { unsigned long long startSpotInFile = in.tellg(); /*****************************************/ //read header length char* readHeaderLength = new char[2]; in.read(&(*readHeaderLength), 2); entireRead.push_back(readHeaderLength); headerLength = be_int2(*(unsigned short *)(readHeaderLength)); //read name length char* readNameLength = new char [2]; in.read(&(*readNameLength), 2); entireRead.push_back(readNameLength); nameLength = be_int2(*(unsigned short *)(readNameLength)); //read num bases char* readNumBases = new char [4]; in.read(&(*readNumBases), 4); entireRead.push_back(readNumBases); numBases = 
be_int4(*(unsigned int *)(readNumBases)); //read clip qual left char* rclipQualLeft = new char [2]; in.read(&(*rclipQualLeft), 2); entireRead.push_back(rclipQualLeft); clipQualLeft = be_int2(*(unsigned short *)(rclipQualLeft)); //read clip qual right char* rclipQualRight = new char [2]; in.read(&(*rclipQualRight), 2); entireRead.push_back(rclipQualRight); clipQualRight = be_int2(*(unsigned short *)(rclipQualRight)); //read clipAdapterLeft char* rclipAdapterLeft = new char [2]; in.read(&(*rclipAdapterLeft), 2); entireRead.push_back(rclipAdapterLeft); clipAdapterLeft = be_int2(*(unsigned short *)(rclipAdapterLeft)); //read clipAdapterRight char* rclipAdapterRight = new char [2]; in.read(&(*rclipAdapterRight), 2); entireRead.push_back(rclipAdapterRight); clipAdapterRight = be_int2(*(unsigned short *)(rclipAdapterRight)); //read name char* readName = new char[nameLength]; in.read(&(*readName), nameLength); for (int i = 0; i < nameLength; i++) { name += readName[i]; } entireRead.push_back(readName); //extract info from name decodeName(timestamp, region, xy, name); /* Pad to 8 chars */ unsigned long long spotInFile = in.tellg(); unsigned long long spot = (spotInFile + 7)& ~7; char* padding = new char[spot-spotInFile]; entireRead.push_back(padding); padSize1 = spot-spotInFile; in.seekg(spot); /*****************************************/ //sequence read //read flowgram flowgram.resize(numFlows); char* flows = new char[numFlows*2]; int count = 0; for (int i = 0; i < numFlows; i++) { char rflowgram [2]; in.read(rflowgram, 2); flows[count] = rflowgram[0]; count++; flows[count] = rflowgram[1]; count++; flowgram[i] = be_int2(*(unsigned short *)(&rflowgram)); } entireRead.push_back(flows); //read flowIndex flowIndex.resize(numBases); char* flowI = new char[numBases]; count = 0; for (int i = 0; i < numBases; i++) { char flowINdex[1]; in.read(flowINdex, 1); flowI[count] = flowINdex[0]; count++; flowIndex[i] = be_int1(*(unsigned char *)(&flowINdex)); } entireRead.push_back(flowI); //read bases char* readBases = new char[numBases]; in.read(&(*readBases), numBases); for (int i = 0; i < numBases; i++) { bases += readBases[i]; } entireRead.push_back(readBases); //read qual scores qualScores.resize(numBases, 0); char* scores = new char[numBases]; count = 0; for (int i = 0; i < numBases; i++) { char score[1]; in.read(score, 1); scores[count] = score[0]; count++; qualScores[i] = be_int1(*(unsigned char *)(&score)); } entireRead.push_back(scores); /* Pad to 8 chars */ spotInFile = in.tellg(); spot = (spotInFile + 7)& ~7; size = spot - startSpotInFile; char* padding2 = new char[spot-spotInFile]; entireRead.push_back(padding2); padSize2 = spot-spotInFile; goodRead = sanityCheck(); //ensure good reset in.seekg(spot); }else { size = 0; goodRead = false;} good = goodRead; return goodRead; } catch(exception& e) { m->errorOut(e, "SffRead", "read"); exit(1); } } //*************************************************************************************** int SffRead::decodeName(string& timestamp, string& region, string& xy, string name) { try { Utils util; if (name.length() >= 6) { string time = name.substr(0, 6); unsigned int timeNum = util.fromBase36(time); int q1 = timeNum / 60; int sec = timeNum - 60 * q1; int q2 = q1 / 60; int minute = q1 - 60 * q2; int q3 = q2 / 24; int hr = q2 - 24 * q3; int q4 = q3 / 32; int day = q3 - 32 * q4; int q5 = q4 / 13; int mon = q4 - 13 * q5; int year = 2000 + q5; timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + 
toString(sec); } if (name.length() >= 9) { region = name.substr(7, 2); string xyNum = name.substr(9); unsigned int myXy = util.fromBase36(xyNum); int x = myXy >> 12; int y = myXy & 4095; xy = toString(x) + "_" + toString(y); } return 0; } catch(exception& e) { m->errorOut(e, "SffRead", "decodeName"); exit(1); } } //********************************************************************************************* bool SffRead::sanityCheck() { try { bool okay = true; string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + getName() + "\n"; int readLength = getBases().length(); int qualLength = getQualScores().size(); unsigned short clipLeft = getClipQualLeft(); unsigned short clipRight = getClipQualRight(); if (clipLeft > readLength) { okay = false; message += "Clip Qual Left = " + toString(clipLeft) + ", but we only read " + toString(readLength) + " bases.\n"; } if (clipRight > readLength) { okay = false; message += "Clip Qual Right = " + toString(clipRight) + ", but we only read " + toString(readLength) + " bases.\n"; } if (clipLeft > qualLength) { okay = false; message += "Clip Qual Left = " + toString(clipLeft) + ", but we only read " + toString(qualLength) + " quality scores.\n"; } if (clipRight > qualLength) { okay = false; message += "Clip Qual Right = " + toString(clipRight) + ", but we only read " + toString(qualLength) + " quality scores.\n"; } if (!okay) { m->mothurOut(message+"\n"); } return okay; } catch(exception& e) { m->errorOut(e, "SffRead", "sanityCheck"); exit(1); } } //********************************************************************************** void SffRead::printSff(ofstream& out) { try { if (entireRead.size() != 0) { out.write(entireRead[0], 2); //write header length out.write(entireRead[1], 2); //write name length out.write(entireRead[2], 4); //write num bases out.write(entireRead[3], 2); //write clip qual left out.write(entireRead[4], 2); //write clip qual right out.write(entireRead[5], 2); //write clipAdapterLeft out.write(entireRead[6], 2); //write clipAdapterRight out.write(entireRead[7], nameLength); //write name out.write(entireRead[8], padSize1); //write pad1 out.write(entireRead[9], numFlows*2); //write flowgram out.write(entireRead[10], numBases); //write flowIndex out.write(entireRead[11], numBases); //write bases out.write(entireRead[12], numBases); //write qual scores out.write(entireRead[13], padSize2); //write pad2 } else { m->mothurOut("[ERROR]: cannot print sff, did not read it, skipping.\n"); } } catch(exception& e) { m->errorOut(e, "SffRead", "printSff"); exit(1); } } //*************************************************************************************** void SffRead::printFasta(ofstream& out, bool trim) { try { string seq = bases; if (trim) { if(clipQualRight < clipQualLeft){ if (clipQualRight == 0) { //don't trim right seq = seq.substr(clipQualLeft-1); }else { seq = "NNNN"; } } else if((clipQualRight != 0) && ((clipQualRight-clipQualLeft) >= 0)){ seq = seq.substr((clipQualLeft-1), (clipQualRight-clipQualLeft+1)); } else { seq = seq.substr(clipQualLeft-1); } }else{ int endValue = clipQualRight; //make the bases you want to clip lowercase and the bases you want to keep upper case if(endValue == 0){ endValue = seq.length(); } for (int i = 0; i < (clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); } for (int i = (clipQualLeft-1); i < (endValue-1); i++) { seq[i] = toupper(seq[i]); } for (int i = (endValue-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); } } out << ">" << name << " xy=" << xy << endl; out << seq << endl; } 
catch(exception& e) { m->errorOut(e, "SffRead", "printFasta"); exit(1); } } //********************************************************************************************************************** void SffRead::printQuality(ofstream& out, bool trim) { try { if (trim) { if(clipQualRight < clipQualLeft){ if (clipQualRight == 0) { //don't trim right out << ">" << name << " xy=" << xy << " length=" << (qualScores.size()-clipQualLeft) << endl; for (int i = (clipQualLeft-1); i < qualScores.size(); i++) { out << qualScores[i] << '\t'; } }else { out << ">" << name << " xy=" << xy << endl; out << "0\t0\t0\t0"; } } else if((clipQualRight != 0) && ((clipQualRight-clipQualLeft) >= 0)){ out << ">" << name << " xy=" << xy << " length=" << (clipQualRight-clipQualLeft+1) << endl; for (int i = (clipQualLeft-1); i < (clipQualRight); i++) { out << qualScores[i] << '\t'; } } else{ out << ">" << name << " xy=" << xy << " length=" << (clipQualRight-clipQualLeft) << endl; for (int i = (clipQualLeft-1); i < qualScores.size(); i++) { out << qualScores[i] << '\t'; } } }else{ out << ">" << name << " xy=" << xy << " length=" << qualScores.size() << endl; for (int i = 0; i < qualScores.size(); i++) { out << qualScores[i] << '\t'; } } out << endl; } catch(exception& e) { m->errorOut(e, "SffInfoCommand", "printQuality"); exit(1); } } //********************************************************************************************************************** void SffRead::printFlow(ofstream& out) { try { int endValue = clipQualRight; if (clipQualRight == 0) { endValue = flowIndex.size(); if (m->getDebug()) { m->mothurOut("[DEBUG]: " + name + " has clipQualRight=0.\n"); } } if(endValue > clipQualLeft){ int rightIndex = 0; for (int i = 0; i < endValue; i++) { rightIndex += flowIndex[i]; } out << name << ' ' << rightIndex; for (int i = 0; i < flowgram.size(); i++) { out << setprecision(2) << ' ' << (flowgram[i]/(float)100); } out << endl; } } catch(exception& e) { m->errorOut(e, "SffRead", "printFlow"); exit(1); } } //********************************************************************************************************************** void SffRead::printSffTxt(ofstream& out) { try { printSffTxtHeader(out); out << "Flowgram: "; for (int i = 0; i < flowgram.size(); i++) { out << setprecision(2) << (flowgram[i]/(float)100) << '\t'; } out << endl << "Flow Indexes: "; int sum = 0; for (int i = 0; i < flowIndex.size(); i++) { sum += flowIndex[i]; out << sum << '\t'; } //make the bases you want to clip lowercase and the bases you want to keep upper case int endValue = clipQualRight; if(endValue == 0){ endValue = bases.length(); } for (int i = 0; i < (clipQualLeft-1); i++) { bases[i] = tolower(bases[i]); } for (int i = (clipQualLeft-1); i < (endValue-1); i++) { bases[i] = toupper(bases[i]); } for (int i = (endValue-1); i < bases.length(); i++) { bases[i] = tolower(bases[i]); } out << endl << "Bases: " << bases << endl << "Quality Scores: "; for (int i = 0; i < qualScores.size(); i++) { out << qualScores[i] << '\t'; } out << endl << endl; } catch(exception& e) { m->errorOut(e, "SffRead", "printSffTxt"); exit(1); } } //********************************************************************************************************************** void SffRead::printSffTxtHeader(ofstream& out) { try { out << ">" << name << endl; out << "Run Prefix: " << timestamp << endl; out << "Region #: " << region << endl; out << "XY Location: " << xy << endl << endl; out << "Run Name: " << endl; out << "Analysis Name: " << endl; out << "Full Path: " << endl << 
endl; out << "Read Len: " << headerLength << endl; out << "Name Length: " << nameLength << endl; out << "# of Bases: " << numBases << endl; out << "Clip Qual Left: " << clipQualLeft << endl; out << "Clip Qual Right: " << clipQualRight << endl; out << "Clip Adap Left: " << clipAdapterLeft << endl; out << "Clip Adap Right: " << clipAdapterRight << endl << endl; } catch(exception& e) { m->errorOut(e, "SffRead", "printSffTxtHeader"); exit(1); } } //**************************************************************************************** mothur-1.48.0/source/datastructures/sffread.hpp000066400000000000000000000105441424121717000216250ustar00rootroot00000000000000// // sffread.hpp // Mothur // // Created by Sarah Westcott on 6/9/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #ifndef sffread_hpp #define sffread_hpp #include "mothurout.h" #include "endiannessmacros.h" #include "utils.hpp" /* This class is a representation of a sff read. https://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format#sff readHeader: read_header_length uint16_t name_length uint16_t number_of_bases uint32_t clip_qual_left uint16_t clip_qual_right uint16_t clip_adapter_left uint16_t clip_adapter_right uint16_t name char[name_length] eight_byte_padding uint8_t[*] readInfo: flowgram_values uint*_t[number_of_flows] flow_index_per_base uint8_t[number_of_bases] bases char[number_of_bases] quality_scores uint8_t[number_of_bases] eight_byte_padding uint8_t[*] */ class SffRead { public: SffRead(ifstream&, int); SffRead(int num); ~SffRead(); bool readSff(ifstream& in); bool isOkay() { return good; } void printFasta(ofstream& out, bool trim); void printQuality(ofstream& out, bool trim); void printFlow(ofstream& out); void printSff(ofstream& out); void printSffTxt(ofstream& out); //read header info string getName() { return name; } string getTimeStamp() { return timestamp; } string getRegion() { return region; } string getXY() { return xy; } unsigned short getHeaderLength() { return headerLength; } unsigned short getNameLength() { return nameLength; } unsigned short getClipQualLeft() { return clipQualLeft; } unsigned short getClipQualRight() { return clipQualRight; } unsigned short getClipAdapterLeft() { return clipAdapterLeft; } unsigned short getClipAdapterRight() { return clipAdapterRight; } unsigned int getNumBases() { return numBases; } //read info vector getFlowgrams() { return flowgram; } vector getFlowIndex() { return flowIndex; } vector getQualScores() { return qualScores; } string getBases() { return bases; } void setName(string n) { name = n; } void setTimeStamp(string n) { timestamp = n; } void setRegion(string n) { region = n; } void setXY(string n) { xy = n; } void setHeaderLength(unsigned short n) { headerLength = n; } void setNameLength(unsigned short n) { nameLength = n; } void setClipQualLeft(unsigned short n) { clipQualLeft = n; } void setClipQualRight(unsigned short n) { clipQualRight = n; } void setClipAdapterLeft(unsigned short n) { clipAdapterLeft = n; } void setClipAdapterRight(unsigned short n) { clipAdapterRight = n; } void setNumBases(unsigned int n) { numBases = n; } //read info void setFlowgrams(vector n) { flowgram = n; } void setFlowIndex(vector n) { flowIndex = n; } void setQualScores(vector n) { qualScores = n; } void setBases(string n) { bases = n; } private: MothurOut* m; vector entireRead; //header fields unsigned short headerLength; unsigned short nameLength; unsigned int numBases; unsigned short clipQualLeft; unsigned short clipQualRight; unsigned short 
clipAdapterLeft; unsigned short clipAdapterRight; string name; //length depends on nameLength string timestamp; string region; string xy; //readFields vector flowgram; vector flowIndex; string bases; vector qualScores; int numFlows, padSize1, padSize2; unsigned long long size; bool good; void printSffTxtHeader(ofstream& out); int decodeName(string&, string&, string&, string); bool sanityCheck(); }; #endif /* sffread_hpp */ mothur-1.48.0/source/datastructures/sharedclrvector.cpp000066400000000000000000000140271424121717000234000ustar00rootroot00000000000000// // sharedclrvector.cpp // Mothur // // Created by Sarah Westcott on 1/21/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "sharedclrvector.hpp" /***********************************************************************/ SharedCLRVector::SharedCLRVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0), group("") {} /***********************************************************************/ SharedCLRVector::SharedCLRVector(int n) : DataVector(), data(n,0) , maxRank(0), numBins(n), numSeqs(0), group("") {} /***********************************************************************/ SharedCLRVector::SharedCLRVector(vector rav) : DataVector(), maxRank(0), numBins(rav.size()), numSeqs(0), group("") { try { data.assign(numBins, 0); for(int i=0;ierrorOut(e, "SharedCLRVector", "SharedCLRVector"); exit(1); } } /***********************************************************************/ SharedCLRVector::SharedCLRVector(vector rav, float mr, int nb, float ns) : DataVector(), group(""){ try { numBins = nb; maxRank = mr; numSeqs = ns; data = rav; } catch(exception& e) { m->errorOut(e, "SharedCLRVector", "SharedCLRVector"); exit(1); } } /***********************************************************************/ SharedCLRVector::SharedCLRVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { f >> label >> group >> numBins; data.assign(numBins, 0); float inputData; for(int i=0;i> inputData; set(i, inputData); } } catch(exception& e) { m->errorOut(e, "SharedCLRVector", "SharedCLRVector"); exit(1); } } /***********************************************************************/ SharedCLRVector::SharedCLRVector(ifstream& f, string l, string g, int n) : DataVector(), maxRank(0), numBins(n), numSeqs(0) { try { label = l; group = g; data.assign(numBins, 0); float inputData; for(int i=0;i> inputData; set(i, inputData); } } catch(exception& e) { m->errorOut(e, "SharedCLRVector", "SharedCLRVector"); exit(1); } } /***********************************************************************/ void SharedCLRVector::set(int binNumber, float newBinSize){ try { int oldBinSize = data[binNumber]; data[binNumber] = newBinSize; if(newBinSize > maxRank) { maxRank = newBinSize; } numSeqs += (newBinSize - oldBinSize); } catch(exception& e) { m->errorOut(e, "SharedCLRVector", "set"); exit(1); } } /***********************************************************************/ float SharedCLRVector::get(int index){ return data[index]; } /***********************************************************************/ void SharedCLRVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; group = ""; data.clear(); } /***********************************************************************/ void SharedCLRVector::push_back(float binSize){ try { data.push_back(binSize); numBins++; if(binSize > maxRank){ maxRank = binSize; } numSeqs += binSize; } catch(exception& e) { m->errorOut(e, "SharedCLRVector", "push_back"); exit(1); } } 
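// Added illustrative note (comment only, not part of the original source): the stream
// constructors above expect each row to match what print() writes below, i.e.
// "label  group  numBins  value_1 ... value_numBins". A hypothetical example row for
// a sample with three OTUs,
//
//   0.03    GroupA  3   1.25    -0.40   0.73
//
// would set numBins to 3 and load the three values via set().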
/***********************************************************************/ float SharedCLRVector::remove(int bin){ try { float abund = data[bin]; data.erase(data.begin()+bin); numBins--; if(abund == maxRank){ maxRank = util.max(data); } numSeqs -= abund; return abund; } catch(exception& e) { m->errorOut(e, "SharedCLRVector", "remove"); exit(1); } } /***********************************************************************/ float SharedCLRVector::remove(vector bins){ try { if (bins.size() == 0) { return 0; } int numRemoved = 0; vector newData; int binIndex = 0; for (int i = 0; i < data.size(); i++) { if (m->getControl_pressed()) { break; } if (i != bins[binIndex]) { newData.push_back(data[i]); }else if (i == bins[binIndex]) { binIndex++; numRemoved += data[i]; if (binIndex > bins.size()) { //removed all bins newData.insert(newData.end(), data.begin()+i, data.end()); //add rest of good bins break; } } } data = newData; numBins = data.size(); vector::iterator it = max_element(data.begin(), data.end()); maxRank = *it; numSeqs -= numRemoved; return numRemoved; } catch(exception& e) { m->errorOut(e, "SharedCLRVector", "remove"); exit(1); } } /***********************************************************************/ void SharedCLRVector::resize(int size){ data.resize(size); vector::iterator it = max_element(data.begin(), data.end()); maxRank = *it; numSeqs = util.sum(data); numBins = size; } /***********************************************************************/ int SharedCLRVector::size(){ return data.size(); } /***********************************************************************/ void SharedCLRVector::print(ostream& output){ try { output << label; output << '\t' << group << '\t' << numBins; for(int i=0;ierrorOut(e, "SharedCLRVector", "nonSortedPrint"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sharedclrvector.hpp000066400000000000000000000037141424121717000234060ustar00rootroot00000000000000// // sharedclrvector.hpp // Mothur // // Created by Sarah Westcott on 1/21/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #ifndef sharedclrvector_hpp #define sharedclrvector_hpp #include "datavector.hpp" #include "rabundvector.hpp" #include "sabundvector.hpp" #include "ordervector.hpp" class SharedCLRVector : public DataVector { public: SharedCLRVector(); SharedCLRVector(int); SharedCLRVector(vector, float, int, float); //maxRank, numbins, numSeqs SharedCLRVector(vector); SharedCLRVector(const SharedCLRVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs), group(bv.group) {}; SharedCLRVector(ifstream&); SharedCLRVector(ifstream& f, string l, string g, int); //filehandle, label ~SharedCLRVector(){} int getNumBins() { return numBins; } float getNumSeqs() { return numSeqs; } float getMaxRank() { return maxRank; } float remove(int); float remove(vector); void set(int, float); float get(int); vector get() { return data; } void push_back(float); void resize(int); int size(); void clear(); void print(ostream&); //nonsorted string getGroup() { return group; } //group = "" for rabunds without groupInfo void setGroup(string g) { group = g; } RAbundVector getRAbundVector() { m->mothurOut("[ERROR]: can not use getRAbundVector for SharedCLRVector.\n"); RAbundVector r; return r; } SAbundVector getSAbundVector() { m->mothurOut("[ERROR]: can not use getSAbundVector for SharedCLRVector.\n"); SAbundVector s; return s; } OrderVector getOrderVector(map* hold = nullptr) { m->mothurOut("[ERROR]: can not use getOrderVector for SharedCLRVector.\n"); OrderVector o; return o; } private: vector data; float maxRank; int numBins; float numSeqs; string group; }; #endif /* sharedclrvector_hpp */ mothur-1.48.0/source/datastructures/sharedclrvectors.cpp000066400000000000000000000436321424121717000235670ustar00rootroot00000000000000// // sharedclrvectors.cpp // Mothur // // Created by Sarah Westcott on 1/21/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "sharedclrvectors.hpp" /***********************************************************************/ //reads a clr file SharedCLRVectors::SharedCLRVectors(ifstream& f, vector& userGroups, string& nextLabel, string& labelTag) : DataVector() { try { int num; string holdLabel, groupN; int numUserGroups = userGroups.size(); for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != nullptr) { delete lookup[i]; lookup[i] = nullptr; } } lookup.clear(); //are we at the beginning of the file?? if (nextLabel == "") { f >> label; //is this a shared file that has headers if (label == "label") { //gets "group" f >> label; gobble(f); //gets "numOtus" f >> label; gobble(f); //eat rest of line label = util.getline(f); gobble(f); //parse labels to save istringstream iStringStream(label); while(!iStringStream.eof()){ if (m->getControl_pressed()) { break; } string temp; iStringStream >> temp; gobble(iStringStream); currentLabels.push_back(temp); } if (currentLabels.size() != 0) { string binLabelTag = currentLabels[0]; labelTag = ""; for (int i = 0; i < binLabelTag.length(); i++) { if (isalpha(binLabelTag[i])){ labelTag += binLabelTag[i]; } } } f >> label >> groupN >> num; }else { //read in first row since you know there is at least 1 group. 
f >> groupN >> num; //make binlabels because we don't have any string snumBins = toString(num); if (labelTag == "") { labelTag = "Otu"; } for (int i = 0; i < num; i++) { //if there is a bin label use it otherwise make one string binLabel = labelTag; string sbinNumber = toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; currentLabels.push_back(binLabel); } } }else { label = nextLabel; //read in first row since you know there is at least 1 group. f >> groupN >> num; } bool readData = false; bool remove = false; if (numUserGroups == 0) { //user has not specified groups, so we will use all of them userGroups.push_back(groupN); readData = true; }else{ if (util.inUsersGroups(groupN, userGroups)) { readData = true; } else { remove = true; }// skipline because you are a group we dont care about } holdLabel = label; numBins = num; if (readData) { //add new vector to lookup SharedCLRVector* temp = new SharedCLRVector(f, label, groupN, numBins); push_back(temp); } else { util.getline(f); } gobble(f); if (!(f.eof())) { f >> nextLabel; } //read the rest of the groups info in while ((nextLabel == holdLabel) && (f.eof() != true)) { f >> groupN >> num; bool readData = false; if (numUserGroups == 0) { //user has not specified groups, so we will use all of them userGroups.push_back(groupN); readData = true; }else{ if (util.inUsersGroups(groupN, userGroups)) { readData = true; } else { remove = true; }// skipline because you are a group we dont care about } if (readData) { SharedCLRVector* temp = new SharedCLRVector(f, label, groupN, numBins); push_back(temp); }else { util.getline(f); } gobble(f); if (f.eof() != true) { f >> nextLabel; } } otuTag = labelTag; //error in names of user inputted Groups if (lookup.size() < userGroups.size()) { m->mothurOut("[ERROR]: requesting groups not present in files, aborting.\n"); m->setControl_pressed(true); } } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "SharedCLRVectors"); exit(1); } } /***********************************************************************/ void SharedCLRVectors::print(ostream& output, bool& printOTUHeaders){ try { printHeaders(output, printOTUHeaders); sort(lookup.begin(), lookup.end(), compareCLRVectors); for (int i = 0; i < lookup.size(); i++) { if (m->getControl_pressed()) { break; } lookup[i]->print(output); } } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "print"); exit(1); } } /***********************************************************************/ string SharedCLRVectors::getOTUName(int bin){ try { if (currentLabels.size() > bin) { } else { getOTUNames(); } return currentLabels[bin]; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "getOTUName"); exit(1); } } /***********************************************************************/ void SharedCLRVectors::setOTUName(int bin, string otuName){ try { if (currentLabels.size() > bin) { currentLabels[bin] = otuName; } else { getOTUNames(); //fills currentLabels if needed if (currentLabels.size() > bin) { currentLabels[bin] = otuName; } else { m->setControl_pressed(true); m->mothurOut("[ERROR]: " + toString(bin) + " bin does not exist\n"); } } } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "setOTUName"); exit(1); } } /***********************************************************************/ int SharedCLRVectors::push_back(vector abunds, string binLabel){ try { if (abunds.size() != lookup.size()) { 
m->mothurOut("[ERROR]: you have provided " + toString(abunds.size()) + " abundances, but mothur was expecting " + toString(lookup.size()) + ", please correct.\n"); m->setControl_pressed(true); return 0; } for (int i = 0; i < lookup.size(); i ++) { lookup[i]->push_back(abunds[i]); } //vector currentLabels = m->getCurrentSharedBinLabels(); if (binLabel == "") { //create one int otuNum = 1; bool notDone = true; //find label prefix string prefix = "Otu"; if (currentLabels.size() != 0) { if (currentLabels[currentLabels.size()-1][0] == 'P') { prefix = "PhyloType"; } string tempLabel = currentLabels[currentLabels.size()-1]; string simpleLastLabel = util.getSimpleLabel(tempLabel); util.mothurConvert(simpleLastLabel, otuNum); otuNum++; } string potentialLabel = toString(otuNum); while (notDone) { if (m->getControl_pressed()) { notDone = false; break; } potentialLabel = toString(otuNum); vector::iterator it = find(currentLabels.begin(), currentLabels.end(), potentialLabel); if (it == currentLabels.end()) { potentialLabel = prefix + toString(otuNum); it = find(currentLabels.begin(), currentLabels.end(), potentialLabel); if (it == currentLabels.end()) { notDone = false; break; } } otuNum++; } binLabel = potentialLabel; } currentLabels.push_back(binLabel); numBins++; return lookup.size(); } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "push_back"); exit(1); } } /***********************************************************************/ int SharedCLRVectors::push_back(SharedCLRVector* thisLookup){ try { if (numBins == 0) { numBins = thisLookup->getNumBins(); } lookup.push_back(thisLookup); sort(lookup.begin(), lookup.end(), compareCLRVectors); if (label == "") { label = thisLookup->getLabel(); } groupNames.clear(); for (int i = 0; i < lookup.size(); i ++) { groupNames[lookup[i]->getGroup()] = i; } return lookup.size(); } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "push_back"); exit(1); } } /***********************************************************************/ float SharedCLRVectors::getOTUTotal(int bin){ try { float totalOTUAbund = 0; for (int i = 0; i < lookup.size(); i++) { totalOTUAbund += lookup[i]->get(bin); } return totalOTUAbund; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "getOTUTotal"); exit(1); } } /***********************************************************************/ vector SharedCLRVectors::getOTU(int bin){ try { vector abunds; for (int i = 0; i < lookup.size(); i++) { abunds.push_back(lookup[i]->get(bin)); } return abunds; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "getOTU"); exit(1); } } /***********************************************************************/ void SharedCLRVectors::setLabels(string l){ try { label = l; for (int i = 0; i < lookup.size(); i++) { lookup[i]->setLabel(l); } } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "setLabels"); exit(1); } } /***********************************************************************/ float SharedCLRVectors::get(int bin, string group){ try { float abund = 0; map::iterator it = groupNames.find(group); if (it == groupNames.end()) { m->mothurOut("[ERROR]: can not find group " + group + ".\n"); m->setControl_pressed(true); } else { abund = lookup[it->second]->get(bin); } return abund; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "get"); exit(1); } } /***********************************************************************/ float SharedCLRVectors::getNumSeqs(string group){ try { float numSeqs = 0; map::iterator it = groupNames.find(group); if (it == 
groupNames.end()) { m->mothurOut("[ERROR]: can not find group " + group + ".\n"); m->setControl_pressed(true); } else { numSeqs = lookup[it->second]->getNumSeqs(); } return numSeqs; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "getNumSeqs"); exit(1); } } /***********************************************************************/ void SharedCLRVectors::set(int bin, float binSize, string group){ try { map::iterator it = groupNames.find(group); if (it == groupNames.end()) { m->mothurOut("[ERROR]: can not find group " + group + ".\n"); m->setControl_pressed(true); } else { lookup[it->second]->set(bin, binSize); } } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "set"); exit(1); } } /***********************************************************************/ float SharedCLRVectors::removeOTU(int bin){ try { float totalOTUAbund = 0; for (int i = 0; i < lookup.size(); i ++) { totalOTUAbund += lookup[i]->remove(bin); } currentLabels.erase(currentLabels.begin()+bin); numBins--; return totalOTUAbund; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "removeOTU"); exit(1); } } /***********************************************************************/ float SharedCLRVectors::removeOTUs(vector bins, bool sorted){ try { if (bins.size() == 0) { return 0; } if (!sorted) { sort(bins.begin(), bins.end()); } float totalOTUAbund = 0; for (int i = 0; i < lookup.size(); i ++) { totalOTUAbund += lookup[i]->remove(bins); } vector newLabels; int binIndex = 0; for (int i = 0; i < currentLabels.size(); i++) { if (m->getControl_pressed()) { break; } if (i != bins[binIndex]) { newLabels.push_back(currentLabels[i]); }else if (i == bins[binIndex]) { binIndex++; if (binIndex > bins.size()) { //removed all bins newLabels.insert(newLabels.end(), currentLabels.begin()+i, currentLabels.end()); //add rest of good bins break; } } } currentLabels = newLabels; numBins = currentLabels.size(); return totalOTUAbund; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "removeOTUs"); exit(1); } } /***********************************************************************/ void SharedCLRVectors::setOTUNames(vector names){ try { currentLabels.clear(); currentLabels = names; getOTUNames(); } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "setOTUNames"); exit(1); } } /***********************************************************************/ vector SharedCLRVectors::getOTUNames(){ try { util.getOTUNames(currentLabels, numBins, otuTag); return currentLabels; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "getOTUNames"); exit(1); } } /***********************************************************************/ void SharedCLRVectors::printHeaders(ostream& output, bool& printSharedHeaders){ try { if (printSharedHeaders) { getOTUNames(); output << "label\tGroup\tnumOtus"; for (int i = 0; i < numBins; i++) { output << '\t' << currentLabels[i]; } output << endl; printSharedHeaders = false; } } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "printHeaders"); exit(1); } } /***********************************************************************/ vector SharedCLRVectors::getNamesGroups(){ try { vector names; for (int i = 0; i < lookup.size(); i ++) { names.push_back(lookup[i]->getGroup()); } return names; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "getNamesGroups"); exit(1); } } /***********************************************************************/ float SharedCLRVectors::getNumSeqsSmallestGroup(){ try { float smallest = MOTHURMAX; for (int i = 0; i < lookup.size(); i++) { 
if (lookup[i]->getNumSeqs() < smallest) { smallest = lookup[i]->getNumSeqs(); } } return smallest; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getNumSeqsSmallestGroup"); exit(1); } } /***********************************************************************/ vector SharedCLRVectors::getSharedCLRVectors(){ try { vector newLookup; for (int i = 0; i < lookup.size(); i++) { SharedCLRVector* temp = new SharedCLRVector(*lookup[i]); newLookup.push_back(temp); } return newLookup; } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "getSharedCLRVectors"); exit(1); } } /***********************************************************************/ void SharedCLRVectors::removeGroups(vector g){ try { bool remove = false; for (vector::iterator it = lookup.begin(); it != lookup.end();) { //if this sharedrabund is not from a group the user wants then delete it. if (util.inUsersGroups((*it)->getGroup(), g) ) { remove = true; delete (*it); (*it) = nullptr; it = lookup.erase(it); }else { ++it; } } groupNames.clear(); for (int i = 0; i < lookup.size(); i ++) { groupNames[lookup[i]->getGroup()] = i; } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "removeGroups"); exit(1); } } /***********************************************************************/ int SharedCLRVectors::removeGroups(int minSize, bool silent){ try { vector Groups; for (vector::iterator it = lookup.begin(); it != lookup.end();) { if ((*it)->getNumSeqs() < minSize) { if (!silent) { m->mothurOut((*it)->getGroup() + " contains " + toString((*it)->getNumSeqs()) + ". Eliminating.\n"); } delete (*it); (*it) = nullptr; it = lookup.erase(it); }else { Groups.push_back((*it)->getGroup()); ++it; } } groupNames.clear(); for (int i = 0; i < lookup.size(); i ++) { groupNames[lookup[i]->getGroup()] = i; } return lookup.size(); } catch(exception& e) { m->errorOut(e, "SharedCLRVectors", "removeGroups"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sharedclrvectors.hpp000066400000000000000000000071161424121717000235710ustar00rootroot00000000000000// // sharedclrvectors.hpp // Mothur // // Created by Sarah Westcott on 1/21/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #ifndef sharedclrvectors_hpp #define sharedclrvectors_hpp #include "datavector.hpp" #include "sharedclrvector.hpp" /* DataStructure for a clr relabund file. The clr - log centered ratio - is the log (base 2) of a value divided by the geometric mean of the values across all OTUs for that sample. For example here are the counts for four OTUs in one sample... 
> x <- c(10, 5, 3, 1) > log2(x / prod(x)^(1/4)) [1] 1.5147234 0.5147234 -0.2222422 -1.8072047 */ //******************************************************************************************************************** inline bool compareCLRVectors(SharedCLRVector* left, SharedCLRVector* right){ return (left->getGroup() < right->getGroup()); } //******************************************************************************************************************** class SharedCLRVectors : public DataVector { public: SharedCLRVectors() : DataVector() { label = ""; numBins = 0; otuTag = "Otu"; } SharedCLRVectors(ifstream&, vector&, string&, string&); SharedCLRVectors(SharedCLRVectors& bv) : DataVector(bv), numBins(bv.numBins), otuTag(bv.otuTag) { vector data = bv.getSharedCLRVectors(); for (int i = 0; i < data.size(); i++) { push_back(data[i]); } setLabels(bv.getLabel()); setOTUNames(bv.getOTUNames()); //eliminateZeroOTUS(); } ~SharedCLRVectors() { clear(); } vector getNamesGroups(); void setLabels(string l); float getOTUTotal(int bin); vector getOTU(int bin); float removeOTU(int bin); float removeOTUs(vector, bool sorted=false); //bins to remove, sorted or not float get(int bin, string group); void set(int bin, float binSize, string group); void setOTUNames(vector names); vector getOTUNames(); string getOTUName(int); void setOTUName(int, string); int push_back(vector, string binLabel=""); //add otu. mothur assumes abunds are in same order as groups. int push_back(SharedCLRVector*); void removeGroups(vector g); int removeGroups(int minSize, bool silent=false); // removes any groups with numSeqs < minSize void resize(int n) { m->mothurOut("[ERROR]: can not use resize for SharedCLRVectors.\n"); } void clear() { for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != nullptr) { delete lookup[i]; lookup[i] = nullptr; } } lookup.clear(); groupNames.clear(); numBins = 0; currentLabels.clear(); } int size() { return (int)lookup.size(); } int getNumGroups() { return (int)lookup.size(); } int getNumBins() { return numBins; } float getNumSeqs(string); //group float getNumSeqsSmallestGroup(); void print(ostream&, bool&); vector getSharedCLRVectors(); RAbundVector getRAbundVector() { m->mothurOut("[ERROR]: can not use getRAbundVector for SharedCLRVectors.\n"); RAbundVector r; return r; } SAbundVector getSAbundVector() { m->mothurOut("[ERROR]: can not use getSAbundVector for SharedCLRVectors.\n"); SAbundVector s; return s; } OrderVector getOrderVector(map* hold = nullptr) { m->mothurOut("[ERROR]: can not use getOrderVector for SharedCLRVectors.\n"); OrderVector o; return o; } private: void printHeaders(ostream&, bool&); vector lookup; vector currentLabels; map groupNames; int numBins; string otuTag; }; #endif /* sharedclrvectors_hpp */ mothur-1.48.0/source/datastructures/sharedlistvector.cpp000077500000000000000000000353311424121717000235770ustar00rootroot00000000000000/* * sharedSharedListVector.cpp * Mothur * * Created by Sarah Westcott on 1/22/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sabundvector.hpp" #include "rabundvector.hpp" #include "ordervector.hpp" #include "sharedlistvector.h" #include "sharedordervector.h" /***********************************************************************/ SharedListVector::SharedListVector(ifstream& f, vector& userGroups, string& previousLabel, string& labelTag) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { Utils util; groups = userGroups; fillGroups = true; if (groups.size() > 0) { fillGroups = false; } CurrentFile* current = CurrentFile::getInstance(); groupMode = current->getGroupMode(); groupmap = nullptr; countTable = nullptr; //set up groupmap for later. if (groupMode == "group") { groupmap = new GroupMap(current->getGroupFile()); groupmap->readMap(); if (fillGroups) { groups = groupmap->getNamesOfGroups(); m->mothurOut("[ERROR]: requesting groups not present in files, aborting.\n"); fillGroups = false; } else { if (!util.isSubset(groupmap->getNamesOfGroups(), groups)) { m->mothurOut("[ERROR]: requesting groups not present in files, aborting.\n"); m->setControl_pressed(true); } } }else { countTable = new CountTable(); countTable->readTable(current->getCountFile(), true, false); if (fillGroups) { groups = countTable->getNamesOfGroups(); fillGroups = false; } else { if (!util.isSubset(countTable->getNamesOfGroups(), groups)) { m->mothurOut("[ERROR]: requesting groups not present in files, aborting.\n"); m->setControl_pressed(true); } } } int hold; //are we at the beginning of the file?? If yes, read or create headers if (previousLabel == "") { f >> label; //is this a shared file that has headers if (label == "label") { //gets "numOtus" f >> label; gobble(f); //eat rest of line label = util.getline(f); gobble(f); //parse labels to save istringstream iStringStream(label); while(!iStringStream.eof()){ if (m->getControl_pressed()) { break; } string temp; iStringStream >> temp; gobble(iStringStream); binLabels.push_back(temp); } if (binLabels.size() != 0) { string binLabelTag = binLabels[0]; labelTag = ""; for (int i = 0; i < binLabelTag.length(); i++) { if (isalpha(binLabelTag[i])){ labelTag += binLabelTag[i]; } } } f >> label >> hold; }else { //read in first row f >> hold; //make binlabels because we don't have any string snumBins = toString(hold); if (labelTag == "") { labelTag = "Otu"; } for (int i = 0; i < hold; i++) { //if there is a bin label use it otherwise make one string binLabel = labelTag; string sbinNumber = toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; binLabels.push_back(binLabel); } } }else { f >> label >> hold; } data.assign(hold, ""); string inputData = ""; otuTag = labelTag; previousLabel = label; for(int i=0;i> inputData; set(i, inputData); } gobble(f); } catch(exception& e) { m->errorOut(e, "SharedListVector", "SharedListVector"); exit(1); } } /***********************************************************************/ void SharedListVector::set(int binNumber, string seqNames){ try { Utils util; int nNames_old = util.getNumNames(data[binNumber]); data[binNumber] = seqNames; int nNames_new = util.getNumNames(seqNames); if(nNames_old == 0) { numBins++; } if(nNames_new == 0) { numBins--; } if(nNames_new > maxRank) { maxRank = nNames_new; } numSeqs += (nNames_new - nNames_old); } catch(exception& e) { m->errorOut(e, "SharedListVector", "set"); exit(1); } } /***********************************************************************/ string 
SharedListVector::get(int index){ return data[index]; } /***********************************************************************/ void SharedListVector::setLabels(vector labels){ try { binLabels = labels; getLabels(); } catch(exception& e) { m->errorOut(e, "SharedListVector", "setLabels"); exit(1); } } /***********************************************************************/ //could potentially end up with duplicate binlabel names with code below. //we don't currently use them in a way that would do that. //if you had a listfile that had been subsampled and then added to it, dup names would be possible. vector SharedListVector::getLabels(){ try { Utils util; util.getOTUNames(binLabels, numBins, otuTag); return binLabels; } catch(exception& e) { m->errorOut(e, "SharedListVector", "getLabels"); exit(1); } } /***********************************************************************/ void SharedListVector::push_back(string seqNames){ try { Utils util; data.push_back(seqNames); int nNames = util.getNumNames(seqNames); numBins++; if(nNames > maxRank) { maxRank = nNames; } numSeqs += nNames; int otuNum = numBins; bool notDone = true; //find label prefix string prefix = "Otu"; if (binLabels[binLabels.size()-1][0] == 'P') { prefix = "PhyloType"; } string tempLabel = binLabels[binLabels.size()-1]; string simpleLastLabel = util.getSimpleLabel(tempLabel); util.mothurConvert(simpleLastLabel, otuNum); otuNum++; string potentialLabel = toString(otuNum); while (notDone) { if (m->getControl_pressed()) { notDone = false; break; } potentialLabel = toString(otuNum); vector::iterator it = find(binLabels.begin(), binLabels.end(), potentialLabel); if (it == binLabels.end()) { potentialLabel = prefix + toString(otuNum); it = find(binLabels.begin(), binLabels.end(), potentialLabel); if (it == binLabels.end()) { notDone = false; break; } } otuNum++; } binLabels.push_back(potentialLabel); } catch(exception& e) { m->errorOut(e, "SharedListVector", "push_back"); exit(1); } } /***********************************************************************/ void SharedListVector::resize(int size){ data.resize(size); } /***********************************************************************/ int SharedListVector::size(){ return data.size(); } /***********************************************************************/ void SharedListVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; binLabels.clear(); return data.clear(); } /***********************************************************************/ void SharedListVector::print(ostream& output){ try { output << label << '\t' << numBins; for(int i=0;ierrorOut(e, "SharedListVector", "print"); exit(1); } } /***********************************************************************/ RAbundVector SharedListVector::getRAbundVector(){ try { RAbundVector rav; Utils util; for(int i=0;ierrorOut(e, "SharedListVector", "getRAbundVector"); exit(1); } } /***********************************************************************/ SAbundVector SharedListVector::getSAbundVector(){ try { SAbundVector sav(maxRank+1); Utils util; for(int i=0;ierrorOut(e, "SharedListVector", "getSAbundVector"); exit(1); } } /***********************************************************************/ SharedOrderVector* SharedListVector::getSharedOrderVector(){ try { SharedOrderVector* order = new SharedOrderVector(); order->setLabel(label); Utils util; for(int i=0;i binNames; util.splitAtComma(names, binNames); for (int j = 0; j < binNames.size(); j++) { if (m->getControl_pressed()) { return order; } if (groupMode == "group") { 
string groupName = groupmap->getGroup(binNames[i]); if(groupName == "not found") { m->mothurOut("Error: Sequence '" + binNames[i] + "' was not found in the group file, please correct.\n"); exit(1); } if (util.inUsersGroups(groupName, groups)) { order->push_back(i, groupName); }//i represents what bin you are in }else { vector groupAbundances = countTable->getGroupCounts(binNames[i]); vector groupNames = countTable->getNamesOfGroups(); for (int k = 0; k < groupAbundances.size(); k++) { //groupAbundances.size() == 0 if there is a file mismatch and m->control_pressed is true. if (m->getControl_pressed()) { return order; } for (int l = 0; l < groupAbundances[k]; l++) { //for each abundance != 0, add a individual for each if (util.inUsersGroups(groupNames[k], groups)) { order->push_back(i, groupNames[k]); } } } } } } util.mothurRandomShuffle(*order); order->updateStats(); return order; } catch(exception& e) { m->errorOut(e, "SharedListVector", "getSharedOrderVector"); exit(1); } } /***********************************************************************/ SharedRAbundVectors* SharedListVector::getSharedRAbundVector() { try { vector lookup; //contains just the groups the user selected //vector groups; map finder; //contains all groups in groupmap map::iterator it; for (int i = 0; i < groups.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(numBins); finder[groups[i]] = temp; finder[groups[i]]->setLabel(label); finder[groups[i]]->setGroup(groups[i]); lookup.push_back(finder[groups[i]]); } Utils util; //fill vectors for(int i=0;i binNames; util.splitAtComma(names, binNames); for (int j = 0; j < binNames.size(); j++) { if (groupMode == "group") { string group = groupmap->getGroup(binNames[j]); if(group == "not found") { m->mothurOut("Error: Sequence '" + binNames[j] + "' was not found in the group file, please correct.\n"); exit(1); } it = finder.find(group); if (it != finder.end()) { it->second->set(i, it->second->get(i) + 1); } //i represents what bin you are in }else{ vector counts = countTable->getGroupCounts(binNames[j]); vector allGroups = countTable->getNamesOfGroups(); for (int k = 0; k < allGroups.size(); k++) { it = finder.find(allGroups[k]); if (it != finder.end()) { it->second->set(i, it->second->get(i) + counts[k]); } //i represents what bin you are in } } } } SharedRAbundVectors* shared = new SharedRAbundVectors(otuTag); for (int j = 0; j < lookup.size(); j++) { shared->push_back(lookup[j]); } shared->setOTUNames(binLabels); shared->eliminateZeroOTUS(); return shared; } catch(exception& e) { m->errorOut(e, "SharedListVector", "getSharedRAbundVector"); exit(1); } } /***********************************************************************/ SharedRAbundFloatVectors* SharedListVector::getSharedRAbundFloatVector() { try { SharedRAbundVectors* shared = getSharedRAbundVector(); vector thisLookup = shared->getSharedRAbundFloatVectors(); SharedRAbundFloatVectors* sharedFloat = new SharedRAbundFloatVectors(otuTag); for (int j = 0; j < thisLookup.size(); j++) { sharedFloat->push_back(thisLookup[j]); } return sharedFloat; } catch(exception& e) { m->errorOut(e, "SharedListVector", "getSharedRAbundVector"); exit(1); } } /***********************************************************************/ OrderVector SharedListVector::getOrderVector(map* orderMap = nullptr){ try { Utils util; if(orderMap == nullptr){ OrderVector ov; for(int i=0;i binNames; util.splitAtComma(names, binNames); int binSize = binNames.size(); if (groupMode != "group") { binSize = 0; for (int j = 0; j < binNames.size(); 
j++) { binSize += countTable->getNumSeqs(binNames[i]); } } for(int j=0;j binNames; util.splitAtComma(listOTU, binNames); for (int j = 0; j < binNames.size(); j++) { if(orderMap->count(binNames[j]) == 0){ m->mothurOut(binNames[j] + " not found, check *.names file\n"); exit(1); } ov.set((*orderMap)[binNames[j]], i); } } ov.setLabel(label); ov.getNumBins(); return ov; } } catch(exception& e) { m->errorOut(e, "SharedListVector", "getOrderVector"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sharedlistvector.h000077500000000000000000000047501424121717000232450ustar00rootroot00000000000000#ifndef SHAREDLIST_H #define SHAREDLIST_H /* * sharedlistvector.h * Mothur * * Created by Sarah Westcott on 1/22/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "datavector.hpp" #include "groupmap.h" #include "counttable.h" #include "sharedrabundvectors.hpp" #include "sharedrabundfloatvectors.hpp" #include "currentfile.h" /* This class is a child to datavector. It represents OTU information at a certain distance. A sharedlistvector can be converted into a sharedordervector, sharedrabundvector or sharedsabundvectorand as well as an ordervector, rabundvector or sabundvector. Each member of the internal container "data" represents an individual OTU. Each individual in the OTU belongs to a group. So data[0] = "a,b,c,d,e,f". example: listvector = a,b,c,d,e,f g,h,i j,k l m rabundvector = 6 3 2 1 1 sabundvector = 2 1 1 0 0 1 ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 */ class SharedListVector : public DataVector { public: SharedListVector(ifstream&, vector&, string&, string&); SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs), binLabels(lv.binLabels), groups(lv.groups), fillGroups(lv.fillGroups), groupMode(lv.groupMode), otuTag(lv.otuTag) { groupmap = nullptr; countTable = nullptr; }; ~SharedListVector(){ if (groupmap != nullptr) { delete groupmap; } if (countTable != nullptr) { delete countTable; } }; int getNumBins() { return numBins; } int getNumSeqs() { return numSeqs; } int getMaxRank() { return maxRank; } void set(int, string); string get(int); vector getLabels(); void setLabels(vector); void push_back(string); void resize(int); void clear(); int size(); void print(ostream&); RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); SharedOrderVector* getSharedOrderVector(); SharedRAbundVectors* getSharedRAbundVector(); //returns sharedRabundVectors for all the users groups SharedRAbundFloatVectors* getSharedRAbundFloatVector(); //returns sharedRabundVectors for all the users groups private: vector data; //data[i] is a list of names of sequences in the ith OTU. GroupMap* groupmap; CountTable* countTable; vector groups; bool fillGroups; int maxRank; int numBins; int numSeqs; vector binLabels; string groupMode, otuTag; }; #endif mothur-1.48.0/source/datastructures/sharedordervector.cpp000077500000000000000000000253721424121717000237430ustar00rootroot00000000000000/* * sharedSharedOrderVector.cpp * Dotur * * Created by Sarah Westcott on 12/9/08. * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "sharedordervector.h" /***********************************************************************/ SharedOrderVector::SharedOrderVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0) {} /***********************************************************************/ SharedOrderVector::SharedOrderVector(string id, vector ov) : DataVector(id), data(ov) { updateStats(); } /***********************************************************************/ //This function is used to read a .shared file for the collect.shared, rarefaction.shared and summary.shared commands //if you don't use a list and groupfile. SharedOrderVector::SharedOrderVector(ifstream& f, vector& userGroups, string& previousLabel) : DataVector() { //reads in a shared file try { maxRank = 0; numBins = 0; numSeqs = 0; int numUserGroups = userGroups.size(); int num, inputData; numSeqs = 0; string holdLabel, nextLabel, groupN; individual newguy; //read in first row since you know there is at least 1 group. //are we at the beginning of the file?? if (previousLabel == "") { f >> label; //is this a shared file that has headers if (label == "label") { //gets "group" f >> label; gobble(f); //gets "numOtus" f >> label; gobble(f); //eat rest of line label = util.getline(f); gobble(f); //parse labels to save istringstream iStringStream(label); while(!iStringStream.eof()){ if (m->getControl_pressed()) { break; } string temp; iStringStream >> temp; gobble(iStringStream); currentLabels.push_back(temp); } f >> label; } }else { label = previousLabel; } //read in first row since you know there is at least 1 group. f >> groupN >> num; bool readData = false; if (numUserGroups == 0) { //user has not specified groups, so we will use all of them userGroups.push_back(groupN); readData = true; }else{ if (util.inUsersGroups(groupN, userGroups)) { readData = true; } //else - skipline because you are a group we dont care about } holdLabel = label; if (readData) { //save group in groupmap setNamesOfGroups(groupN); for(int i=0;i> inputData; //abundance of the otu for (int j = 0; j < inputData; j++) { //for each abundance push_back(i, groupN); numSeqs++; } } } else { util.getline(f); } gobble(f); if (!(f.eof())) { f >> nextLabel; } //read the rest of the groups info in while ((nextLabel == holdLabel) && (f.eof() != true)) { f >> groupN >> num; bool readData = false; if (numUserGroups == 0) { //user has not specified groups, so we will use all of them userGroups.push_back(groupN); readData = true; }else{ if (util.inUsersGroups(groupN, userGroups)) { readData = true; } //else - skipline because you are a group we dont care about } if (readData) { //save group in groupmap setNamesOfGroups(groupN); for(int i=0;i> inputData; for (int j = 0; j < inputData; j++) { push_back(i, groupN); numSeqs++; } } }else { util.getline(f); } gobble(f); if (f.eof() != true) { f >> nextLabel; } } previousLabel = nextLabel; sort(userGroups.begin(), userGroups.end()); for (int i = 0; i < userGroups.size(); i++) { setNamesOfGroups(userGroups[i]); } updateStats(); } catch(exception& e) { m->errorOut(e, "SharedOrderVector", "SharedOrderVector"); exit(1); } } /***********************************************************************/ int SharedOrderVector::getNumBins(){ return numBins; } /***********************************************************************/ int SharedOrderVector::getNumSeqs(){ return numSeqs; } /***********************************************************************/ int SharedOrderVector::getMaxRank(){ return maxRank; } 
/***********************************************************************/ void SharedOrderVector::set(int index, int binNumber, int abund, string groupName){ setNamesOfGroups(groupName); data[index].group = groupName; data[index].binNumber = binNumber; //if (abund > maxRank) { maxRank = abund; } updateStats(); } /***********************************************************************/ individual SharedOrderVector::get(int index){ return data[index]; } /************************************************************/ void SharedOrderVector::setNamesOfGroups(string seqGroup) { int i, count; count = 0; for (i=0; ierrorOut(e, "SharedOrderVector", "print"); exit(1); } } /***********************************************************************/ void SharedOrderVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; data.clear(); } /***********************************************************************/ void SharedOrderVector::resize(int){ m->mothurOut("resize() did nothing in class SharedOrderVector"); } /***********************************************************************/ vector::iterator SharedOrderVector::begin(){ return data.begin(); } /***********************************************************************/ vector::iterator SharedOrderVector::end(){ return data.end(); } /***********************************************************************/ int SharedOrderVector::size(){ return data.size(); } /***********************************************************************/ RAbundVector SharedOrderVector::getRAbundVector(){ try { RAbundVector rav(data.size()); for(int i=0;i=0;i--){ if(rav.get(i) == 0){ rav.pop_back(); } else{ break; } } rav.setLabel(label); return rav; } catch(exception& e) { m->errorOut(e, "SharedOrderVector", "getRAbundVector"); exit(1); } } /***********************************************************************/ OrderVector SharedOrderVector::getOrderVector(map* nameMap = nullptr) { try { OrderVector ov; for (int i = 0; i < data.size(); i++) { ov.push_back(data[i].binNumber); } util.mothurRandomShuffle(ov); ov.setLabel(label); return ov; } catch(exception& e) { m->errorOut(e, "SharedOrderVector", "getOrderVector"); exit(1); } } /***********************************************************************/ SAbundVector SharedOrderVector::getSAbundVector(){ RAbundVector rav(this->getRAbundVector()); return rav.getSAbundVector(); } /***********************************************************************/ SharedRAbundVectors* SharedOrderVector::getSharedRAbundVector(string group) { try { SharedRAbundVector* sharedRav = new SharedRAbundVector(data.size()); sharedRav->setLabel(label); sharedRav->setGroup(group); for (int i = 0; i < data.size(); i++) { if (data[i].group == group) { sharedRav->set(data[i].binNumber, sharedRav->get(data[i].binNumber) + 1); } } SharedRAbundVectors* lookup = new SharedRAbundVectors(); lookup->setOTUNames(currentLabels); lookup->push_back(sharedRav); return lookup; } catch(exception& e) { m->errorOut(e, "SharedOrderVector", "getSharedRAbundVector"); exit(1); } } /***********************************************************************/ SharedRAbundVectors* SharedOrderVector::getSharedRAbundVector() { try { SharedRAbundVectors* lookup = new SharedRAbundVectors(); sort(allGroups.begin(), allGroups.end()); //create and initialize vector of sharedvectors, one for each group for (int i = 0; i < allGroups.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(numBins); temp->setLabel(getLabel()); temp->setGroup(allGroups[i]); lookup->push_back(temp); } int 
numSeqs = size(); //sample all the members for(int i=0;iget(chosen.binNumber, chosen.group); lookup->set(chosen.binNumber, (abundance + 1), chosen.group); } lookup->setOTUNames(currentLabels); return lookup; } catch(exception& e) { m->errorOut(e, "SharedOrderVector", "getSharedRAbundVector"); exit(1); } } /***********************************************************************/ SharedOrderVector SharedOrderVector::getSharedOrderVector(){ util.mothurRandomShuffle(*this); return *this; } /***********************************************************************/ void SharedOrderVector::updateStats(){ try { needToUpdate = 0; numSeqs = 0; numBins = 0; maxRank = 0; numSeqs = data.size(); std::set uniqueBins; for(int i=0;i maxRank) { maxRank = data[i].binNumber; } } numBins = uniqueBins.size(); } catch(exception& e) { m->errorOut(e, "SharedOrderVector", "updateStats"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sharedordervector.h000077500000000000000000000051331424121717000234010ustar00rootroot00000000000000#ifndef SHAREDORDER_H #define SHAREDORDER_H /* * sharedorder.h * Mothur * * Created by Sarah Westcott on 12/9/08. * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This class is a child to datavector. It represents OTU information at a certain distance. It is similiar to an order vector except each member of data knows which group it belongs to. Each member of the internal container "data" represents is an individual which knows the OTU from which it came, the group it is in and the abundance is equal to the OTU number. */ #include "datavector.hpp" #include "sabundvector.hpp" #include "rabundvector.hpp" #include "sharedrabundvectors.hpp" #include "groupmap.h" struct individual { string group; int binNumber; bool operator()(const individual& i1, const individual& i2) { return (i1.binNumber > i2.binNumber); } individual() { group = ""; binNumber = 0; } }; class SharedOrderVector : public DataVector { public: SharedOrderVector(); // SharedOrderVector(int ns, int nb=0, int mr=0) : DataVector(), data(ns, -1), maxRank(0), numBins(0), numSeqs(0) {}; SharedOrderVector(const SharedOrderVector& ov) : DataVector(ov.label), data(ov.data), maxRank(ov.maxRank), numBins(ov.numBins), numSeqs(ov.numSeqs), needToUpdate(ov.needToUpdate) {if(needToUpdate == 1){ updateStats();}}; SharedOrderVector(string, vector); SharedOrderVector(ifstream&, vector&, string&); ~SharedOrderVector(){}; individual get(int); void resize(int); int size(); void print(ostream&); vector::iterator begin(); vector::iterator end(); void push_back(int, string); //abundance/OTUNUmber, group MUST CALL UPDATE STATS AFTER PUSHBACK!!! 
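    //e.g., hypothetical usage: ov.push_back(3, "soil"); ov.push_back(3, "water"); ov.updateStats();
    //numBins, numSeqs and maxRank are only refreshed by updateStats(), so call it once after the last push_back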
void updateStats(); void clear(); int getNumBins(); int getNumSeqs(); int getMaxRank(); vector getGroups() { sort(allGroups.begin(), allGroups.end()); return allGroups; } RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); SharedOrderVector getSharedOrderVector(); SharedRAbundVectors* getSharedRAbundVector(string); //get the sharedRabundvector for a sepecific group SharedRAbundVectors* getSharedRAbundVector(); //returns sharedRabundVectors for all the users groups private: //GroupMap* groupmap; vector currentLabels; vector data; vector allGroups; map< int, vector >::iterator it; int maxRank; int numBins; int numSeqs; bool needToUpdate; void set(int, int, int, string); //index, OTU, abundance, group void setNamesOfGroups(string seqGroup); }; #endif mothur-1.48.0/source/datastructures/sharedrabundfloatvector.cpp000077500000000000000000000212721424121717000251240ustar00rootroot00000000000000// // sharedrabundfloatvector.cpp // Mothur // // Created by Sarah Westcott on 7/25/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "sharedrabundfloatvector.hpp" /***********************************************************************/ SharedRAbundFloatVector::SharedRAbundFloatVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0), group("") {} /***********************************************************************/ SharedRAbundFloatVector::SharedRAbundFloatVector(int n) : DataVector(), data(n,0) , maxRank(0), numBins(n), numSeqs(0), group("") {} /***********************************************************************/ SharedRAbundFloatVector::SharedRAbundFloatVector(vector rav) : DataVector(), maxRank(0), numBins(rav.size()), numSeqs(0), group("") { try { data.assign(numBins, 0); for(int i=0;ierrorOut(e, "SharedRAbundFloatVector", "SharedRAbundFloatVector"); exit(1); } } /***********************************************************************/ SharedRAbundFloatVector::SharedRAbundFloatVector(vector rav, float mr, int nb, float ns) : DataVector(), group(""){ try { numBins = nb; maxRank = mr; numSeqs = ns; data = rav; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVector", "SharedRAbundFloatVector"); exit(1); } } /***********************************************************************/ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { f >> label >> group >> numBins; data.assign(numBins, 0); float inputData; for(int i=0;i> inputData; set(i, inputData); } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVector", "SharedRAbundFloatVector"); exit(1); } } /***********************************************************************/ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f, string l, string g, int n) : DataVector(), maxRank(0), numBins(n), numSeqs(0) { try { label = l; group = g; data.assign(numBins, 0); float inputData; for(int i=0;i> inputData; set(i, inputData); } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVector", "SharedRAbundFloatVector"); exit(1); } } /***********************************************************************/ SharedRAbundFloatVector::~SharedRAbundFloatVector() { } /***********************************************************************/ void SharedRAbundFloatVector::set(int binNumber, float newBinSize){ try { int oldBinSize = data[binNumber]; data[binNumber] = newBinSize; if(newBinSize > maxRank) { maxRank = newBinSize; } numSeqs += (newBinSize - oldBinSize); } catch(exception& e) { m->errorOut(e, 
"SharedRAbundFloatVector", "set"); exit(1); } } /***********************************************************************/ float SharedRAbundFloatVector::get(int index){ return data[index]; } /***********************************************************************/ void SharedRAbundFloatVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; group = ""; data.clear(); } /***********************************************************************/ void SharedRAbundFloatVector::push_back(float binSize){ try { data.push_back(binSize); numBins++; if(binSize > maxRank){ maxRank = binSize; } numSeqs += binSize; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVector", "push_back"); exit(1); } } /***********************************************************************/ float SharedRAbundFloatVector::remove(int bin){ try { float abund = data[bin]; data.erase(data.begin()+bin); numBins--; if(abund == maxRank){ maxRank = util.max(data); } numSeqs -= abund; return abund; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVector", "remove"); exit(1); } } /***********************************************************************/ float SharedRAbundFloatVector::remove(vector bins){ try { if (bins.size() == 0) { return 0; } int numRemoved = 0; vector newData; int binIndex = 0; for (int i = 0; i < data.size(); i++) { if (m->getControl_pressed()) { break; } if (i != bins[binIndex]) { newData.push_back(data[i]); }else if (i == bins[binIndex]) { binIndex++; numRemoved += data[i]; if (binIndex > bins.size()) { //removed all bins newData.insert(newData.end(), data.begin()+i, data.end()); //add rest of good bins break; } } } data = newData; numBins = data.size(); vector::iterator it = max_element(data.begin(), data.end()); maxRank = *it; numSeqs -= numRemoved; return numRemoved; } catch(exception& e) { m->errorOut(e, "SharedRASharedRAbundFloatVectorbundVector", "remove"); exit(1); } } /***********************************************************************/ void SharedRAbundFloatVector::resize(int size){ data.resize(size); vector::iterator it = max_element(data.begin(), data.end()); maxRank = *it; numSeqs = util.sum(data); numBins = size; } /***********************************************************************/ int SharedRAbundFloatVector::size(){ return data.size(); } /***********************************************************************/ void SharedRAbundFloatVector::print(ostream& output){ try { output << label; output << '\t' << group << '\t' << numBins; for(int i=0;ierrorOut(e, "SharedRAbundVector", "nonSortedPrint"); exit(1); } } /***********************************************************************/ int SharedRAbundFloatVector::getNumBins(){ return numBins; } /***********************************************************************/ float SharedRAbundFloatVector::getNumSeqs(){ return numSeqs; } /***********************************************************************/ float SharedRAbundFloatVector::getMaxRank(){ return maxRank; } /***********************************************************************/ RAbundVector SharedRAbundFloatVector::getRAbundVector(){ RAbundVector rav; for(int i = 0; i < data.size(); i++) { rav.push_back(int(data[i])); } rav.setLabel(label); return rav; } /***********************************************************************/ RAbundFloatVector SharedRAbundFloatVector::getRAbundFloatVector(){ RAbundFloatVector rav; for(int i = 0; i < data.size(); i++) { rav.push_back(data[i]); } rav.setLabel(label); return rav; } 
/***********************************************************************/ SharedRAbundVector SharedRAbundFloatVector::getSharedRAbundVector(){ SharedRAbundVector rav; rav.setLabel(label); rav.setGroup(group); for(int i = 0; i < data.size(); i++) { rav.push_back(int(data[i])); } return rav; } /***********************************************************************/ SAbundVector SharedRAbundFloatVector::getSAbundVector() { try { SAbundVector sav(int(maxRank+1)); for(int i=0;ierrorOut(e, "SharedRAbundFloatVector", "getSAbundVector"); exit(1); } } /***********************************************************************/ OrderVector SharedRAbundFloatVector::getOrderVector(map* nameMap = nullptr) { try { m->mothurOut("[ERROR]: can not convert SharedRAbundVectors to an ordervector, ordervectors assume no zero OTUS.\n"); m->setControl_pressed(true); OrderVector o; return o; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVector", "getOrderVector"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sharedrabundfloatvector.hpp000077500000000000000000000053251424121717000251320ustar00rootroot00000000000000// // sharedrabundfloatvector.hpp // Mothur // // Created by Sarah Westcott on 7/25/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef sharedrabundfloatvector_hpp #define sharedrabundfloatvector_hpp #include "datavector.hpp" #include "rabundvector.hpp" #include "sabundvector.hpp" #include "ordervector.hpp" /* Data Structure for a rabund file. This class is a child to datavector. It represents OTU information at a certain distance. A rabundvector can be converted into and ordervector, listvector or sabundvector. Each member of the internal container "data" represents an individual OTU. So data[0] = 6, because there are six member in that OTU. example: listvector = a,b,c,d,e,f g,h,i j,k l m rabundvector = 6 3 2 1 1 sabundvector = 2 1 1 0 0 1 ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 a,b,g,j,l = sample A c,d,h,i = sample B e,f,k,m = sample C The sharedRabund class is very similar to rabund. SharedRabund allows for 0 otus in a sample, rabunds do not. SharedRabund also know their group. SharedRabunds are stored as floats, but printed as integers or floats depending on whether it represents a shared or relabund file. 
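 (format of the rows below: group name, number of OTUs, then one abundance per OTU;
  when written to a file by print(), the distance label is printed before the group)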
sharedrabund = A 5 2 1 1 1 0 B 5 2 2 0 0 0 C 5 2 0 1 0 1 */ //class RAbundFloatVector; //class OrderVector; class SharedRAbundFloatVector : public DataVector { public: SharedRAbundFloatVector(); SharedRAbundFloatVector(int); SharedRAbundFloatVector(vector, float, int, float); //maxRank, numbins, numSeqs SharedRAbundFloatVector(vector); SharedRAbundFloatVector(const SharedRAbundFloatVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs), group(bv.group) {}; SharedRAbundFloatVector(ifstream&); SharedRAbundFloatVector(ifstream& f, string l, string g, int); //filehandle, label ~SharedRAbundFloatVector(); int getNumBins(); float getNumSeqs(); float getMaxRank(); float remove(int); float remove(vector); void set(int, float); float get(int); vector get() { return data; } void push_back(float); void resize(int); int size(); void clear(); void print(ostream&); //nonsorted RAbundVector getRAbundVector(); SharedRAbundVector getSharedRAbundVector(); RAbundFloatVector getRAbundFloatVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); string getGroup() { return group; } //group = "" for rabunds without groupInfo void setGroup(string g) { group = g; } private: vector data; float maxRank; int numBins; float numSeqs; string group; }; #endif /* sharedrabundfloatvector_hpp */ mothur-1.48.0/source/datastructures/sharedrabundfloatvectors.cpp000077500000000000000000000514721424121717000253140ustar00rootroot00000000000000// // sharedrabundfloatvectors.cpp // Mothur // // Created by Sarah Westcott on 5/15/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "sharedrabundfloatvectors.hpp" /***********************************************************************/ //reads a shared file SharedRAbundFloatVectors::SharedRAbundFloatVectors(ifstream& f, vector& userGroups, string& nextLabel, string& labelTag) : DataVector() { try { int num; string holdLabel, groupN; int numUserGroups = userGroups.size(); for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != nullptr) { delete lookup[i]; lookup[i] = nullptr; } } lookup.clear(); //are we at the beginning of the file?? if (nextLabel == "") { f >> label; //is this a shared file that has headers if (label == "label") { //gets "group" f >> label; gobble(f); //gets "numOtus" f >> label; gobble(f); //eat rest of line label = util.getline(f); gobble(f); //parse labels to save istringstream iStringStream(label); while(!iStringStream.eof()){ if (m->getControl_pressed()) { break; } string temp; iStringStream >> temp; gobble(iStringStream); currentLabels.push_back(temp); } if (currentLabels.size() != 0) { string binLabelTag = currentLabels[0]; labelTag = ""; for (int i = 0; i < binLabelTag.length(); i++) { if (isalpha(binLabelTag[i])){ labelTag += binLabelTag[i]; } } } f >> label >> groupN >> num; }else { //read in first row since you know there is at least 1 group. f >> groupN >> num; //make binlabels because we don't have any string snumBins = toString(num); if (labelTag == "") { labelTag = "Otu"; } for (int i = 0; i < num; i++) { //if there is a bin label use it otherwise make one string binLabel = labelTag; string sbinNumber = toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; currentLabels.push_back(binLabel); } } }else { label = nextLabel; //read in first row since you know there is at least 1 group. 
f >> groupN >> num; } bool readData = false; bool remove = false; if (numUserGroups == 0) { //user has not specified groups, so we will use all of them userGroups.push_back(groupN); readData = true; }else{ if (util.inUsersGroups(groupN, userGroups)) { readData = true; } else { remove = true; }// skipline because you are a group we dont care about } holdLabel = label; numBins = num; if (readData) { //add new vector to lookup SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(f, label, groupN, numBins); push_back(temp); } else { util.getline(f); } gobble(f); if (!(f.eof())) { f >> nextLabel; } //read the rest of the groups info in while ((nextLabel == holdLabel) && (f.eof() != true)) { f >> groupN >> num; bool readData = false; if (numUserGroups == 0) { //user has not specified groups, so we will use all of them userGroups.push_back(groupN); readData = true; }else{ if (util.inUsersGroups(groupN, userGroups)) { readData = true; } else { remove = true; }// skipline because you are a group we dont care about } if (readData) { SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(f, label, groupN, numBins); push_back(temp); }else { util.getline(f); } gobble(f); if (f.eof() != true) { f >> nextLabel; } } if (remove) { eliminateZeroOTUS(); } otuTag = labelTag; //error in names of user inputted Groups if (lookup.size() < userGroups.size()) { m->mothurOut("[ERROR]: requesting groups not present in files, aborting.\n"); m->setControl_pressed(true); } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "SharedRAbundFloatVectors"); exit(1); } } /***********************************************************************/ void SharedRAbundFloatVectors::print(ostream& output, bool& printOTUHeaders){ try { printHeaders(output, printOTUHeaders); sort(lookup.begin(), lookup.end(), compareRAbundFloats); for (int i = 0; i < lookup.size(); i++) { if (m->getControl_pressed()) { break; } lookup[i]->print(output); } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "print"); exit(1); } } /***********************************************************************/ string SharedRAbundFloatVectors::getOTUName(int bin){ try { if (currentLabels.size() > bin) { } else { getOTUNames(); } return currentLabels[bin]; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getOTUName"); exit(1); } } /***********************************************************************/ void SharedRAbundFloatVectors::setOTUName(int bin, string otuName){ try { if (currentLabels.size() > bin) { currentLabels[bin] = otuName; } else { getOTUNames(); //fills currentLabels if needed if (currentLabels.size() > bin) { currentLabels[bin] = otuName; } else { m->setControl_pressed(true); m->mothurOut("[ERROR]: " + toString(bin) + " bin does not exist\n"); } } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "setOTUName"); exit(1); } } /***********************************************************************/ int SharedRAbundFloatVectors::push_back(vector abunds, string binLabel){ try { if (abunds.size() != lookup.size()) { m->mothurOut("[ERROR]: you have provided " + toString(abunds.size()) + " abundances, but mothur was expecting " + toString(lookup.size()) + ", please correct.\n"); m->setControl_pressed(true); return 0; } for (int i = 0; i < lookup.size(); i ++) { lookup[i]->push_back(abunds[i]); } //vector currentLabels = m->getCurrentSharedBinLabels(); if (binLabel == "") { //create one int otuNum = 1; bool notDone = true; //find label prefix string prefix = "Otu"; if 
(currentLabels.size() != 0) { if (currentLabels[currentLabels.size()-1][0] == 'P') { prefix = "PhyloType"; } string tempLabel = currentLabels[currentLabels.size()-1]; string simpleLastLabel = util.getSimpleLabel(tempLabel); util.mothurConvert(simpleLastLabel, otuNum); otuNum++; } string potentialLabel = toString(otuNum); while (notDone) { if (m->getControl_pressed()) { notDone = false; break; } potentialLabel = toString(otuNum); vector::iterator it = find(currentLabels.begin(), currentLabels.end(), potentialLabel); if (it == currentLabels.end()) { potentialLabel = prefix + toString(otuNum); it = find(currentLabels.begin(), currentLabels.end(), potentialLabel); if (it == currentLabels.end()) { notDone = false; break; } } otuNum++; } binLabel = potentialLabel; } currentLabels.push_back(binLabel); numBins++; return lookup.size(); } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "push_back"); exit(1); } } /***********************************************************************/ int SharedRAbundFloatVectors::push_back(SharedRAbundFloatVector* thisLookup){ try { if (numBins == 0) { numBins = thisLookup->getNumBins(); } lookup.push_back(thisLookup); sort(lookup.begin(), lookup.end(), compareRAbundFloats); if (label == "") { label = thisLookup->getLabel(); } groupNames.clear(); for (int i = 0; i < lookup.size(); i ++) { groupNames[lookup[i]->getGroup()] = i; } return lookup.size(); } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "push_back"); exit(1); } } /***********************************************************************/ float SharedRAbundFloatVectors::getOTUTotal(int bin){ try { float totalOTUAbund = 0; for (int i = 0; i < lookup.size(); i++) { totalOTUAbund += lookup[i]->get(bin); } return totalOTUAbund; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getOTUTotal"); exit(1); } } /***********************************************************************/ vector SharedRAbundFloatVectors::getOTU(int bin){ try { vector abunds; for (int i = 0; i < lookup.size(); i++) { abunds.push_back(lookup[i]->get(bin)); } return abunds; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getOTU"); exit(1); } } /***********************************************************************/ void SharedRAbundFloatVectors::setLabels(string l){ try { label = l; for (int i = 0; i < lookup.size(); i++) { lookup[i]->setLabel(l); } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "setLabels"); exit(1); } } /***********************************************************************/ float SharedRAbundFloatVectors::get(int bin, string group){ try { float abund = 0; map::iterator it = groupNames.find(group); if (it == groupNames.end()) { m->mothurOut("[ERROR]: can not find group " + group + ".\n"); m->setControl_pressed(true); } else { abund = lookup[it->second]->get(bin); } return abund; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "get"); exit(1); } } /***********************************************************************/ float SharedRAbundFloatVectors::getNumSeqs(string group){ try { float numSeqs = 0; map::iterator it = groupNames.find(group); if (it == groupNames.end()) { m->mothurOut("[ERROR]: can not find group " + group + ".\n"); m->setControl_pressed(true); } else { numSeqs = lookup[it->second]->getNumSeqs(); } return numSeqs; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getNumSeqs"); exit(1); } } /***********************************************************************/ void 
SharedRAbundFloatVectors::set(int bin, float binSize, string group){ try { map::iterator it = groupNames.find(group); if (it == groupNames.end()) { m->mothurOut("[ERROR]: can not find group " + group + ".\n"); m->setControl_pressed(true); } else { lookup[it->second]->set(bin, binSize); } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "set"); exit(1); } } /***********************************************************************/ float SharedRAbundFloatVectors::removeOTU(int bin){ try { float totalOTUAbund = 0; for (int i = 0; i < lookup.size(); i ++) { totalOTUAbund += lookup[i]->remove(bin); } currentLabels.erase(currentLabels.begin()+bin); numBins--; return totalOTUAbund; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "removeOTU"); exit(1); } } /***********************************************************************/ float SharedRAbundFloatVectors::removeOTUs(vector bins, bool sorted){ try { if (bins.size() == 0) { return 0; } if (!sorted) { sort(bins.begin(), bins.end()); } float totalOTUAbund = 0; for (int i = 0; i < lookup.size(); i ++) { totalOTUAbund += lookup[i]->remove(bins); } vector newLabels; int binIndex = 0; for (int i = 0; i < currentLabels.size(); i++) { if (m->getControl_pressed()) { break; } if (i != bins[binIndex]) { newLabels.push_back(currentLabels[i]); }else if (i == bins[binIndex]) { binIndex++; if (binIndex > bins.size()) { //removed all bins newLabels.insert(newLabels.end(), currentLabels.begin()+i, currentLabels.end()); //add rest of good bins break; } } } currentLabels = newLabels; numBins = currentLabels.size(); return totalOTUAbund; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "removeOTUs"); exit(1); } } /***********************************************************************/ void SharedRAbundFloatVectors::setOTUNames(vector names){ try { currentLabels.clear(); currentLabels = names; getOTUNames(); } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "setOTUNames"); exit(1); } } /***********************************************************************/ vector SharedRAbundFloatVectors::getOTUNames(){ try { util.getOTUNames(currentLabels, numBins, otuTag); return currentLabels; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getOTUNames"); exit(1); } } /***********************************************************************/ void SharedRAbundFloatVectors::printHeaders(ostream& output, bool& printSharedHeaders){ try { if (printSharedHeaders) { getOTUNames(); output << "label\tGroup\tnum" + otuTag + "s"; for (int i = 0; i < numBins; i++) { output << '\t' << currentLabels[i]; } output << endl; printSharedHeaders = false; } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "printHeaders"); exit(1); } } /***********************************************************************/ vector SharedRAbundFloatVectors::getNamesGroups(){ try { vector names; for (int i = 0; i < lookup.size(); i ++) { names.push_back(lookup[i]->getGroup()); } return names; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getNamesGroups"); exit(1); } } /***********************************************************************/ float SharedRAbundFloatVectors::getNumSeqsSmallestGroup(){ try { float smallest = MOTHURMAX; for (int i = 0; i < lookup.size(); i++) { if (lookup[i]->getNumSeqs() < smallest) { smallest = lookup[i]->getNumSeqs(); } } return smallest; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getNumSeqsSmallestGroup"); exit(1); } } 
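/***********************************************************************/
/*
 A minimal usage sketch of the container above, assuming two hypothetical groups "A" and "B";
 the label "0.03" and the abundance values are invented for illustration and the sketch is not
 called anywhere in mothur. It shows how the per-group rows (SharedRAbundFloatVector) and the
 per-OTU operations defined in this file fit together:

     SharedRAbundFloatVectors relabund;                                 //empty container; otuTag defaults to "Otu"

     SharedRAbundFloatVector* groupA = new SharedRAbundFloatVector();   //one row per group
     groupA->setGroup("A"); groupA->push_back(0.50); groupA->push_back(0.00);

     SharedRAbundFloatVector* groupB = new SharedRAbundFloatVector();
     groupB->setGroup("B"); groupB->push_back(0.25); groupB->push_back(0.00);

     relabund.push_back(groupA);                              //container owns the pointers; clear() deletes them
     relabund.push_back(groupB);
     relabund.setLabels("0.03");

     float otu1Total = relabund.getOTUTotal(0);               //0.75 = 0.50 + 0.25
     float abundA    = relabund.get(0, "A");                  //0.50

     relabund.eliminateZeroOTUS();                            //drops the second OTU, which is zero in every group

 Keeping the whole table behind push_back/get/set lets callers add or drop groups without re-reading
 the relabund file; eliminateZeroOTUS is run afterwards so downstream calculators never see all-zero columns.
*/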
/***********************************************************************/ RAbundVector SharedRAbundFloatVectors::getRAbundVector(){ try { RAbundVector rav; for (int i = 0; i < numBins; i++) { float abund = getOTUTotal(i); rav.push_back((int)abund); } return rav; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getSharedRAbundVectors"); exit(1); } } /***********************************************************************/ SAbundVector SharedRAbundFloatVectors::getSAbundVector(){ try { RAbundVector rav = getRAbundVector(); return rav.getSAbundVector(); } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getSharedRAbundVectors"); exit(1); } } /***********************************************************************/ vector SharedRAbundFloatVectors::getSharedRAbundFloatVectors(){ try { vector newLookup; for (int i = 0; i < lookup.size(); i++) { SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(*lookup[i]); newLookup.push_back(temp); } return newLookup; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getSharedRAbundVectors"); exit(1); } } /***********************************************************************/ vector SharedRAbundFloatVectors::getSharedRAbundVectors(){ try { vector newLookup; for (int i = 0; i < lookup.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(lookup[i]->getSharedRAbundVector()); newLookup.push_back(temp); } return newLookup; } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "getSharedRAbundVectors"); exit(1); } } /***********************************************************************/ void SharedRAbundFloatVectors::removeGroups(vector g){ try { bool remove = false; for (vector::iterator it = lookup.begin(); it != lookup.end();) { //if this sharedrabund is not from a group the user wants then delete it. if (util.inUsersGroups((*it)->getGroup(), g) ) { remove = true; delete (*it); (*it) = nullptr; it = lookup.erase(it); }else { ++it; } } if (remove) { eliminateZeroOTUS(); } groupNames.clear(); for (int i = 0; i < lookup.size(); i ++) { groupNames[lookup[i]->getGroup()] = i; } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "removeGroups"); exit(1); } } /***********************************************************************/ int SharedRAbundFloatVectors::removeGroups(int minSize, bool silent){ try { vector Groups; bool remove = false; for (vector::iterator it = lookup.begin(); it != lookup.end();) { if ((*it)->getNumSeqs() < minSize) { if (!silent) { m->mothurOut((*it)->getGroup() + " contains " + toString((*it)->getNumSeqs()) + ". 
Eliminating.\n"); } delete (*it); (*it) = nullptr; it = lookup.erase(it); }else { Groups.push_back((*it)->getGroup()); ++it; } } if (remove) { eliminateZeroOTUS(); } groupNames.clear(); for (int i = 0; i < lookup.size(); i ++) { groupNames[lookup[i]->getGroup()] = i; } return lookup.size(); } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "removeGroups"); exit(1); } } /**********************************************************************************************************************/ void SharedRAbundFloatVectors::eliminateZeroOTUS() { try { if (currentLabels.size() != numBins) { currentLabels = getOTUNames(); } if (lookup.size() > 1) { vector otusToRemove; for (int i = 0; i < lookup[0]->getNumBins(); i++) { if (m->getControl_pressed()) { break; } float total = getOTUTotal(i); //if they are not all zero add this bin if (total == 0) { otusToRemove.push_back(i); } //sorted order } removeOTUs(otusToRemove, true); //sorted } } catch(exception& e) { m->errorOut(e, "SharedRAbundFloatVectors", "eliminateZeroOTUS"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sharedrabundfloatvectors.hpp000077500000000000000000000071401424121717000253120ustar00rootroot00000000000000// // sharedrabundfloatvectors.hpp // Mothur // // Created by Sarah Westcott on 5/15/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef sharedrabundfloatvectors_hpp #define sharedrabundfloatvectors_hpp #include "datavector.hpp" #include "rabundvector.hpp" #include "sharedrabundfloatvector.hpp" #include "sharedordervector.h" #include "ordervector.hpp" /* DataStructure for a relabund file. */ //******************************************************************************************************************** inline bool compareRAbundFloats(SharedRAbundFloatVector* left, SharedRAbundFloatVector* right){ return (left->getGroup() < right->getGroup()); } //******************************************************************************************************************** class SharedRAbundFloatVectors : public DataVector { public: SharedRAbundFloatVectors() : DataVector() { label = ""; numBins = 0; otuTag = "Otu"; } SharedRAbundFloatVectors(string oTag) : DataVector() { label = ""; numBins = 0; otuTag = oTag; } SharedRAbundFloatVectors(ifstream&, vector&, string&, string&); SharedRAbundFloatVectors(SharedRAbundFloatVectors& bv) : DataVector(bv), numBins(bv.numBins), otuTag(bv.otuTag) { vector data = bv.getSharedRAbundFloatVectors(); for (int i = 0; i < data.size(); i++) { push_back(data[i]); } setLabels(bv.getLabel()); setOTUNames(bv.getOTUNames()); eliminateZeroOTUS(); } ~SharedRAbundFloatVectors() { clear(); } void setLabels(string l); float getOTUTotal(int bin); vector getOTU(int bin); float removeOTU(int bin); float removeOTUs(vector, bool sorted=false); //bins to remove, sorted or not float get(int bin, string group); void set(int bin, float binSize, string group); void setOTUNames(vector names); vector getOTUNames(); string getOTUName(int); void setOTUName(int, string); int push_back(vector, string binLabel=""); //add otu. mothur assumes abunds are in same order as groups. 
int push_back(SharedRAbundFloatVector*); void removeGroups(vector g); int removeGroups(int minSize, bool silent=false); // removes any groups with numSeqs < minSize void resize(int n) { m->mothurOut("[ERROR]: can not use resize for SharedRAbundFloatVectors.\n"); } void clear() { for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != nullptr) { delete lookup[i]; lookup[i] = nullptr; } } lookup.clear(); groupNames.clear(); numBins = 0; currentLabels.clear(); } int size() { return (int)lookup.size(); } int getNumGroups() { return (int)lookup.size(); } int getNumBins() { return numBins; } float getNumSeqs(string); //group float getNumSeqsSmallestGroup(); void print(ostream&, bool&); vector getSharedRAbundVectors(); vector getSharedRAbundFloatVectors(); RAbundVector getRAbundVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*) { m->mothurOut("[ERROR]: can not convert SharedRAbundVectors to an ordervector, ordervectors assume no zero OTUS.\n"); m->setControl_pressed(true); OrderVector o; return o; } vector getNamesGroups(); void eliminateZeroOTUS(); //run after push_backs if groups are chosen private: void printHeaders(ostream&, bool&); vector lookup; vector currentLabels; map groupNames; int numBins; string otuTag; }; #endif /* sharedrabundfloatvectors_hpp */ mothur-1.48.0/source/datastructures/sharedrabundvector.cpp000077500000000000000000000222021424121717000240700ustar00rootroot00000000000000// // sharedrabundvector.cpp // Mothur // // Created by Sarah Westcott on 7/24/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "sharedrabundvector.hpp" /***********************************************************************/ SharedRAbundVector::SharedRAbundVector() : DataVector(), maxRank(0), numBins(0), numSeqs(0), group("") {} /***********************************************************************/ SharedRAbundVector::SharedRAbundVector(int n) : DataVector(), data(n,0) , maxRank(0), numBins(n), numSeqs(0), group("") {} /***********************************************************************/ SharedRAbundVector::SharedRAbundVector(vector rav) : DataVector(), maxRank(0), numBins(rav.size()), numSeqs(0), group("") { try { data.assign(numBins, 0); for(int i=0;ierrorOut(e, "SharedRAbundVector", "SharedRAbundVector"); exit(1); } } /***********************************************************************/ SharedRAbundVector::SharedRAbundVector(vector rav, int mr, int nb, int ns) : DataVector(), group(""){ try { numBins = nb; maxRank = mr; numSeqs = ns; data = rav; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector"); exit(1); } } /***********************************************************************/ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { f >> label >> group >> numBins; data.assign(numBins, 0); int inputData; for(int i=0;i> inputData; set(i, inputData); } } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "SharedRAbundVector"); exit(1); } } /***********************************************************************/ SharedRAbundVector::SharedRAbundVector(ifstream& f, string l, string g, int n) : DataVector(), maxRank(0), numBins(n), numSeqs(0) { try { label = l; group = g; data.assign(numBins, 0); string otuCountsData = util.getline(f); vector o = util.splitWhiteSpace(otuCountsData); if (o.size() != n) { m->mothurOut("[ERROR] : group " + group + " contains otu data for " + toString(o.size()) + " otus, but your sharedfile indicates you have " + 
toString(n) + " otus. Please correct.\n"); m->setControl_pressed(true); } else { int inputData; for(int i=0;ierrorOut(e, "SharedRAbundVector", "SharedRAbundVector"); exit(1); } } /***********************************************************************/ SharedRAbundVector::~SharedRAbundVector() { } /***********************************************************************/ void SharedRAbundVector::set(int binNumber, int newBinSize){ try { int oldBinSize = data[binNumber]; data[binNumber] = newBinSize; if(newBinSize > maxRank) { maxRank = newBinSize; } numSeqs += (newBinSize - oldBinSize); } catch(exception& e) { m->errorOut(e, "RAbundVector", "set"); exit(1); } } /***********************************************************************/ int SharedRAbundVector::increment(int binNumber){ try { data[binNumber]++; int newBinSize = data[binNumber]; if(newBinSize > maxRank) { maxRank = newBinSize; } numSeqs++; return newBinSize; } catch(exception& e) { m->errorOut(e, "RAbundVector", "increment"); exit(1); } } /***********************************************************************/ int SharedRAbundVector::get(int index){ return data[index]; } /***********************************************************************/ void SharedRAbundVector::clear(){ numBins = 0; maxRank = 0; numSeqs = 0; group = ""; data.clear(); } /***********************************************************************/ void SharedRAbundVector::push_back(int binSize){ try { data.push_back(binSize); numBins++; if(binSize > maxRank){ maxRank = binSize; } numSeqs += binSize; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "push_back"); exit(1); } } /***********************************************************************/ int SharedRAbundVector::remove(int bin){ try { int abund = data[bin]; data.erase(data.begin()+bin); numBins--; if(abund == maxRank){ vector::iterator it = max_element(data.begin(), data.end()); maxRank = *it; } numSeqs -= abund; return abund; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "remove"); exit(1); } } /***********************************************************************/ int SharedRAbundVector::remove(vector bins){ try { if (bins.size() == 0) { return 0; } int numRemoved = 0; vector newData; int binIndex = 0; for (int i = 0; i < data.size(); i++) { if (m->getControl_pressed()) { break; } if (i != bins[binIndex]) { newData.push_back(data[i]); }else if (i == bins[binIndex]) { binIndex++; numRemoved += data[i]; if (binIndex > bins.size()) { //removed all bins newData.insert(newData.end(), data.begin()+i, data.end()); //add rest of good bins break; } } } data = newData; numBins = data.size(); vector::iterator it = max_element(data.begin(), data.end()); maxRank = *it; numSeqs -= numRemoved; return numRemoved; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "remove"); exit(1); } } /***********************************************************************/ void SharedRAbundVector::resize(int size){ data.resize(size); vector::iterator it = max_element(data.begin(), data.end()); maxRank = *it; numSeqs = util.sum(data); numBins = size; } /***********************************************************************/ int SharedRAbundVector::size(){ return data.size(); } /***********************************************************************/ void SharedRAbundVector::print(ostream& output){ try { output << label; output << '\t' << group << '\t' << numBins; for(int i=0;ierrorOut(e, "SharedRAbundVector", "nonSortedPrint"); exit(1); } } 
/***********************************************************************/ int SharedRAbundVector::getNumBins(){ return numBins; } /***********************************************************************/ int SharedRAbundVector::getNumSeqs(){ return numSeqs; } /***********************************************************************/ int SharedRAbundVector::getMaxRank(){ return maxRank; } /***********************************************************************/ RAbundVector SharedRAbundVector::getRAbundVector(){ RAbundVector rav; for(int i = 0; i < data.size(); i++) { if (data[i] != 0) { rav.push_back(int(data[i])); } } rav.setLabel(label); return rav; } /***********************************************************************/ RAbundFloatVector SharedRAbundVector::getRAbundFloatVector(){ RAbundFloatVector rav; for(int i = 0; i < data.size(); i++) { if (data[i] != 0) { rav.push_back(float(data[i])); } } rav.setLabel(label); rav.setGroup(group); return rav; } /***********************************************************************/ SAbundVector SharedRAbundVector::getSAbundVector() { try { SAbundVector sav(int(maxRank+1)); for(int i=0;ierrorOut(e, "RAbundVector", "getSAbundVector"); exit(1); } } /***********************************************************************/ OrderVector SharedRAbundVector::getOrderVector(map* nameMap = nullptr) { try { m->mothurOut("[ERROR]: can not convert SharedRAbundVectors to an ordervector, ordervectors assume no zero OTUS.\n"); m->setControl_pressed(true); OrderVector o; return o; } catch(exception& e) { m->errorOut(e, "RAbundVector", "getOrderVector"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sharedrabundvector.hpp000077500000000000000000000053571424121717000241110ustar00rootroot00000000000000// // sharedrabundvector.hpp // Mothur // // Created by Sarah Westcott on 7/24/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef sharedrabundvector_hpp #define sharedrabundvector_hpp #include "datavector.hpp" #include "ordervector.hpp" #include "rabundvector.hpp" #include "rabundfloatvector.hpp" #include "sabundvector.hpp" /* Data Structure for a rabund file. This class is a child to datavector. It represents OTU information at a certain distance. A rabundvector can be converted into and ordervector, listvector or sabundvector. Each member of the internal container "data" represents an individual OTU. So data[0] = 6, because there are six member in that OTU. example: listvector = a,b,c,d,e,f g,h,i j,k l m rabundvector = 6 3 2 1 1 sabundvector = 2 1 1 0 0 1 ordervector = 1 1 1 1 1 1 2 2 2 3 3 4 5 a,b,g,j,l = sample A c,d,h,i = sample B e,f,k,m = sample C The sharedRabund class is very similar to rabund. SharedRabund allows for 0 otus in a sample, rabunds do not. SharedRabund also know their group. SharedRabunds are stored as floats, but printed as integers or floats depending on whether it represents a shared or relabund file. 
sharedrabund = A 5 2 1 1 1 0 B 5 2 2 0 0 0 C 5 2 0 1 0 1 */ //class RAbundFloatVector; class OrderVector; class SharedRAbundVector : public DataVector { #ifdef UNIT_TEST friend class TestSharedRabundVector; #endif public: SharedRAbundVector(); SharedRAbundVector(int); SharedRAbundVector(vector, int, int, int); SharedRAbundVector(vector); SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs), group(bv.group) { }; SharedRAbundVector(ifstream&); SharedRAbundVector(ifstream& f, string l, string g, int); //filehandle, label, numBins ~SharedRAbundVector(); int getNumBins(); int getNumSeqs(); int getMaxRank(); void set(int, int); int get(int); vector get() { return data; } string getGroup() { return group; } //group = "" for rabunds without groupInfo void setGroup(string g) { group = g; } int increment(int); //add 1 to bin void push_back(int); void resize(int); int size(); void clear(); int remove(int); int remove(vector); void print(ostream&); //nonsorted RAbundVector getRAbundVector(); RAbundFloatVector getRAbundFloatVector(); SAbundVector getSAbundVector(); OrderVector getOrderVector(map*); protected: vector data; int maxRank; int numBins; int numSeqs; string group; }; #endif /* sharedrabundvector_hpp */ mothur-1.48.0/source/datastructures/sharedrabundvectors.cpp000066400000000000000000000626601424121717000242640ustar00rootroot00000000000000// // sharedrabundvectors.cpp // Mothur // // Created by Sarah Westcott on 5/15/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "sharedrabundvectors.hpp" /***********************************************************************/ //reads a shared file SharedRAbundVectors::SharedRAbundVectors(ifstream& f, vector& userGroups, string& nextLabel, string& labelTag) : DataVector(){ try { int num, count; count = 0; string holdLabel, groupN; int numUserGroups = userGroups.size(); for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != nullptr) { delete lookup[i]; lookup[i] = nullptr; } } lookup.clear(); //are we at the beginning of the file?? if (nextLabel == "") { f >> label; //is this a shared file that has headers if (label == "label") { //gets "group" f >> label; gobble(f); //gets "numOtus" f >> label; gobble(f); //eat rest of line label = util.getline(f); gobble(f); //parse labels to save istringstream iStringStream(label); while(!iStringStream.eof()){ if (m->getControl_pressed()) { break; } string temp; iStringStream >> temp; gobble(iStringStream); currentLabels.push_back(temp); } if (currentLabels.size() != 0) { string binLabelTag = currentLabels[0]; labelTag = ""; for (int i = 0; i < binLabelTag.length(); i++) { if (isalpha(binLabelTag[i])){ labelTag += binLabelTag[i]; } } } f >> label >> groupN >> num; if (currentLabels.size() != 0) { if (currentLabels.size() != num) { m->mothurOut("[ERROR]: your shared file contains " + toString(currentLabels.size()) + " OTU labels, but your numOtus column indicates " + toString(num) + ". Cannot continue, please correct. This can be caused by editing your file incorrectly outside of mothur.\n"); m->setControl_pressed(true); } } }else { //read in first row since you know there is at least 1 group. 
f >> groupN >> num; //make binlabels because we don't have any string snumBins = toString(num); if (labelTag == "") { labelTag = "Otu"; } for (int i = 0; i < num; i++) { //if there is a bin label use it otherwise make one string binLabel = labelTag; string sbinNumber = toString(i+1); if (sbinNumber.length() < snumBins.length()) { int diff = snumBins.length() - sbinNumber.length(); for (int h = 0; h < diff; h++) { binLabel += "0"; } } binLabel += sbinNumber; currentLabels.push_back(binLabel); } } }else { label = nextLabel; //read in first row since you know there is at least 1 group. f >> groupN >> num; } //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling //m->setCurrentSharedBinLabels(m->getSharedBinLabelsInFile()); holdLabel = label; numBins = num; bool readData = false; bool remove = false; if (numUserGroups == 0) { //user has not specified groups, so we will use all of them userGroups.push_back(groupN); readData = true; }else{ if (util.inUsersGroups(groupN, userGroups)) { readData = true; } else { remove = true; } } if (readData) { //add new vector to lookup SharedRAbundVector* temp = new SharedRAbundVector(f, label, groupN, numBins); push_back(temp); } else { util.getline(f); } gobble(f); if (!(f.eof())) { f >> nextLabel; } //read the rest of the groups info in while ((nextLabel == holdLabel) && (f.eof() != true)) { f >> groupN >> num; bool readData = false; if (numUserGroups == 0) { //user has not specified groups, so we will use all of them userGroups.push_back(groupN); readData = true; }else{ if (util.inUsersGroups(groupN, userGroups)) { readData = true; } else { remove = true; }// skipline because you are a group we dont care about } if (readData) { //add new vector to lookup SharedRAbundVector* temp = new SharedRAbundVector(f, label, groupN, numBins); push_back(temp); } else { util.getline(f); } gobble(f); if (f.eof() != true) { f >> nextLabel; } } if (remove) { eliminateZeroOTUS(); } otuTag = labelTag; //error in names of user inputted Groups if (lookup.size() < userGroups.size()) { m->mothurOut("[ERROR]: requesting groups not present in files, aborting.\n"); m->setControl_pressed(true); } } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "SharedRAbundVectors"); exit(1); } } /***********************************************************************/ void SharedRAbundVectors::print(ostream& output, bool& printOTUHeaders){ try { printHeaders(output, printOTUHeaders); sort(lookup.begin(), lookup.end(), compareRAbunds); for (int i = 0; i < lookup.size(); i++) { if (m->getControl_pressed()) { break; } lookup[i]->print(output); } } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "print"); exit(1); } } /***********************************************************************/ void SharedRAbundVectors::printTidy(ostream& output, bool& printOTUHeaders, bool keepZero){ try { vector otuLabels = getOTUNames(); if (printOTUHeaders) { output << "label\tsample\tOTU\tabundance\n"; printOTUHeaders = false; } sort(lookup.begin(), lookup.end(), compareRAbunds); for (int i = 0; i < lookup.size(); i++) { if (m->getControl_pressed()) { break; } vector data = lookup[i]->get(); string thisGroup = lookup[i]->getGroup(); //if keepzero=false, zeroed otus are not outputted for (int j = 0; j < data.size(); j++) { if ((data[j] != 0) || (keepZero)) { output << label << '\t' << thisGroup << '\t' << otuLabels[j] << '\t' << data[j] << endl; } } } } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "printTidy"); exit(1); 
} } /***********************************************************************/ void SharedRAbundVectors::setOTUNames(vector names){ try { currentLabels.clear(); currentLabels = names; getOTUNames(); } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "setOTUNames"); exit(1); } } /***********************************************************************/ string SharedRAbundVectors::getOTUName(int bin){ try { if (currentLabels.size() > bin) { } else { getOTUNames(); } return currentLabels[bin]; } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "getOTUName"); exit(1); } } /***********************************************************************/ void SharedRAbundVectors::setOTUName(int bin, string otuName){ try { if (currentLabels.size() > bin) { currentLabels[bin] = otuName; } else { getOTUNames(); //fills currentLabels if needed if (currentLabels.size() > bin) { currentLabels[bin] = otuName; } else { m->setControl_pressed(true); m->mothurOut("[ERROR]: " + toString(bin) + " bin does not exist\n"); } } } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "setOTUName"); exit(1); } } /***********************************************************************/ vector SharedRAbundVectors::getOTUNames(){ try { util.getOTUNames(currentLabels, numBins, otuTag); return currentLabels; } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "getOTUNames"); exit(1); } } /***********************************************************************/ void SharedRAbundVectors::printHeaders(ostream& output, bool& printSharedHeaders){ try { if (printSharedHeaders) { getOTUNames(); output << "label\tGroup\tnum" + otuTag + "s"; for (int i = 0; i < numBins; i++) { output << '\t' << currentLabels[i]; } output << endl; printSharedHeaders = false; } } catch(exception& e) { m->errorOut(e, "SharedVector", "printHeaders"); exit(1); } } /***********************************************************************/ int SharedRAbundVectors::push_back(SharedRAbundVector* thisLookup){ try { if (numBins == 0) { numBins = thisLookup->getNumBins(); } else if (numBins != thisLookup->getNumBins()) { m->mothurOut("[ERROR]: Number of bins does not match. 
Expected " + toString(numBins) + " found " + toString(thisLookup->getNumBins()) + ".\n"); m->setControl_pressed(true); return 0; } lookup.push_back(thisLookup); sort(lookup.begin(), lookup.end(), compareRAbunds); if (label == "") { label = thisLookup->getLabel(); } groupNames.clear(); for (int i = 0; i < lookup.size(); i ++) { groupNames[lookup[i]->getGroup()] = i; } return ((int)lookup.size()); } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "push_back"); exit(1); } } /***********************************************************************/ int SharedRAbundVectors::push_back(vector abunds, string binLabel){ try { if (abunds.size() != lookup.size()) { m->mothurOut("[ERROR]: you have provided " + toString(abunds.size()) + " abundances, but mothur was expecting " + toString(lookup.size()) + ", please correct.\n"); m->setControl_pressed(true); return 0; } for (int i = 0; i < lookup.size(); i ++) { lookup[i]->push_back(abunds[i]); } if (binLabel == "") { //create one int otuNum = 1; bool notDone = true; //find label prefix string prefix = "Otu"; if (currentLabels.size() != 0) { if (currentLabels[currentLabels.size()-1][0] == 'P') { prefix = "PhyloType"; } string tempLabel = currentLabels[currentLabels.size()-1]; string simpleLastLabel = util.getSimpleLabel(tempLabel); util.mothurConvert(simpleLastLabel, otuNum); otuNum++; } string potentialLabel = toString(otuNum); while (notDone) { if (m->getControl_pressed()) { notDone = false; break; } potentialLabel = toString(otuNum); vector::iterator it = find(currentLabels.begin(), currentLabels.end(), potentialLabel); if (it == currentLabels.end()) { potentialLabel = prefix + toString(otuNum); it = find(currentLabels.begin(), currentLabels.end(), potentialLabel); if (it == currentLabels.end()) { notDone = false; break; } } otuNum++; } binLabel = potentialLabel; } currentLabels.push_back(binLabel); numBins++; return lookup.size(); } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "push_back"); exit(1); } } /***********************************************************************/ int SharedRAbundVectors::getOTUTotal(int bin){ try { int totalOTUAbund = 0; for (int i = 0; i < lookup.size(); i++) { totalOTUAbund += lookup[i]->get(bin); } return totalOTUAbund; } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "getOTUTotal"); exit(1); } } /***********************************************************************/ int SharedRAbundVectors::getOTUTotal(string otuLabel){ try { //find bin number int binNumber = -1; getOTUNames(); for (int i = 0; i < currentLabels.size(); i++) { if (util.getSimpleLabel(currentLabels[i]) == util.getSimpleLabel(otuLabel)) { binNumber = i; break; } } if (binNumber == -1) { return 0; } return getOTUTotal(binNumber); } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "getOTUTotal"); exit(1); } } /***********************************************************************/ vector SharedRAbundVectors::getOTU(int bin){ try { vector abunds; for (int i = 0; i < lookup.size(); i++) { abunds.push_back(lookup[i]->get(bin)); } return abunds; } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "push_back"); exit(1); } } /***********************************************************************/ void SharedRAbundVectors::setLabels(string l){ try { label = l; for (int i = 0; i < lookup.size(); i++) { lookup[i]->setLabel(l); } } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "setLabels"); exit(1); } } /***********************************************************************/ int 
SharedRAbundVectors::get(int bin, string group){ try { int abund = 0; map::iterator it = groupNames.find(group); if (it == groupNames.end()) { m->mothurOut("[ERROR]: can not find group " + group + ".\n"); m->setControl_pressed(true); } else { abund = lookup[it->second]->get(bin); } return abund; } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "get"); exit(1); } } /***********************************************************************/ int SharedRAbundVectors::getNumSeqs(string group){ try { int numSeqs = 0; map::iterator it = groupNames.find(group); if (it == groupNames.end()) { m->mothurOut("[ERROR]: can not find group " + group + ".\n"); m->setControl_pressed(true); } else { numSeqs = lookup[it->second]->getNumSeqs(); } return numSeqs; } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "getNumSeqs"); exit(1); } } /***********************************************************************/ void SharedRAbundVectors::set(int bin, int binSize, string group){ try { map::iterator it = groupNames.find(group); if (it == groupNames.end()) { m->mothurOut("[ERROR]: can not find group " + group + ".\n"); m->setControl_pressed(true); } else { lookup[it->second]->set(bin, binSize); } } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "set"); exit(1); } } /***********************************************************************/ int SharedRAbundVectors::removeOTU(int bin){ try { int totalOTUAbund = 0; for (int i = 0; i < lookup.size(); i ++) { totalOTUAbund += lookup[i]->remove(bin); } currentLabels.erase(currentLabels.begin()+bin); numBins--; return totalOTUAbund; } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "removeOTU"); exit(1); } } /***********************************************************************/ int SharedRAbundVectors::removeOTUs(vector bins, bool sorted){ try { if (bins.size() == 0) { return 0; } if (!sorted) { sort(bins.begin(), bins.end()); } int totalOTUAbund = 0; for (int i = 0; i < lookup.size(); i ++) { totalOTUAbund += lookup[i]->remove(bins); } vector newLabels; int binIndex = 0; for (int i = 0; i < currentLabels.size(); i++) { if (m->getControl_pressed()) { break; } if (i != bins[binIndex]) { newLabels.push_back(currentLabels[i]); }else if (i == bins[binIndex]) { binIndex++; if (binIndex > bins.size()) { //removed all bins newLabels.insert(newLabels.end(), currentLabels.begin()+i, currentLabels.end()); //add rest of good bins break; } } } currentLabels = newLabels; numBins = currentLabels.size(); return totalOTUAbund; } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "removeOTUs"); exit(1); } } /***********************************************************************/ vector SharedRAbundVectors::getNamesGroups(){ try { vector names; names.clear(); for (int i = 0; i < lookup.size(); i ++) { names.push_back(lookup[i]->getGroup()); } return names; } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "getNamesGroups"); exit(1); } } /***********************************************************************/ SharedOrderVector SharedRAbundVectors::getSharedOrderVector(){ try { SharedOrderVector order; for (int i = 0; i < lookup.size(); i++) { for (int j = 0; j < lookup[i]->getNumBins(); j++) { int abund = lookup[i]->get(j); if (abund != 0) { for (int k = 0; k < abund; k++) { order.push_back(j, lookup[i]->getGroup()); } } } } return order; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getSharedOrderVector"); exit(1); } } /***********************************************************************/ void 
SharedRAbundVectors::removeGroups(vector g){ try { bool remove = false; for (vector::iterator it = lookup.begin(); it != lookup.end();) { //if this sharedrabund is not from a group the user wants then delete it. if (util.inUsersGroups((*it)->getGroup(), g)) { remove = true; delete (*it); (*it) = nullptr; it = lookup.erase(it); }else { ++it; } } if (remove) { eliminateZeroOTUS(); } groupNames.clear(); for (int i = 0; i < lookup.size(); i ++) { groupNames[lookup[i]->getGroup()] = i; } } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "removeGroups"); exit(1); } } /***********************************************************************/ int SharedRAbundVectors::removeGroups(int minSize, bool silent){ try { vector Groups; bool remove = false; for (vector::iterator it = lookup.begin(); it != lookup.end();) { if ((*it)->getNumSeqs() < minSize) { if (!silent) { m->mothurOut((*it)->getGroup() + " contains " + toString((*it)->getNumSeqs()) + ". Eliminating.\n"); } delete (*it); (*it) = nullptr; it = lookup.erase(it); remove = true; }else { Groups.push_back((*it)->getGroup()); ++it; } } if (remove) { eliminateZeroOTUS(); } groupNames.clear(); for (int i = 0; i < lookup.size(); i ++) { groupNames[lookup[i]->getGroup()] = i; } return lookup.size(); } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "removeGroups"); exit(1); } } /***********************************************************************/ int SharedRAbundVectors::getNumSeqsSmallestGroup(){ try { int smallest = MOTHURMAX; for (int i = 0; i < lookup.size(); i++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: " + lookup[i]->getGroup() + " numSeqs = " + toString(lookup[i]->getNumSeqs()) + "\n"); } if (lookup[i]->getNumSeqs() < smallest) { smallest = lookup[i]->getNumSeqs(); } } return smallest; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getNumSeqsSmallestGroup"); exit(1); } } /***********************************************************************/ vector SharedRAbundVectors::getSharedRAbundVectors(){ try { vector newLookup; for (int i = 0; i < lookup.size(); i++) { if (m->getControl_pressed()) { return newLookup; } SharedRAbundVector* temp = new SharedRAbundVector(*lookup[i]); newLookup.push_back(temp); } return newLookup; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors"); exit(1); } } /***********************************************************************/ vector SharedRAbundVectors::getSharedRAbundFloatVectors(){ try { eliminateZeroOTUS(); vector newLookup; for (int i = 0; i < lookup.size(); i++) { if (m->getControl_pressed()) { return newLookup; } vector abunds; vector data = lookup[i]->get(); string group = lookup[i]->getGroup(); for (int j = 0; j < data.size(); j++) { abunds.push_back((float)data[j]); } SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(abunds); temp->setLabel(label); temp->setGroup(group); newLookup.push_back(temp); } return newLookup; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors"); exit(1); } } /***********************************************************************/ RAbundVector SharedRAbundVectors::getRAbundVector(){ try { RAbundVector rav; rav.setLabel(label); for (int i = 0; i < numBins; i++) { int abund = getOTUTotal(i); rav.push_back(abund); } return rav; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getSharedRAbundVectors"); exit(1); } } /***********************************************************************/ RAbundVector SharedRAbundVectors::getRAbundVector(string 
group){ try { RAbundVector rav; rav.setLabel(label); for (vector::iterator it = lookup.begin(); it != lookup.end();) { //if this sharedrabund is not from a group the user wants then delete it. if ((*it)->getGroup() == group) { for (int i = 0; i < (*it)->getNumBins(); i++) { rav.push_back((*it)->get(i)); } return rav; }else { ++it; } } return rav; } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getRAbundVector"); exit(1); } } /***********************************************************************/ SAbundVector SharedRAbundVectors::getSAbundVector(){ try { RAbundVector rav = getRAbundVector(); return rav.getSAbundVector(); } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getSAbundVector"); exit(1); } } /***********************************************************************/ SAbundVector SharedRAbundVectors::getSAbundVector(string group){ try { RAbundVector rav = getRAbundVector(group); return rav.getSAbundVector(); } catch(exception& e) { m->errorOut(e, "SharedRAbundVector", "getSAbundVector"); exit(1); } } /**********************************************************************************************************************/ void SharedRAbundVectors::eliminateZeroOTUS() { try { if (currentLabels.size() != numBins) { currentLabels = getOTUNames(); } if (lookup.size() > 1) { vector otusToRemove; for (int i = 0; i < lookup[0]->getNumBins(); i++) { if (m->getControl_pressed()) { break; } int total = getOTUTotal(i); //if they are not all zero add this bin if (total == 0) { otusToRemove.push_back(i); } //sorted order } removeOTUs(otusToRemove, true); //sorted } } catch(exception& e) { m->errorOut(e, "SharedRAbundVectors", "eliminateZeroOTUS"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sharedrabundvectors.hpp000077500000000000000000000075561424121717000242770ustar00rootroot00000000000000// // sharedrabundvectors.hpp // Mothur // // Created by Sarah Westcott on 5/15/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef sharedrabundvectors_hpp #define sharedrabundvectors_hpp #include "datavector.hpp" #include "rabundvector.hpp" #include "rabundfloatvector.hpp" #include "sharedordervector.h" #include "sharedrabundvector.hpp" #include "sharedrabundfloatvector.hpp" /* DataStructure for a shared file. 
*/ //******************************************************************************************************************** inline bool compareRAbunds(SharedRAbundVector* left, SharedRAbundVector* right){ return (left->getGroup() < right->getGroup()); } //******************************************************************************************************************** class SharedRAbundVectors : public DataVector { public: SharedRAbundVectors() : DataVector() { label = ""; numBins = 0; otuTag = "Otu"; } SharedRAbundVectors(string oTag) : DataVector() { label = ""; numBins = 0; otuTag = oTag; } SharedRAbundVectors(ifstream&, vector& userGroups, string&, string&); SharedRAbundVectors(SharedRAbundVectors& bv) : DataVector(bv), numBins(bv.numBins), otuTag(bv.otuTag) { vector data = bv.getSharedRAbundVectors(); for (int i = 0; i < data.size(); i++) { push_back(data[i]); } setLabels(bv.getLabel()); setOTUNames(bv.getOTUNames()); eliminateZeroOTUS(); } ~SharedRAbundVectors() { clear(); } void setLabels(string l); int getOTUTotal(int bin); int getOTUTotal(string otuLabel); //returns 0 if otuLabel is not found vector getOTU(int bin); int get(int bin, string group); void set(int bin, int binSize, string group); void setOTUNames(vector names); vector getOTUNames(); string getOTUName(int); void setOTUName(int, string); int getNumBins() { return numBins; } int getNumSeqsSmallestGroup(); vector getNamesGroups(); //same order as Rabunds int getNumGroups() { return (int)lookup.size(); } int getNumSeqs(string); //group int push_back(vector, string binLabel=""); //add otu. mothur assumes abunds are in same order as groups. int push_back(SharedRAbundVector*); void eliminateZeroOTUS(); //run after push_backs if groups are chosen int removeOTU(int bin); int removeOTUs(vector, bool sorted=false); //bins to remove, sorted or not void removeGroups(vector g); int removeGroups(int minSize, bool silent=false); // removes any groups with numSeqs < minSize int size() { return (int)lookup.size(); } void resize(int n) { m->mothurOut("[ERROR]: can not use resize for SharedRAbundVectors.\n"); m->setControl_pressed(true); } void clear() { for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != nullptr) { delete lookup[i]; lookup[i] = nullptr; } } lookup.clear(); groupNames.clear(); numBins = 0; } void print(ostream&, bool&); void printTidy(ostream&, bool&, bool); RAbundVector getRAbundVector(); RAbundVector getRAbundVector(string); //group you want the rabund for SAbundVector getSAbundVector(); SAbundVector getSAbundVector(string); //group you want the sabund for OrderVector getOrderVector(map*) { m->mothurOut("[ERROR]: can not convert SharedRAbundVectors to an ordervector, ordervectors assume no zero OTUS.\n"); m->setControl_pressed(true); OrderVector o; return o; } SharedOrderVector getSharedOrderVector(); vector getSharedRAbundVectors(); vector getSharedRAbundFloatVectors(); private: void printHeaders(ostream&, bool&); vector lookup; vector currentLabels; map groupNames; int numBins; string otuTag; }; #endif /* sharedrabundvectors_hpp */ mothur-1.48.0/source/datastructures/sparsedistancematrix.cpp000077500000000000000000000147051424121717000244510ustar00rootroot00000000000000// // sparsedistancematrix.cpp // Mothur // // Created by Sarah Westcott on 7/16/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
// #include "sparsedistancematrix.h" /***********************************************************************/ SparseDistanceMatrix::SparseDistanceMatrix() : numNodes(0), smallDist(MOTHURMAX){ m = MothurOut::getInstance(); sorted=false; aboveCutoff = MOTHURMAX; } /***********************************************************************/ int SparseDistanceMatrix::getNNodes(){ return numNodes; } /***********************************************************************/ void SparseDistanceMatrix::clear(){ for (int i = 0; i < seqVec.size(); i++) { seqVec[i].clear(); } seqVec.clear(); } /***********************************************************************/ float SparseDistanceMatrix::getSmallDist(){ return smallDist; } /***********************************************************************/ int SparseDistanceMatrix::updateCellCompliment(ull row, ull col){ try { ull vrow = seqVec[row][col].index; ull vcol = 0; //find the columns entry for this cell as well for (int i = 0; i < seqVec[vrow].size(); i++) { if (seqVec[vrow][i].index == row) { vcol = i; break; } } seqVec[vrow][vcol].dist = seqVec[row][col].dist; return 0; } catch(exception& e) { m->errorOut(e, "SparseDistanceMatrix", "updateCellCompliment"); exit(1); } } /***********************************************************************/ int SparseDistanceMatrix::rmCell(ull row, ull col){ try { numNodes-=2; ull vrow = seqVec[row][col].index; ull vcol = 0; //find the columns entry for this cell as well for (int i = 0; i < seqVec[vrow].size(); i++) { if (seqVec[vrow][i].index == row) { vcol = i; break; } } seqVec[vrow].erase(seqVec[vrow].begin()+vcol); seqVec[row].erase(seqVec[row].begin()+col); //print(); return(0); } catch(exception& e) { m->errorOut(e, "SparseDistanceMatrix", "rmCell"); exit(1); } } /***********************************************************************/ void SparseDistanceMatrix::addCell(ull row, PDistCell cell){ try { numNodes+=2; if(cell.dist < smallDist){ smallDist = cell.dist; } seqVec[row].push_back(cell); PDistCell temp(row, cell.dist); seqVec[cell.index].push_back(temp); } catch(exception& e) { m->errorOut(e, "SparseDistanceMatrix", "addCell"); exit(1); } } /***********************************************************************/ int SparseDistanceMatrix::addCellSorted(ull row, PDistCell cell){ try { numNodes+=2; if(cell.dist < smallDist){ smallDist = cell.dist; } seqVec[row].push_back(cell); PDistCell temp(row, cell.dist); seqVec[cell.index].push_back(temp); sortSeqVec(row); sortSeqVec(cell.index); int location = -1; //find location of new cell when sorted for (int i = 0; i < seqVec[row].size(); i++) { if (seqVec[row][i].index == cell.index) { location = i; break; } } return location; } catch(exception& e) { m->errorOut(e, "SparseDistanceMatrix", "addCellSorted"); exit(1); } } /***********************************************************************/ ull SparseDistanceMatrix::getSmallestCell(ull& row){ try { if (!sorted) { sortSeqVec(); sorted = true; } //print(); vector mins; smallDist = MOTHURMAX; for (int i = 0; i < seqVec.size(); i++) { for (int j = 0; j < seqVec[i].size(); j++) { if (m->getControl_pressed()) { return smallDist; } //already checked everyone else in row if (i < seqVec[i][j].index) { float dist = seqVec[i][j].dist; if(dist < smallDist){ //found a new smallest distance mins.clear(); smallDist = dist; PDistCellMin temp(i, seqVec[i][j].index); mins.push_back(temp); } else if(util.isEqual(dist, smallDist)){ //if a subsequent distance is the same as mins distance add the new iterator to the mins vector 
PDistCellMin temp(i, seqVec[i][j].index); mins.push_back(temp); } }else { j+=seqVec[i].size(); } //stop looking } } util.mothurRandomShuffle(mins); //randomize the order of the iterators in the mins vector row = mins[0].row; ull col = mins[0].col; return col; } catch(exception& e) { m->errorOut(e, "SparseDistanceMatrix", "getSmallestCell"); exit(1); } } /***********************************************************************/ void SparseDistanceMatrix::print(){ try { cout << endl; //saves time in getSmallestCell, by making it so you dont search the repeats for (int i = 0; i < seqVec.size(); i++) { cout << i << '\t'; for (int j = 0; j < seqVec[i].size(); j++) { cout << seqVec[i][j].index << '\t' ; } cout << endl; } cout << endl; } catch(exception& e) { m->errorOut(e, "SparseDistanceMatrix", "sortSeqVec"); exit(1); } } /***********************************************************************/ int SparseDistanceMatrix::sortSeqVec(){ try { //saves time in getSmallestCell, by making it so you dont search the repeats for (int i = 0; i < seqVec.size(); i++) { sort(seqVec[i].begin(), seqVec[i].end(), compareIndexes); } return 0; } catch(exception& e) { m->errorOut(e, "SparseDistanceMatrix", "sortSeqVec"); exit(1); } } /***********************************************************************/ int SparseDistanceMatrix::sortSeqVec(int index){ try { //saves time in getSmallestCell, by making it so you dont search the repeats sort(seqVec[index].begin(), seqVec[index].end(), compareIndexes); return 0; } catch(exception& e) { m->errorOut(e, "SparseDistanceMatrix", "sortSeqVec"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sparsedistancematrix.h000077500000000000000000000022331424121717000241070ustar00rootroot00000000000000#ifndef Mothur_sparsedistancematrix_h #define Mothur_sparsedistancematrix_h // // sparsedistancematrix.h // Mothur // // Created by Sarah Westcott on 7/16/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
// #include "mothur.h" #include "mothurout.h" #include "utils.hpp" class ListVector; /***********************************************************************/ class SparseDistanceMatrix { public: SparseDistanceMatrix(); ~SparseDistanceMatrix(){ clear(); } int getNNodes(); ull getSmallestCell(ull& index); //Return the cell with the smallest distance float getSmallDist(); int rmCell(ull, ull); int updateCellCompliment(ull, ull); void resize(ull n) { seqVec.resize(n); } void clear(); void addCell(ull, PDistCell); int addCellSorted(ull, PDistCell); vector > seqVec; void print(); private: PDistCell smallCell; //The cell with the smallest distance int numNodes; bool sorted; int sortSeqVec(); int sortSeqVec(int); float smallDist, aboveCutoff; MothurOut* m; Utils util; }; /***********************************************************************/ #endif mothur-1.48.0/source/datastructures/sparsematrix.cpp000077500000000000000000000112321424121717000227260ustar00rootroot00000000000000 #include "sparsematrix.hpp" #include "listvector.hpp" /***********************************************************************/ SparseMatrix::SparseMatrix() : numNodes(0), minsIndex(0), smallDist(MOTHURMAX){ m = MothurOut::getInstance(); } /***********************************************************************/ int SparseMatrix::getNNodes(){ return numNodes; } /***********************************************************************/ float SparseMatrix::getSmallDist(){ return smallDist; } /***********************************************************************/ MatData SparseMatrix::rmCell(MatData data){ try { if(data->vectorMap != nullptr ){ *(data->vectorMap) = nullptr; data->vectorMap = nullptr; } data = matrix.erase(data); numNodes--; return(data); // seems like i should be updating smallDist here, but the only time we remove cells is when // clustering and the clustering algorithm updates smallDist } catch(exception& e) { m->errorOut(e, "SparseMatrix", "rmCell"); exit(1); } } /***********************************************************************/ void SparseMatrix::addCell(PCell value){ try { matrix.push_back(value); numNodes++; if(value.dist < smallDist){ smallDist = value.dist; } } catch(exception& e) { m->errorOut(e, "SparseMatrix", "addCell"); exit(1); } } /***********************************************************************/ void SparseMatrix::clear(){ try { matrix.clear(); mins.clear(); numNodes = 0; minsIndex = 0; smallDist = MOTHURMAX; } catch(exception& e) { m->errorOut(e, "SparseMatrix", "clear"); exit(1); } } /***********************************************************************/ MatData SparseMatrix::begin(){ return matrix.begin(); } /***********************************************************************/ MatData SparseMatrix::end(){ return matrix.end(); } /***********************************************************************/ void SparseMatrix::print(){ try { int index = 0; cout << endl << "Index\tRow\tColumn\tDistance" << endl; for(MatData currentCell=matrix.begin();currentCell!=matrix.end();currentCell++){ cout << index << '\t' << currentCell->row << '\t' << currentCell->column << '\t' << currentCell->dist << endl; index++; } } catch(exception& e) { m->errorOut(e, "SparseMatrix", "print"); exit(1); } } /***********************************************************************/ void SparseMatrix::print(ListVector* list){ try { int index = 0; m->mothurOutEndLine(); m->mothurOut("Index\tRow\tColumn\tDistance\n"); for(MatData currentCell=matrix.begin();currentCell!=matrix.end();currentCell++){ 
m->mothurOut(toString(index) + "\t" + toString(list->get(currentCell->row)) + "\t" + toString(list->get(currentCell->column)) + "\t" + toString(currentCell->dist)); m->mothurOutEndLine(); index++; } } catch(exception& e) { m->errorOut(e, "SparseMatrix", "print"); exit(1); } } /***********************************************************************/ PCell* SparseMatrix::getSmallestCell(){ try { // this is where I check to see if the next small distance has the correct distance // if it doesn't then I remove the offending Cell -> should also be able to check for // invalid iterator / pointer -- right??? while(!mins.empty() && mins.back() == nullptr){ mins.pop_back(); } // if the mins vector is empty go here... if(mins.empty()){ mins.clear(); smallDist = begin()->dist; //set the first candidate small distance for(MatData currentCell=begin();currentCell!=end();currentCell++){ float dist = currentCell->dist; if(dist < smallDist){ //found a new smallest distance mins.clear(); smallDist = dist; mins.push_back(&*currentCell); //this is the address of the data in the list being pointed to by the MatData iterator } else if(util.isEqual(dist, smallDist)){ //if a subsequent distance is the same as mins distance add the new iterator to the mins vector mins.push_back(&*currentCell); //this is the address of the data in the list being pointed to by the MatData iterator } } util.mothurRandomShuffle(mins); //randomize the order of the iterators in the mins vector for(int i=0;ivectorMap = &mins[i]; //assign vectorMap to the address for the container } } smallCell = mins.back(); //make the smallestCell the last element of the vector mins.pop_back(); //remove the last element from the vector return smallCell; } catch(exception& e) { m->errorOut(e, "SparseMatrix", "getSmallestCell"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/sparsematrix.hpp000077500000000000000000000017441424121717000227420ustar00rootroot00000000000000#ifndef SPARSEMATRIX_H #define SPARSEMATRIX_H #include "mothur.h" #include "mothurout.h" #include "utils.hpp" class ListVector; /***********************************************************************/ typedef list::iterator MatData; class SparseMatrix { public: SparseMatrix(); ~SparseMatrix(){ while(!mins.empty() && mins.back() == nullptr){ mins.pop_back(); } } int getNNodes(); void print(); //Print the contents of the matrix void print(ListVector*); //Print the contents of the matrix PCell* getSmallestCell(); //Return the cell with the smallest distance float getSmallDist(); MatData rmCell(MatData); void addCell(PCell); void clear(); MatData begin(); MatData end(); private: PCell* smallCell; //The cell with the smallest distance int numNodes; list matrix; vector mins; float smallDist; int minsIndex; MothurOut* m; Utils util; }; /***********************************************************************/ #endif mothur-1.48.0/source/datastructures/storagedatabase.hpp000066400000000000000000000017511424121717000233440ustar00rootroot00000000000000// // storagedatabase.hpp // Mothur // // Created by Sarah Westcott on 6/3/21. // Copyright © 2021 Schloss Lab. All rights reserved. 
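//
//  StorageDatabase is the abstract interface the sequence/protein storage classes implement: derived classes must
//  provide getNumSeqs() and print(), and override push_back()/getSeq() (or the Protein equivalents) as needed, while
//  sameLength() reports whether every stored entry shares one length.  A minimal sketch of a derived class -- the
//  class name and members here are illustrative only, not one of mothur's real databases:
//
//      class ExampleSeqDB : public StorageDatabase {
//      public:
//          int getNumSeqs() { return seqs.size(); }
//          void print(string outputFileName) { /* write each stored Sequence to outputFileName */ }
//          void push_back(Sequence s) { seqs.push_back(s); }
//          Sequence getSeq(int i) { return seqs[i]; }
//      private:
//          vector<Sequence> seqs;
//      };
//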
// #ifndef storagedatabase_hpp #define storagedatabase_hpp #include "mothurout.h" #include "sequence.hpp" #include "protein.hpp" class StorageDatabase { public: StorageDatabase() { m = MothurOut::getInstance(); length = 0; samelength = true; } virtual ~StorageDatabase() = default; //loops through data and delete each sequence virtual int getNumSeqs() = 0; virtual void print(string) = 0; virtual bool sameLength() { return samelength; } virtual Sequence getSeq(int) { Sequence s; return s; } virtual Protein getProt(int) { Protein p; return p; } virtual void push_back(Sequence) {} //adds sequence virtual void push_back(Protein) {} //adds protein protected: MothurOut* m; Utils util; bool samelength; int length; }; #endif /* storagedatabase_hpp */ mothur-1.48.0/source/datastructures/suffixdb.cpp000077500000000000000000000062521424121717000220240ustar00rootroot00000000000000/* * suffixdb.cpp * * * Created by Pat Schloss on 12/16/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This is a child class of the Database abstract datatype. The class is basically a database of suffix trees and an * encapsulation of the method for finding the most similar tree to an inputted sequence. the suffixForest objecct * is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree. The class also * provides a method to take an unaligned sequence and find the closest sequence in the suffixForest. The search * method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093. I * would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but * faster than blast * */ #include "searchdatabase.hpp" #include "sequence.hpp" #include "suffixtree.hpp" #include "suffixdb.hpp" /**************************************************************************************************/ SuffixDB::SuffixDB(int numSeqs) : SearchDatabase() { suffixForest.resize(numSeqs); count = 0; } /**************************************************************************************************/ SuffixDB::SuffixDB() : SearchDatabase() { count = 0; } /**************************************************************************************************/ //assumes sequences have been added using addSequence vector SuffixDB::findClosestSequences(Sequence* candidateSeq, int num, vector& Scores) const{ try { vector topMatches; string processedSeq = candidateSeq->convert2ints(); // the candidate sequence needs to be a string of ints vector seqMatches; for(int i=0;ierrorOut(e, "SuffixDB", "findClosestSequences"); exit(1); } } /**************************************************************************************************/ //adding the sequences generates the db void SuffixDB::addSequence(Sequence seq) { try { suffixForest[count].loadSequence(seq); count++; } catch(exception& e) { m->errorOut(e, "SuffixDB", "addSequence"); exit(1); } } /**************************************************************************************************/ SuffixDB::~SuffixDB(){ for (int i = (suffixForest.size()-1); i >= 0; i--) { suffixForest.pop_back(); } } /**************************************************************************************************/ mothur-1.48.0/source/datastructures/suffixdb.hpp000077500000000000000000000024311424121717000220240ustar00rootroot00000000000000#ifndef SUFFIXDB_HPP #define SUFFIXDB_HPP /* * suffixdb.hpp * * * Created by Pat Schloss on 12/16/08. * Copyright 2008 Patrick D. Schloss. 
All rights reserved. * * This is a child class of the Database abstract datatype. The class is basically a database of suffix trees and an * encapsulation of the method for finding the most similar tree to an inputted sequence. the suffixForest object * is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree. The class also * provides a method to take an unaligned sequence and find the closest sequence in the suffixForest. The search * method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093. I * would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but * faster than blast * */ #include "mothur.h" #include "searchdatabase.hpp" #include "suffixtree.hpp" class SuffixDB : public SearchDatabase { public: SuffixDB(int); SuffixDB(); ~SuffixDB(); void generateDB(){}; //adding sequences generates the db void addSequence(Sequence); vector findClosestSequences(Sequence*, int, vector&) const; private: vector suffixForest; int count; }; #endif mothur-1.48.0/source/datastructures/suffixnodes.cpp000077500000000000000000000107701424121717000225470ustar00rootroot00000000000000/* * SuffixNodes.cpp * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * There are two types of nodes in a suffix tree as I have implemented it. First, there are the internal nodes that * have children, these are the SuffixBranch objects. There are also the terminal nodes, which are the suffixBranches. * I divided them into two groups to save on memory. A SuffixTree object will be a vector of SuffixNodes; therefore, * the values of parentNode, children nodes, and suffix nodes are stored as ints that correspond to indices in the * vector * */ #include "suffixnodes.hpp" //******************************************************************************************************************** inline char deCodeSequence(char code){ if(code == '0') { return 'a'; } // this method allows us to go from the int string to a char string; else if(code == '1') { return 'c'; } // it's only really useful if we want to print out the tree else if(code == '2') { return 'g'; } else if(code == '3') { return 't'; } else if(code == '4') { return 'n'; } else { return '$'; } } //******************************************************************************************************************** SuffixNode::SuffixNode(int parent, int start, int end) : parentNode(parent), // we store the parent node as an int startCharPosition(start), // the suffix tree class will hold the sequence that the startCharPosition and endCharPosition(end) // endCharPosition indices correspond to { /* do nothing */ m = MothurOut::getInstance(); } void SuffixNode::setChildren(char, int) { /* do nothing */ } // there's no children in a leaf int SuffixNode::getNumChildren() { return 0; } // ditto void SuffixNode::eraseChild(char) { /* do nothing */ } // ditto int SuffixNode::getChild(char) { return -1; } // ditto void SuffixNode::setSuffixNode(int) { /* do nothing */ } // there's no suffix node in a leaf int SuffixNode::getSuffixNode() { return -1; } // ditto int SuffixNode::getParentNode() { return parentNode; } void SuffixNode::setParentNode(int number) { parentNode = number; } int SuffixNode::getStartCharPos() { return startCharPosition; } void SuffixNode::setStartCharPos(int start) { startCharPosition = start; } int SuffixNode::getEndCharPos() { return endCharPosition; } 
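//********************************************************************************************************************
//  The branch nodes below look children up with childNodes[base - '0'], so any sequence walked against the tree must
//  already be int-coded ('0' through '4' for a, c, g, t and n).  In mothur the conversion is done elsewhere (e.g.
//  Sequence::convert2ints()); the helper below is only an illustrative sketch that mirrors deCodeSequence() above and
//  is not called anywhere in the code.

inline char codeSequenceChar(char base){
	if(base == 'a')			{	return '0';	}
	else if(base == 'c')	{	return '1';	}
	else if(base == 'g')	{	return '2';	}
	else if(base == 't')	{	return '3';	}
	else if(base == 'n')	{	return '4';	}
	else					{	return '5';	}	//	assumes anything else acts as a terminator; deCodeSequence()
}													//	decodes such codes back to '$'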
//******************************************************************************************************************** SuffixLeaf::SuffixLeaf(int parent, int start, int end) : SuffixNode(parent, start, end) { /* do nothing */ } void SuffixLeaf::print(string sequence, int nodeNumber){ m->mothurOut(toString(this) + "\t" + toString(parentNode) + "\t" + toString(nodeNumber) + "\t" + toString(-1) + "\t" + toString(startCharPosition) + "\t" + toString(endCharPosition) + "\t"); m->mothurOut("/"); for(int i=startCharPosition;i<=endCharPosition;i++){ m->mothurOut(toString(deCodeSequence(sequence[i]))); } m->mothurOut("/"); m->mothurOutEndLine(); } //******************************************************************************************************************** SuffixBranch::SuffixBranch(int parent, int start, int end) : SuffixNode(parent, start, end), suffixNode(-1){ childNodes.assign(6, -1); } void SuffixBranch::print(string sequence, int nodeNumber){ // this method is different that than m->mothurOut(toString(this) + "\t" + toString(parentNode) + "\t" + toString(nodeNumber) + "\t" + // of a leaf because it prints out a toString(suffixNode) + "\t" + toString(startCharPosition) + "\t" + toString(endCharPosition) + "\t"); // value for the suffix node m->mothurOut("/"); for(int i=startCharPosition;i<=endCharPosition;i++){ m->mothurOut(toString(deCodeSequence(sequence[i]))); } m->mothurOut("/"); m->mothurOutEndLine(); } // we can access the children by subtracting '0' from the the char value from the string, the difference is an int // value and the index we need to access. void SuffixBranch::eraseChild(char base) { childNodes[base - '0'] = -1; } //to erase set the child index to -1 void SuffixBranch::setChildren(char base, int nodeIndex){ childNodes[base - '0'] = nodeIndex; } void SuffixBranch::setSuffixNode(int nodeIndex){ suffixNode = nodeIndex; } int SuffixBranch::getSuffixNode() { return suffixNode; } int SuffixBranch::getChild(char base) { return childNodes[base - '0']; } //******************************************************************************************************************** mothur-1.48.0/source/datastructures/suffixnodes.hpp000077500000000000000000000055261424121717000225570ustar00rootroot00000000000000#ifndef SUFFIXNODES_H #define SUFFIXNODES_H /* * SuffixNodes.h * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * There are two types of nodes in a suffix tree as I have implemented it. First, there are the internal nodes that * have children, these are the SuffixBranch objects. There are also the terminal nodes, which are the suffixBranches. * I divided them into two groups to save on memory. 
A SuffixTree object will be a vector of SuffixNodes; therefore, * the values of parentNode, children nodes, and suffix nodes are stored as ints that correspond to indices in the * vector * */ #include "mothur.h" #include "mothurout.h" //******************************************************************************************************************** class SuffixNode { public: SuffixNode(int, int, int); virtual ~SuffixNode() = default; virtual void print(string, int) = 0; virtual void setChildren(char, int); virtual int getNumChildren(); virtual void eraseChild(char); virtual void setSuffixNode(int); virtual int getSuffixNode(); virtual int getChild(char); int getParentNode(); void setParentNode(int); int getStartCharPos(); void setStartCharPos(int start); int getEndCharPos(); protected: int parentNode; int startCharPosition; int endCharPosition; MothurOut* m; }; //******************************************************************************************************************** class SuffixLeaf : public SuffixNode { // most of the methods are already set in the parent class public: SuffixLeaf(int, int, int); // we just need to define a constructor and ~SuffixLeaf() = default; void print(string, int); // print method }; //******************************************************************************************************************** class SuffixBranch : public SuffixNode { public: SuffixBranch(int, int, int); ~SuffixBranch() = default; void print(string, int); // need a special method for printing the node because there are children void eraseChild(char); // need a special method for erasing the children void setChildren(char, int); // need a special method for setting children void setSuffixNode(int); // need a special method for setting the suffix node int getSuffixNode(); // need a special method for returning the suffix node int getChild(char); // need a special method for return children private: vector childNodes; // a suffix branch is unique because it has children and a suffixNode. The int suffixNode; // are stored in a vector for super-fast lookup. If the alphabet were bigger, this }; // might not be practical. Since we only have 5 possible letters, it makes sense //******************************************************************************************************************** #endif mothur-1.48.0/source/datastructures/suffixtree.cpp000077500000000000000000000302411424121717000223710ustar00rootroot00000000000000/* * suffixtree.cpp * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This is my half-assed attempt to implement a suffix tree. This is a cobbled together algorithm using materials that * I found at http://marknelson.us/1996/08/01/suffix-trees/ and: * * Ukkonen E. (1995). On-line construction of suffix trees. Algorithmica 14 (3): 249--260 * Gusfield, Dan (1999). Algorithms on Strings, Trees and Sequences: Computer Science and Computational Biology. * USA: Cambridge University Press * * The Ukkonen paper is the seminal paper describing the on-line method of constructing a suffix tree. * * I have chosen to store the nodes of the tree as a vector of pointers to SuffixNode objects. The root is stored at * nodeVector[0]. Each tree also stores the sequence name and the string that corresponds to the actual sequence. * Finally, this class provides a way of counting the number of suffixes that are needed in one tree to generate a new * sequence (countSuffixes). 
This method is used to determine similarity between sequences and was inspired by the * article and Perl source code provided at http://www.ddj.com/web-development/184416093. * */ #include "sequence.hpp" #include "suffixnodes.hpp" #include "suffixtree.hpp" //******************************************************************************************************************** inline bool compareParents(SuffixNode* left, SuffixNode* right){// this is necessary to print the tree and to sort the return (left->getParentNode() < right->getParentNode()); // nodes in order of their parent } //******************************************************************************************************************** SuffixTree::SuffixTree(){ m = MothurOut::getInstance(); } //******************************************************************************************************************** SuffixTree::~SuffixTree(){ for(int i=0;i hold = nodeVector; sort(hold.begin(), hold.end(), compareParents); m->mothurOut("Address\t\tParent\tNode\tSuffix\tStartC\tEndC\tSuffix\n"); for(int i=1;i<=nodeCounter;i++){ hold[i]->print(sequence, i); } } //******************************************************************************************************************** int SuffixTree::countSuffixes(string compareSequence, int& minValue){ // here we count the number of suffix parts // we need to rewrite a user supplied sequence. if the int numSuffixes = 0; // count exceeds the supplied minValue, bail out. The int seqLength = compareSequence.length(); // time complexity should be O(L) int position = 0; int presentNode = 0; while(position < seqLength){ // while the position in the query sequence isn't at the end... if(numSuffixes > minValue) { return 1000000; } // bail if the count gets too high int newNode = nodeVector[presentNode]->getChild(compareSequence[position]); // see if the current node has a // child that matches the next character in the query if(newNode == -1){ if(presentNode == 0){ position++; } // if not, go back to the root and increase the count numSuffixes++; // by one. presentNode = 0; } else{ // if there is, move to that node and see how far down presentNode = newNode; // it we can get for(int i=nodeVector[newNode]->getStartCharPos(); i<=nodeVector[newNode]->getEndCharPos(); i++){ if(compareSequence[position] == sequence[i]){ position++; // as long as the query and branch agree, keep going } else{ numSuffixes++; // if there is a mismatch, increase the number of presentNode = 0; // suffixes and go back to the root break; } } } // if we get all the way through the node we'll go to the top of the while loop and find the child node // that corresponds to what we are interested in } numSuffixes--; // the method puts an extra count on numSuffixes if(numSuffixes < minValue) { minValue = numSuffixes; } // if the count is less than the previous minValue, return numSuffixes; // change the value and return the number of suffixes } //******************************************************************************************************************** int SuffixTree::countSuffixes(string compareSequence) const{ // here we count the number of suffix parts // we need to rewrite a user supplied sequence. if the int numSuffixes = 0; // count exceeds the supplied minValue, bail out. The int seqLength = compareSequence.length(); // time complexity should be O(L) int position = 0; int presentNode = 0; while(position < seqLength){ // while the position in the query sequence isn't at the end... 
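		// same tree walk as the bounded overload above, only without the early bail-out once minValue is exceeded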
int newNode = nodeVector[presentNode]->getChild(compareSequence[position]); // see if the current node has a // child that matches the next character in the query if(newNode == -1){ if(presentNode == 0){ position++; } // if not, go back to the root and increase the count numSuffixes++; // by one. presentNode = 0; } else{ // if there is, move to that node and see how far down presentNode = newNode; // it we can get for(int i=nodeVector[newNode]->getStartCharPos(); i<=nodeVector[newNode]->getEndCharPos(); i++){ if(compareSequence[position] == sequence[i]){ position++; // as long as the query and branch agree, keep going } else{ numSuffixes++; // if there is a mismatch, increase the number of presentNode = 0; // suffixes and go back to the root break; } } } // if we get all the way through the node we'll go to the top of the while loop and find the child node // that corresponds to what we are interested in } numSuffixes--; // the method puts an extra count on numSuffixes return numSuffixes; // change the value and return the number of suffixes } //******************************************************************************************************************** void SuffixTree::canonize(){ // if you have to ask how this works, you don't really want to know and this really // isn't the place to ask. if (!isExplicit()) { // if the node has no children... int tempNodeIndex = nodeVector[activeNode]->getChild(sequence[activeStartPosition]); SuffixNode* tempNode = nodeVector[tempNodeIndex]; int span = tempNode->getEndCharPos() - tempNode->getStartCharPos(); while ( span <= ( activeEndPosition - activeStartPosition ) ) { activeStartPosition = activeStartPosition + span + 1; activeNode = tempNodeIndex; if ( activeStartPosition <= activeEndPosition ) { tempNodeIndex = nodeVector[tempNodeIndex]->getChild(sequence[activeStartPosition]); tempNode = nodeVector[tempNodeIndex]; span = tempNode->getEndCharPos() - tempNode->getStartCharPos(); } } } } //******************************************************************************************************************** int SuffixTree::split(int nodeIndex, int position){ // leaves stay leaves, etc, to split a leaf we make a new interior // node and reconnect everything SuffixNode* node = nodeVector[nodeIndex]; // get the node that needs to be split SuffixNode* parentNode = nodeVector[node->getParentNode()]; // get it's parent node parentNode->eraseChild(sequence[node->getStartCharPos()]); // erase the present node from the registry of its parent nodeCounter++; SuffixNode* newNode = new SuffixBranch(node->getParentNode(), node->getStartCharPos(), node->getStartCharPos() + activeEndPosition - activeStartPosition); // create a new node that will link the parent with the old child parentNode->setChildren(sequence[newNode->getStartCharPos()], nodeCounter);// give the parent the new child nodeVector.push_back(newNode); node->setParentNode(nodeCounter); // give the original node the new node as its parent newNode->setChildren(sequence[node->getStartCharPos() + activeEndPosition - activeStartPosition + 1], nodeIndex); // put the original node in the registry of the new node's children newNode->setSuffixNode(activeNode);//link the new node with the old active node // recalculate the startCharPosition of the outermost node node->setStartCharPos(node->getStartCharPos() + activeEndPosition - activeStartPosition + 1 ); return node->getParentNode(); } //******************************************************************************************************************** void 
SuffixTree::makeSuffixLink(int& previous, int present){ // here we link the nodes that are suffixes of one another to rapidly speed through the tree if ( previous > 0 ) { nodeVector[previous]->setSuffixNode(present); } else { /* do nothing */ } previous = present; } //******************************************************************************************************************** void SuffixTree::addPrefix(int prefixPosition){ int lastParentNode = -1; // we need to place a new prefix in the suffix tree int parentNode = 0; while(1){ parentNode = activeNode; if(isExplicit()){ // if the node is explicit (has kids), try to follow it down the branch if its there... if(nodeVector[activeNode]->getChild(sequence[prefixPosition]) != -1){ // break out and get next prefix... break; } else{ // ...otherwise continue, we'll need to make a new node later on... } } else{ // if it's not explicit (no kids), read through and see if all of the chars agree... int tempNode = nodeVector[activeNode]->getChild(sequence[activeStartPosition]); int span = activeEndPosition - activeStartPosition; if(sequence[nodeVector[tempNode]->getStartCharPos() + span + 1] == sequence[prefixPosition] ){ break; // if the existing suffix agrees with the new one, grab a new prefix... } else{ parentNode = split(tempNode, prefixPosition); // ... otherwise we need to split the node } } nodeCounter++; // we need to generate a new node here if the kid didn't exist, or we split a node SuffixNode* newSuffixLeaf = new SuffixLeaf(parentNode, prefixPosition, sequence.length()-1); nodeVector[parentNode]->setChildren(sequence[prefixPosition], nodeCounter); nodeVector.push_back(newSuffixLeaf); makeSuffixLink( lastParentNode, parentNode ); // make a suffix link for the parent node if(nodeVector[activeNode]->getParentNode() == -1){ // move along the start position for the tree activeStartPosition++; } else { activeNode = nodeVector[activeNode]->getSuffixNode(); } canonize(); // frankly, i'm not entirely clear on what canonize does. } makeSuffixLink( lastParentNode, parentNode ); activeEndPosition++; // move along the end position for the tree canonize(); // frankly, i'm not entirely clear on what canonize does. } //******************************************************************************************************************** mothur-1.48.0/source/datastructures/suffixtree.hpp000077500000000000000000000041221424121717000223750ustar00rootroot00000000000000#ifndef SUFFIXTREE_H #define SUFFIXTREE_H /* * suffixtree.h * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This is my half-assed attempt to implement a suffix tree. This is a cobbled together algorithm using materials that * I found at http://marknelson.us/1996/08/01/suffix-trees/ and: * * Ukkonen E. (1995). On-line construction of suffix trees. Algorithmica 14 (3): 249--260 * Gusfield, Dan (1999). Algorithms on Strings, Trees and Sequences: Computer Science and Computational Biology. * USA: Cambridge University Press * * The Ukkonen paper is the seminal paper describing the on-line method of constructing a suffix tree. * * I have chosen to store the nodes of the tree as a vector of pointers to SuffixNode objects. The root is stored at * nodeVector[0]. Each tree also stores the sequence name and the string that corresponds to the actual sequence. * Finally, this class provides a way of counting the number of suffixes that are needed in one tree to generate a new * sequence (countSuffixes). 
This method is used to determine similarity between sequences and was inspired by the * article and Perl source code provided at http://www.ddj.com/web-development/184416093. * */ #include "mothur.h" class SuffixNode; //******************************************************************************************************************** class SuffixTree { public: SuffixTree(); ~SuffixTree(); void loadSequence(Sequence); string getSeqName(); void print(); int countSuffixes(string, int&); int countSuffixes(string) const; private: void addPrefix(int); void canonize(); int split(int, int); void makeSuffixLink(int&, int); bool isExplicit(){ return activeStartPosition > activeEndPosition; } int activeStartPosition; int activeEndPosition; vector nodeVector; int root; int activeNode; int nodeCounter; string seqName; string sequence; MothurOut* m; }; //******************************************************************************************************************** #endif mothur-1.48.0/source/datastructures/taxonomy.cpp000066400000000000000000000136761424121717000220750ustar00rootroot00000000000000// // constaxonomy.cpp // Mothur // // Created by Sarah Westcott on 1/13/20. // Copyright © 2020 Schloss Lab. All rights reserved. // #include "taxonomy.hpp" /***********************************************************************/ Taxonomy::Taxonomy(){ m = MothurOut::getInstance(); containsConfidence = false; } /***********************************************************************/ Taxonomy::Taxonomy(string otuname, string consensusTax, int num) { try { m = MothurOut::getInstance(); containsConfidence = false; name = otuname; numReps = num; taxonomy = parseTax(consensusTax); } catch(exception& e) { m->errorOut(e, "Taxonomy", "Taxonomy"); exit(1); } } /***********************************************************************/ Taxonomy::Taxonomy(string otuname, string consensusTax) { try { m = MothurOut::getInstance(); containsConfidence = false; name = otuname; numReps = 1; taxonomy = parseTax(consensusTax); } catch(exception& e) { m->errorOut(e, "Taxonomy", "Taxonomy"); exit(1); } } /***********************************************************************/ Taxonomy::Taxonomy(ifstream& in) { try { m = MothurOut::getInstance(); containsConfidence = false; string otu = ""; string consensusTax = "unknown"; int size = 0; in >> otu; gobble(in); in >> size; gobble(in); consensusTax = util.getline(in); gobble(in); name = otu; numReps = size; taxonomy = parseTax(consensusTax); } catch(exception& e) { m->errorOut(e, "Taxonomy", "Taxonomy"); exit(1); } } /***********************************************************************/ void Taxonomy::setTaxons(string consensusTax){ try { taxonomy = parseTax(consensusTax); }catch(exception& e) { m->errorOut(e, "Taxonomy", "setTaxons"); exit(1); } } /***********************************************************************/ string Taxonomy::getInlineConsTaxonomy(){ try { string otuConsensus = ""; otuConsensus += name + '\t' + toString(numReps) + '\t' + getConsTaxString(true); return otuConsensus; }catch(exception& e) { m->errorOut(e, "Taxonomy", "getInlineConsTaxonomy"); exit(1); } } /***********************************************************************/ vector Taxonomy::getSimpleTaxons(bool includeConfidence) { //pass in true to include confidences try { if (!containsConfidence) { includeConfidence = false; } vector items; for (int i = 0; i < taxonomy.size(); i++) { if (m->getControl_pressed()) { break; } string conTax = taxonomy[i].name; if (includeConfidence) { conTax += 
"(" + toString(taxonomy[i].confidence) + ")"; } items.push_back(conTax); } return items; }catch(exception& e) { m->errorOut(e, "Taxonomy", "getSimpleTaxons"); exit(1); } } /***********************************************************************/ string Taxonomy::getConsTaxString(bool includeConfidence) { //pass in true to include confidences try { string conTax = ""; if (!containsConfidence) { includeConfidence = false; } for (int i = 0; i < taxonomy.size(); i++) { if (m->getControl_pressed()) { break; } conTax += taxonomy[i].name; if (includeConfidence) { conTax += "(" + toString(taxonomy[i].confidence) + ")"; } conTax += ";"; } return conTax; }catch(exception& e) { m->errorOut(e, "Taxonomy", "getConsTaxString"); exit(1); } } /***********************************************************************/ vector Taxonomy::parseTax(string tax){ try { string taxon = ""; vector consTaxs; for(int i=0;igetControl_pressed()) { break; } if(tax[i] == ';'){ string newtaxon = taxon; float confidence = 0; containsConfidence = util.hasConfidenceScore(newtaxon, confidence); Taxon thisTax(newtaxon, confidence); consTaxs.push_back(thisTax); taxon = ""; } else{ taxon += tax[i]; } } return consTaxs; }catch(exception& e) { m->errorOut(e, "Taxonomy", "parseTax"); exit(1); } } /***********************************************************************/ void Taxonomy::printConsTax(ostream& out){ try { out << getInlineConsTaxonomy() << endl; }catch(exception& e) { m->errorOut(e, "Taxonomy", "printConsTax"); exit(1); } } /***********************************************************************/ void Taxonomy::printConsTax(OutputWriter* out){ try { out->write(getInlineConsTaxonomy()+"\n"); }catch(exception& e) { m->errorOut(e, "Taxonomy", "printConsTax"); exit(1); } } /***********************************************************************/ void Taxonomy::printConsTaxNoConfidence(ostream& out){ try { string otuConsensus = name + '\t' + toString(numReps) + '\t' + getConsTaxString(false); out << otuConsensus << endl; }catch(exception& e) { m->errorOut(e, "Taxonomy", "printConsTaxNoConfidence"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/datastructures/taxonomy.hpp000066400000000000000000000031451424121717000220700ustar00rootroot00000000000000// // constaxonomy.hpp // Mothur // // Created by Sarah Westcott on 1/13/20. // Copyright © 2020 Schloss Lab. All rights reserved. 
// #ifndef constaxonomy_hpp #define constaxonomy_hpp #include "mothurout.h" #include "utils.hpp" #include "writer.h" /**************************************************************************************************/ class Taxonomy { public: Taxonomy(); Taxonomy(string, string, int); //name, tax, abund Taxonomy(string, string); Taxonomy(ifstream&); ~Taxonomy() = default; void setName(string n) { name = n; } void setNumSeqs(int n) { numReps = n; } string getName() { return name; } vector getTaxons() { return taxonomy; } vector getSimpleTaxons (bool includeConfidence=false); void setTaxons(vector t) { taxonomy = t; } int getNumSeqs() { return numReps; } int getNumLevels() { return taxonomy.size(); } void setTaxons(string); string getInlineConsTaxonomy(); string getConsTaxString(bool includeConfidence=true); //pass in true to include confidences void printConsTax(ostream&); void printConsTax(OutputWriter*); void printConsTaxNoConfidence(ostream&); protected: MothurOut* m; string name; int numReps; bool containsConfidence; vector taxonomy; Utils util; vector parseTax(string); }; /**************************************************************************************************/ #endif /* constaxonomy_hpp */ mothur-1.48.0/source/datastructures/tree.cpp000077500000000000000000001151331424121717000211500ustar00rootroot00000000000000/* * tree.cpp * Mothur * * Created by Sarah Westcott on 1/22/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "tree.h" /*****************************************************************/ Tree::Tree(int num, CountTable* t, vector& T) : ct(t) { try { m = MothurOut::getInstance(); numLeaves = num; numNodes = 2*numLeaves - 1; tree.resize(numNodes); Treenames = T; } catch(exception& e) { m->errorOut(e, "Tree", "Tree - numNodes"); exit(1); } } /*****************************************************************/ Tree::Tree(CountTable* t, vector& Tnames) : ct(t) { try { m = MothurOut::getInstance(); if (Tnames.size() == 0) { m->mothurOut("[ERROR]: no valid treenames.\n"); m->setControl_pressed(true); } Treenames = Tnames; numLeaves = Treenames.size(); numNodes = 2*numLeaves - 1; tree.resize(numNodes); //initialize groupNodeInfo vector namesOfGroups = ct->getNamesOfGroups(); for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { //initialize leaf nodes if (i <= (numLeaves-1)) { tree[i].setName(Treenames[i]); //save group info int maxPars = 1; vector group; vector counts = ct->getGroupCounts(Treenames[i]); for (int j = 0; j < namesOfGroups.size(); j++) { if (counts[j] != 0) { //you have seqs from this group groupNodeInfo[namesOfGroups[j]].push_back(i); group.push_back(namesOfGroups[j]); tree[i].pGroups[namesOfGroups[j]] = counts[j]; tree[i].pcount[namesOfGroups[j]] = counts[j]; //keep highest group if(counts[j] > maxPars){ maxPars = counts[j]; } } } tree[i].setGroup(group); setIndex(Treenames[i], i); if (maxPars > 1) { //then we have some more dominant groups //erase all the groups that are less than maxPars because you found a more dominant group. 
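                    //the erase(it++) idiom below advances the iterator before the old element is removed, so it stays valid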
for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ if(it->second < maxPars){ tree[i].pGroups.erase(it++); }else { it++; } } //set one remaining groups to 1 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ tree[i].pGroups[it->first] = 1; } }//end if //intialize non leaf nodes }else if (i > (numLeaves-1)) { tree[i].setName(""); vector tempGroups; tree[i].setGroup(tempGroups); } } } catch(exception& e) { m->errorOut(e, "Tree", "Tree"); exit(1); } } /*****************************************************************/ Tree::Tree(CountTable* t, vector< vector >& sims, vector& Tnames) : ct(t) { try { m = MothurOut::getInstance(); if (Tnames.size() == 0) { m->mothurOut("[ERROR]: no valid treenames.\n"); m->setControl_pressed(true); } Treenames = Tnames; numLeaves = Treenames.size(); numNodes = 2*numLeaves - 1; tree.resize(numNodes); //initialize groupNodeInfo vector namesOfGroups = ct->getNamesOfGroups(); for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { //initialize leaf nodes if (i <= (numLeaves-1)) { tree[i].setName(Treenames[i]); //save group info int maxPars = 1; vector group; vector counts = ct->getGroupCounts(Treenames[i]); for (int j = 0; j < namesOfGroups.size(); j++) { if (counts[j] != 0) { //you have seqs from this group groupNodeInfo[namesOfGroups[j]].push_back(i); group.push_back(namesOfGroups[j]); tree[i].pGroups[namesOfGroups[j]] = counts[j]; tree[i].pcount[namesOfGroups[j]] = counts[j]; //keep highest group if(counts[j] > maxPars){ maxPars = counts[j]; } } } tree[i].setGroup(group); setIndex(Treenames[i], i); if (maxPars > 1) { //then we have some more dominant groups //erase all the groups that are less than maxPars because you found a more dominant group. 
for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ if(it->second < maxPars){ tree[i].pGroups.erase(it++); }else { it++; } } //set one remaining groups to 1 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ tree[i].pGroups[it->first] = 1; } }//end if //intialize non leaf nodes }else if (i > (numLeaves-1)) { tree[i].setName(""); vector tempGroups; tree[i].setGroup(tempGroups); } } //build tree from matrix //initialize indexes map thisIndexes; //maps row in simMatrix to vector index in the tree for (int g = 0; g < numLeaves; g++) { thisIndexes[g] = g; } //do merges and create tree structure by setting parents and children //there are numGroups - 1 merges to do for (int i = 0; i < (numLeaves - 1); i++) { float largest = -1000.0; if (m->getControl_pressed()) { break; } int row, column; row = 1; column = 0; //find largest value in sims matrix by searching lower triangle for (int j = 1; j < sims.size(); j++) { for (int k = 0; k < j; k++) { if (sims[j][k] > largest) { largest = sims[j][k]; row = j; column = k; } } } //set non-leaf node info and update leaves to know their parents //non-leaf tree[numLeaves + i].setChildren(thisIndexes[row], thisIndexes[column]); //parents tree[thisIndexes[row]].setParent(numLeaves + i); tree[thisIndexes[column]].setParent(numLeaves + i); //blength = distance / 2; float blength = ((1.0 - largest) / 2); //branchlengths tree[thisIndexes[row]].setBranchLength(blength - tree[thisIndexes[row]].getLengthToLeaves()); tree[thisIndexes[column]].setBranchLength(blength - tree[thisIndexes[column]].getLengthToLeaves()); //set your length to leaves to your childs length plus branchlength tree[numLeaves + i].setLengthToLeaves(tree[thisIndexes[row]].getLengthToLeaves() + tree[thisIndexes[row]].getBranchLength()); //update index thisIndexes[row] = numLeaves+i; thisIndexes[column] = numLeaves+i; //remove highest value that caused the merge. sims[row][column] = -1000.0; sims[column][row] = -1000.0; //merge values in simsMatrix for (int n = 0; n < sims.size(); n++) { //row becomes merge of 2 groups sims[row][n] = (sims[row][n] + sims[column][n]) / 2; sims[n][row] = sims[row][n]; //delete column sims[column][n] = -1000.0; sims[n][column] = -1000.0; } } //adjust tree to make sure root to tip length is .5 int root = findRoot(); tree[root].setBranchLength((0.5 - tree[root].getLengthToLeaves())); } catch(exception& e) { m->errorOut(e, "Tree", "Tree"); exit(1); } } /*****************************************************************/ Tree::~Tree() { } /*****************************************************************/ int Tree::getIndex(string searchName) { try { map::iterator itIndex = indexes.find(searchName); if (itIndex != indexes.end()) { return itIndex->second; } return -1; } catch(exception& e) { m->errorOut(e, "Tree", "getIndex"); exit(1); } } /*****************************************************************/ void Tree::setIndex(string searchName, int index) { try { map::iterator itIndex = indexes.find(searchName); if (itIndex == indexes.end()) { indexes[searchName] = index; } } catch(exception& e) { m->errorOut(e, "Tree", "setIndex"); exit(1); } } /*****************************************************************/ int Tree::assembleTree() { try { //initialize groupNodeInfo for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) { groupNodeInfo[(ct->getNamesOfGroups())[i]].resize(0); } //build the pGroups in non leaf nodes to be used in the parsimony calcs. 
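        //each interior node's group membership is built bottom-up from its two children via mergeGroups() and mergeGcounts() below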
for (int i = numLeaves; i < numNodes; i++) { if (m->getControl_pressed()) { return 1; } tree[i].pGroups = (mergeGroups(i)); tree[i].pcount = (mergeGcounts(i)); } for(int i = 0; i < numLeaves; i++){ for (int k = 0; k < (tree[i].getGroup()).size(); k++) { groupNodeInfo[(tree[i].getGroup())[k]].push_back(i); } } return 0; } catch(exception& e) { m->errorOut(e, "Tree", "assembleTree"); exit(1); } } /*****************************************************************/ //assumes leaf node names are in seqs and no names file - used by indicator command and subsample void Tree::getSubTree(Tree* Ctree, vector seqs) { try { //copy Tree since we are going to destroy it vector T = Ctree->getTreeNames(); Tree* copy = new Tree(ct, T); copy->getCopy(Ctree); copy->assembleTree(); //we want to select some of the leaf nodes to create the output tree //go through the input Tree starting at parents of leaves //initialize groupNodeInfo vector namesOfGroups = ct->getNamesOfGroups(); for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); } //initialize tree with correct number of nodes, name and group info. for (int i = 0; i < numNodes; i++) { //initialize leaf nodes if (i <= (numLeaves-1)) { tree[i].setName(seqs[i]); //save group info int maxPars = 1; vector group; vector counts = ct->getGroupCounts(seqs[i]); for (int j = 0; j < namesOfGroups.size(); j++) { if (counts[j] != 0) { //you have seqs from this group groupNodeInfo[namesOfGroups[j]].push_back(i); group.push_back(namesOfGroups[j]); tree[i].pGroups[namesOfGroups[j]] = counts[j]; tree[i].pcount[namesOfGroups[j]] = counts[j]; //keep highest group if(counts[j] > maxPars){ maxPars = counts[j]; } } } tree[i].setGroup(group); setIndex(seqs[i], i); if (maxPars > 1) { //then we have some more dominant groups //erase all the groups that are less than maxPars because you found a more dominant group. 
for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){ if(it->second < maxPars){ tree[i].pGroups.erase(it++); }else { it++; } } //set one remaining groups to 1 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){ tree[i].pGroups[it->first] = 1; } }//end if //intialize non leaf nodes }else if (i > (numLeaves-1)) { tree[i].setName(""); vector tempGroups; tree[i].setGroup(tempGroups); } } pruneNewTree(copy, seqs); int root = 0; for (int i = 0; i < copy->getNumNodes(); i++) { //you found the root if (copy->tree[i].getParent() == -1) { root = i; break; } } int nextSpot = numLeaves; populateNewTree(copy->tree, root, nextSpot); //update treenames to reflect who is still in tree Treenames = seqs; delete copy; } catch(exception& e) { m->errorOut(e, "Tree", "getSubTree"); exit(1); } } /*****************************************************************/ void Tree::pruneNewTree(Tree* copy, vector namesToInclude) { try { Utils util; set removedLeaves; for (int i = 0; i < copy->getNumLeaves(); i++) { if (removedLeaves.count(i) == 0) { //am I in the group int parent = copy->tree[i].getParent(); if (parent != -1) { if (util.inUsersGroups(copy->tree[i].getName(), namesToInclude)) { //find my siblings name int parentRC = copy->tree[parent].getRChild(); int parentLC = copy->tree[parent].getLChild(); //if I am the right child, then my sib is the left child int sibIndex = parentRC; if (parentRC == i) { sibIndex = parentLC; } string sibsName = copy->tree[sibIndex].getName(); //if yes, is my sibling if ((util.inUsersGroups(sibsName, namesToInclude)) || (sibsName == "")) { //we both are okay no trimming required }else{ //i am, my sib is not, so remove sib by setting my parent to my grandparent int grandparent = copy->tree[parent].getParent(); int grandparentLC = copy->tree[grandparent].getLChild(); int grandparentRC = copy->tree[grandparent].getRChild(); //whichever of my granparents children was my parent now equals me if (grandparentLC == parent) { grandparentLC = i; } else { grandparentRC = i; } copy->tree[i].setParent(grandparent); copy->tree[i].setBranchLength((copy->tree[i].getBranchLength()+copy->tree[parent].getBranchLength())); if (grandparent != -1) { copy->tree[grandparent].setChildren(grandparentLC, grandparentRC); } removedLeaves.insert(sibIndex); } }else{ //find my siblings name int parentRC = copy->tree[parent].getRChild(); int parentLC = copy->tree[parent].getLChild(); //if I am the right child, then my sib is the left child int sibIndex = parentRC; if (parentRC == i) { sibIndex = parentLC; } string sibsName = copy->tree[sibIndex].getName(); //if no is my sibling if ((util.inUsersGroups(sibsName, namesToInclude)) || (sibsName == "")) { //i am not, but my sib is int grandparent = copy->tree[parent].getParent(); int grandparentLC = copy->tree[grandparent].getLChild(); int grandparentRC = copy->tree[grandparent].getRChild(); //whichever of my granparents children was my parent now equals my sib if (grandparentLC == parent) { grandparentLC = sibIndex; } else { grandparentRC = sibIndex; } copy->tree[sibIndex].setParent(grandparent); copy->tree[sibIndex].setBranchLength((copy->tree[sibIndex].getBranchLength()+copy->tree[parent].getBranchLength())); if (grandparent != -1) { copy->tree[grandparent].setChildren(grandparentLC, grandparentRC); } removedLeaves.insert(i); }else{ //neither of us are, so we want to eliminate ourselves and our parent //so set our parents sib to our great-grandparent int parent = copy->tree[i].getParent(); int grandparent = copy->tree[parent].getParent(); int 
parentsSibIndex; if (grandparent != -1) { int greatgrandparent = copy->tree[grandparent].getParent(); int greatgrandparentLC, greatgrandparentRC; if (greatgrandparent != -1) { greatgrandparentLC = copy->tree[greatgrandparent].getLChild(); greatgrandparentRC = copy->tree[greatgrandparent].getRChild(); } int grandparentLC = copy->tree[grandparent].getLChild(); int grandparentRC = copy->tree[grandparent].getRChild(); parentsSibIndex = grandparentLC; if (grandparentLC == parent) { parentsSibIndex = grandparentRC; } //whichever of my greatgrandparents children was my grandparent if (greatgrandparentLC == grandparent) { greatgrandparentLC = parentsSibIndex; } else { greatgrandparentRC = parentsSibIndex; } copy->tree[parentsSibIndex].setParent(greatgrandparent); copy->tree[parentsSibIndex].setBranchLength((copy->tree[parentsSibIndex].getBranchLength()+copy->tree[grandparent].getBranchLength())); if (greatgrandparent != -1) { copy->tree[greatgrandparent].setChildren(greatgrandparentLC, greatgrandparentRC); } }else{ copy->tree[parent].setParent(-1); } removedLeaves.insert(sibIndex); removedLeaves.insert(i); } } } } } } catch(exception& e) { m->errorOut(e, "Tree", "pruneNewTree"); exit(1); } } /*****************************************************************/ int Tree::populateNewTree(vector& oldtree, int node, int& index) { try { if (oldtree[node].getLChild() != -1) { int rc = populateNewTree(oldtree, oldtree[node].getLChild(), index); int lc = populateNewTree(oldtree, oldtree[node].getRChild(), index); tree[index].setChildren(lc, rc); tree[rc].setParent(index); tree[lc].setParent(index); tree[index].setBranchLength(oldtree[node].getBranchLength()); tree[rc].setBranchLength(oldtree[oldtree[node].getLChild()].getBranchLength()); tree[lc].setBranchLength(oldtree[oldtree[node].getRChild()].getBranchLength()); return (index++); }else { //you are a leaf int indexInNewTree = getIndex(oldtree[node].getName()); return indexInNewTree; } } catch(exception& e) { m->errorOut(e, "Tree", "populateNewTree"); exit(1); } } /*****************************************************************/ void Tree::getCopy(Tree* copy, bool subsample) { try { //for each node in the tree copy its info for (int i = 0; i < numNodes; i++) { //copy branch length tree[i].setBranchLength(copy->tree[i].getBranchLength()); //copy parent tree[i].setParent(copy->tree[i].getParent()); //copy children tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild()); } //build the pGroups in non leaf nodes to be used in the parsimony calcs. 
for (int i = numLeaves; i < numNodes; i++) { if (m->getControl_pressed()) { break; } tree[i].pGroups = (mergeGroups(i)); tree[i].pcount = (mergeGcounts(i)); } } catch(exception& e) { m->errorOut(e, "Tree", "getCopy"); exit(1); } } /*****************************************************************/ void Tree::getCopy(Tree* copy) { try { //for each node in the tree copy its info for (int i = 0; i < numNodes; i++) { //copy name tree[i].setName(copy->tree[i].getName()); //copy group tree[i].setGroup(copy->tree[i].getGroup()); //copy branch length tree[i].setBranchLength(copy->tree[i].getBranchLength()); //copy parent tree[i].setParent(copy->tree[i].getParent()); //copy children tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild()); //copy index in node and tmap setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName())); tree[i].setIndex(copy->tree[i].getIndex()); //copy pGroups tree[i].pGroups = copy->tree[i].pGroups; //copy pcount tree[i].pcount = copy->tree[i].pcount; } groupNodeInfo = copy->groupNodeInfo; } catch(exception& e) { m->errorOut(e, "Tree", "getCopy"); exit(1); } } /*****************************************************************/ //returns a map with a groupname and the number of times that group was seen in the children //for instance if your children are white and black then it would return a map with 2 entries // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1. map Tree::mergeGroups(int i) { try { int lc = tree[i].getLChild(); int rc = tree[i].getRChild(); //set parsimony groups to left child map parsimony = tree[lc].pGroups; int maxPars = 1; //look at right child groups and update maxPars if right child has something higher for that group. for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){ it2 = parsimony.find(it->first); if (it2 != parsimony.end()) { parsimony[it->first]++; } else { parsimony[it->first] = 1; } if(parsimony[it->first] > maxPars){ maxPars = parsimony[it->first]; } } // this is true if right child had a greater parsimony for a certain group if(maxPars > 1){ //erase all the groups that are only 1 because you found something with 2. for(it=parsimony.begin();it!=parsimony.end();){ if(it->second == 1){ parsimony.erase(it++); }else { it++; } } //set one remaining groups to 1 //so with our above example p[white] = 2 would be left and it would become p[white] = 1 for(it=parsimony.begin();it!=parsimony.end();it++){ parsimony[it->first] = 1; } } return parsimony; } catch(exception& e) { m->errorOut(e, "Tree", "mergeGroups"); exit(1); } } /*****************************************************************/ //returns a map with a groupname and the number of times that group was seen in the children //for instance if your children are white and black then it would return a map with 2 entries // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1. 
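//
//for example (illustrative values): merging a left child with {white:1, black:1} and a right child with {white:1}
//gives white a count of 2 and black a count of 1, so maxPars becomes 2, black is erased, and the kept result is {white:1}.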
map Tree::mergeUserGroups(int i, vector g) { try { int lc = tree[i].getLChild(); int rc = tree[i].getRChild(); Utils util; //loop through nodes groups removing the ones the user doesn't want for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){ if (util.inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it++); }else { it++; } } //loop through nodes groups removing the ones the user doesn't want for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){ if (util.inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it++); }else { it++; } } //set parsimony groups to left child map parsimony = tree[lc].pGroups; int maxPars = 1; //look at right child groups and update maxPars if right child has something higher for that group. for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){ it2 = parsimony.find(it->first); if (it2 != parsimony.end()) { parsimony[it->first]++; }else { parsimony[it->first] = 1; } if(parsimony[it->first] > maxPars){ maxPars = parsimony[it->first]; } } // this is true if right child had a greater parsimony for a certain group if(maxPars > 1){ //erase all the groups that are only 1 because you found something with 2. for(it=parsimony.begin();it!=parsimony.end();){ if(it->second == 1){ parsimony.erase(it++); }else { it++; } } for(it=parsimony.begin();it!=parsimony.end();it++){ parsimony[it->first] = 1; } } return parsimony; } catch(exception& e) { m->errorOut(e, "Tree", "mergeUserGroups"); exit(1); } } /**************************************************************************************************/ map Tree::mergeGcounts(int position) { try{ map::iterator pos; int lc = tree[position].getLChild(); int rc = tree[position].getRChild(); map sum = tree[lc].pcount; for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){ sum[it->first] += it->second; } return sum; } catch(exception& e) { m->errorOut(e, "Tree", "mergeGcounts"); exit(1); } } /**************************************************************************************************/ int Tree::randomLabels(vector& nodesToSwap) { try { if (nodesToSwap.size() < 1) { return 0; } //nothing to swap for(int j = 0; j < nodesToSwap.size()-1;){ if (m->getControl_pressed()) { break; } int z = nodesToSwap[j]; int i = nodesToSwap[j+1]; swapLabels(z,i); j += 2; } return 0; } catch(exception& e) { m->errorOut(e, "Tree", "randomLabels"); exit(1); } } /**************************************************************************************************/ //you only want to randomize the nodes that are from a group the user wants analyzed, so //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them. int Tree::swapLabels(int first, int second) { try { if ((first > numLeaves) || (second > numLeaves)) { m->mothurOut("[ERROR]: cannot swap tree indexes.\n"); m->setControl_pressed(true); return 0; } //switches node i and node z's info. 
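        //everything that ties a leaf to its group moves with it: pGroups, the group list, the name (plus its entry in the name-to-index map), and pcount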
map lib_hold = tree[first].pGroups; tree[first].pGroups = (tree[second].pGroups); tree[second].pGroups = (lib_hold); vector zgroup = tree[first].getGroup(); tree[first].setGroup(tree[second].getGroup()); tree[second].setGroup(zgroup); string zname = tree[first].getName(); tree[first].setName(tree[second].getName()); setIndex(tree[second].getName(), first); tree[second].setName(zname); setIndex(zname, second); map gcount_hold = tree[first].pcount; tree[first].pcount = (tree[second].pcount); tree[second].pcount = (gcount_hold); return 1; } catch(exception& e) { m->errorOut(e, "Tree", "swapLabels"); exit(1); } } /*************************************************************************************************/ void Tree::assembleRandomUnifracTree(vector g) { randomLabels(g); assembleTree(); } /*************************************************************************************************/ //for now it's just random topology but may become random labels as well later that why this is such a simple function now... void Tree::assembleRandomTree(Utils* myUtil) { randomTopology(myUtil); assembleTree(); } /**************************************************************************************************/ void Tree::randomTopology(Utils* myUtil) { try { for(int i=0;igetRandomIndex(i); if(tree[rnd_index1].getParent() == -1){escape = 1;} } escape = 0; while(escape == 0){ rnd_index2 = myUtil->getRandomIndex(i); if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){ escape = 1; } } tree[i].setChildren(rnd_index1,rnd_index2); tree[i].setParent(-1); tree[rnd_index1].setParent(i); tree[rnd_index2].setParent(i); } } catch(exception& e) { m->errorOut(e, "Tree", "randomTopology"); exit(1); } } /*****************************************************************/ vector Tree::getNodes(vector theseGroups) { try { set nodes; for (int i = 0; i < theseGroups.size(); i++) { if (m->getControl_pressed()) { break; } map >::iterator it = groupNodeInfo.find(theseGroups[i]); if (it != groupNodeInfo.end()) {//we have nodes for this group for (int j = 0; j < (it->second).size(); j++) { nodes.insert((it->second)[j]); } //removes dups } } vector uniqueNodes; for (set::iterator it = nodes.begin(); it != nodes.end(); it++) { uniqueNodes.push_back(*it); } return uniqueNodes; } catch(exception& e) { m->errorOut(e, "Tree", "getNodes"); exit(1); } } /*****************************************************************/ void Tree::print(ostream& out) { try { int root = findRoot(); printBranch(root, out, "branch"); out << ";" << endl; } catch(exception& e) { m->errorOut(e, "Tree", "print"); exit(1); } } /*****************************************************************/ void Tree::print(ostream& out, map nameMap) { try { int root = findRoot(); printBranch(root, out, nameMap); out << ";" << endl; } catch(exception& e) { m->errorOut(e, "Tree", "print"); exit(1); } } /*****************************************************************/ void Tree::print(ostream& out, string mode) { try { int root = findRoot(); printBranch(root, out, mode); out << ";" << endl; } catch(exception& e) { m->errorOut(e, "Tree", "print"); exit(1); } } /*****************************************************************/ // This prints out the tree in Newick form. 
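//	e.g. a three-leaf tree prints roughly as ((A:0.1,B:0.1):0.2,C:0.3); -- an illustrative string only; which label is
//	written for a leaf (group vs. name) and whether ":length" appears depends on the mode handling in printBranch() below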
void Tree::createNewickFile(string f) { try { int root = findRoot(); filename = f; Utils util; util.openOutputFile(filename, out); printBranch(root, out, "branch"); // you are at the end of the tree out << ";" << endl; out.close(); } catch(exception& e) { m->errorOut(e, "Tree", "createNewickFile"); exit(1); } } /*****************************************************************/ //This function finds the index of the root node. int Tree::findRoot() { try { for (int i = 0; i < numNodes; i++) { //you found the root if (tree[i].getParent() == -1) { return i; } } return -1; } catch(exception& e) { m->errorOut(e, "Tree", "findRoot"); exit(1); } } /*****************************************************************/ void Tree::printBranch(int node, ostream& out, map names) { try { // you are not a leaf if (tree[node].getLChild() != -1) { out << "("; printBranch(tree[node].getLChild(), out, names); out << ","; printBranch(tree[node].getRChild(), out, names); out << ")"; //if there is a branch length then print it if (!util.isEqual(tree[node].getBranchLength(), -1)) { out << ":" << tree[node].getBranchLength(); } }else { //you are a leaf map::iterator itNames = names.find(tree[node].getName()); Utils util; string outputString = ""; if (itNames != names.end()) { vector dupNames; util.splitAtComma((itNames->second), dupNames); if (dupNames.size() == 1) { outputString += tree[node].getName(); if (!util.isEqual(tree[node].getBranchLength(), -1)) { outputString += ":" + toString(tree[node].getBranchLength()); } }else { outputString += "("; for (int u = 0; u < dupNames.size()-1; u++) { outputString += dupNames[u]; if (!util.isEqual(tree[node].getBranchLength(), -1)) { outputString += ":" + toString(0.0); } outputString += ","; } outputString += dupNames[dupNames.size()-1]; if (!util.isEqual(tree[node].getBranchLength(), -1)) { outputString += ":" + toString(0.0); } outputString += ")"; if (!util.isEqual(tree[node].getBranchLength(), -1)) { outputString += ":" + toString(tree[node].getBranchLength()); } } }else { outputString = tree[node].getName(); //if there is a branch length then print it if (!util.isEqual(tree[node].getBranchLength(), -1)) { outputString += ":" + toString(tree[node].getBranchLength()); } m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct.\n"); } out << outputString; } } catch(exception& e) { m->errorOut(e, "Tree", "printBranch"); exit(1); } } /*****************************************************************/ void Tree::printBranch(int node, ostream& out, string mode) { try { // you are not a leaf if (tree[node].getLChild() != -1) { out << "("; printBranch(tree[node].getLChild(), out, mode); out << ","; printBranch(tree[node].getRChild(), out, mode); out << ")"; if (mode == "branch") { //if there is a branch length then print it if (!util.isEqual(tree[node].getBranchLength(), -1)) { out << ":" << tree[node].getBranchLength(); } }else if (mode == "boot") { //if there is a label then print it if (tree[node].getLabel() != "") { out << tree[node].getLabel(); } }else if (mode == "both") { if (tree[node].getLabel() != "") { out << tree[node].getLabel(); } //if there is a branch length then print it if (!util.isEqual(tree[node].getBranchLength(), -1)) { out << ":" << tree[node].getBranchLength(); } } }else { //you are a leaf vector leafGroup = ct->getGroups(tree[node].getName()); if (mode == "branch") { out << leafGroup[0]; //if there is a branch length then print it if (!util.isEqual(tree[node].getBranchLength(), -1)) { out << ":" << 
tree[node].getBranchLength(); } }else if (mode == "boot") { out << leafGroup[0]; //if there is a label then print it if (tree[node].getLabel() != "") { out << tree[node].getLabel(); } }else if (mode == "both") { out << tree[node].getName(); if (tree[node].getLabel() != "") { out << tree[node].getLabel(); } //if there is a branch length then print it if (!util.isEqual(tree[node].getBranchLength(), -1)) { out << ":" << tree[node].getBranchLength(); } } } } catch(exception& e) { m->errorOut(e, "Tree", "printBranch"); exit(1); } } /*****************************************************************/ void Tree::printBranch(int node, ostream& out, string mode, vector& theseNodes) { try { // you are not a leaf if (theseNodes[node].getLChild() != -1) { out << "("; printBranch(theseNodes[node].getLChild(), out, mode); out << ","; printBranch(theseNodes[node].getRChild(), out, mode); out << ")"; if (mode == "branch") { //if there is a branch length then print it if (!util.isEqual(theseNodes[node].getBranchLength(), -1)) { out << ":" << theseNodes[node].getBranchLength(); } }else if (mode == "boot") { //if there is a label then print it if (theseNodes[node].getLabel() != "") { out << theseNodes[node].getLabel(); } }else if (mode == "both") { if (theseNodes[node].getLabel() != "") { out << theseNodes[node].getLabel(); } //if there is a branch length then print it if (!util.isEqual(theseNodes[node].getBranchLength(), -1)) { out << ":" << theseNodes[node].getBranchLength(); } } }else { //you are a leaf vector leafGroup = ct->getGroups(theseNodes[node].getName()); if (mode == "branch") { out << leafGroup[0]; //if there is a branch length then print it if (!util.isEqual(theseNodes[node].getBranchLength(), -1)) { out << ":" << theseNodes[node].getBranchLength(); } }else if (mode == "boot") { out << leafGroup[0]; //if there is a label then print it if (theseNodes[node].getLabel() != "") { out << theseNodes[node].getLabel(); } }else if (mode == "both") { out << theseNodes[node].getName(); if (theseNodes[node].getLabel() != "") { out << theseNodes[node].getLabel(); } //if there is a branch length then print it if (!util.isEqual(theseNodes[node].getBranchLength(), -1)) { out << ":" << theseNodes[node].getBranchLength(); } } } } catch(exception& e) { m->errorOut(e, "Tree", "printBranch"); exit(1); } } /*****************************************************************/ void Tree::printTree() { for(int i=0;i&); Tree(CountTable*, vector&); Tree(CountTable*, vector< vector >&, vector&); //create tree from sim matrix ~Tree(); CountTable* getCountTable() { return ct; } vector getTreeNames() { return Treenames; } void getCopy(Tree*); //makes tree a copy of the one passed in. void getCopy(Tree* copy, bool); //makes a copy of the tree structure passed in, (just parents, children and br). Used with the Tree(TreeMap*) constructor. Assumes the tmap already has set seqs groups you want. Used by subsample to reassign seqs you don't want included to group "doNotIncludeMe". void getSubTree(Tree*, vector); //makes tree a that contains only the names passed in. 
//this function takes the leaf info and populates the non leaf nodes int assembleTree(); void assembleRandomTree(Utils*); //pass tree indexes in random order void assembleRandomUnifracTree(vector); //pass nodes to swap in random order void createNewickFile(string); int getIndex(string); void setIndex(string, int); int getNumNodes() { return numNodes; } int getNumLeaves(){ return numLeaves; } map mergeUserGroups(int, vector); //returns a map with a groupname and the number of times that group was seen in the children void printTree(); void print(ostream&); void print(ostream&, string); void print(ostream&, map); int findRoot(); //return index of root node vector tree; //the first n nodes are the leaves, where n is the number of sequences. map< string, vector > groupNodeInfo; //maps group to indexes of leaf nodes with that group, different groups may contain same node because of names file. vector getNodes(vector); //return tree indexes of nodes for groups passed in private: MothurOut* m; vector Treenames; Utils util; CountTable* ct; int numNodes, numLeaves; ofstream out; string filename; //map names; map::iterator it, it2; map mergeGroups(int); //returns a map with a groupname and the number of times that group was seen in the children map mergeGcounts(int); map indexes; //maps seqName -> index in tree vector int randomLabels(vector& nodesToSwap); int swapLabels(int first, int second); void addNamesToCounts(map); void randomTopology(Utils*); void randomLabels(vector); void printBranch(int, ostream&, map); //recursively print out tree void printBranch(int, ostream&, string); int populateNewTree(vector&, int, int&); void printBranch(int, ostream&, string, vector&); void pruneNewTree(Tree* copy, vector namesToInclude); }; #endif mothur-1.48.0/source/datastructures/treemap.cpp000077500000000000000000000307111424121717000216440ustar00rootroot00000000000000/* * treemap.cpp * Mothur * * Created by Sarah Westcott on 1/26/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "treemap.h" /************************************************************/ TreeMap::TreeMap(string filename) { m = MothurOut::getInstance(); ofstream out2; util.openOutputFileAppend(filename, out2); out2 << endl; out2.close(); groupFileName = filename; util.openInputFile(filename, fileHandle); } /************************************************************/ TreeMap::~TreeMap(){} /************************************************************/ int TreeMap::readMap(string gf) { try { ofstream out2; util.openOutputFileAppend(gf, out2); out2 << endl; out2.close(); groupFileName = gf; util.openInputFile(gf, fileHandle); string seqName, seqGroup; int error = 0; string rest = ""; char buffer[4096]; bool pairDone = false; bool columnOne = true; while (!fileHandle.eof()) { if (m->getControl_pressed()) { fileHandle.close(); return 1; } fileHandle.read(buffer, 4096); vector pieces = util.splitWhiteSpace(rest, buffer, fileHandle.gcount()); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { setNamesOfGroups(seqGroup); map::iterator itCheck = treemap.find(seqName); if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. 
Please correct.\n"); } else { namesOfSeqs.push_back(seqName); treemap[seqName].groupname = seqGroup; //store data in map it2 = seqsPerGroup.find(seqGroup); if (it2 == seqsPerGroup.end()) { //if it's a new group seqsPerGroup[seqGroup] = 1; }else {//it's a group we already have seqsPerGroup[seqGroup]++; } } pairDone = false; } } } fileHandle.close(); if (rest != "") { vector pieces = util.splitWhiteSpace(rest); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { setNamesOfGroups(seqGroup); map::iterator itCheck = treemap.find(seqName); if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); } else { namesOfSeqs.push_back(seqName); treemap[seqName].groupname = seqGroup; //store data in map it2 = seqsPerGroup.find(seqGroup); if (it2 == seqsPerGroup.end()) { //if it's a new group seqsPerGroup[seqGroup] = 1; }else {//it's a group we already have seqsPerGroup[seqGroup]++; } } pairDone = false; } } } return error; } catch(exception& e) { m->errorOut(e, "TreeMap", "readMap"); exit(1); } } /************************************************************/ int TreeMap::readMap() { try { string seqName, seqGroup; int error = 0; string rest = ""; char buffer[4096]; bool pairDone = false; bool columnOne = true; while (!fileHandle.eof()) { if (m->getControl_pressed()) { fileHandle.close(); return 1; } fileHandle.read(buffer, 4096); vector pieces = util.splitWhiteSpace(rest, buffer, fileHandle.gcount()); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { setNamesOfGroups(seqGroup); map::iterator itCheck = treemap.find(seqName); if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct.\n"); } else { namesOfSeqs.push_back(seqName); treemap[seqName].groupname = seqGroup; //store data in map it2 = seqsPerGroup.find(seqGroup); if (it2 == seqsPerGroup.end()) { //if it's a new group seqsPerGroup[seqGroup] = 1; }else {//it's a group we already have seqsPerGroup[seqGroup]++; } } pairDone = false; } } } fileHandle.close(); if (rest != "") { vector pieces = util.splitWhiteSpace(rest); for (int i = 0; i < pieces.size(); i++) { if (columnOne) { seqName = pieces[i]; columnOne=false; } else { seqGroup = pieces[i]; pairDone = true; columnOne=true; } if (pairDone) { setNamesOfGroups(seqGroup); map::iterator itCheck = treemap.find(seqName); if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. 
Please correct.\n"); } else { namesOfSeqs.push_back(seqName); treemap[seqName].groupname = seqGroup; //store data in map it2 = seqsPerGroup.find(seqGroup); if (it2 == seqsPerGroup.end()) { //if it's a new group seqsPerGroup[seqGroup] = 1; }else {//it's a group we already have seqsPerGroup[seqGroup]++; } } pairDone = false; } } } return error; } catch(exception& e) { m->errorOut(e, "TreeMap", "readMap"); exit(1); } } /************************************************************/ void TreeMap::addSeq(string seqName, string seqGroup) { namesOfSeqs.push_back(seqName); setNamesOfGroups(seqGroup); treemap[seqName].groupname = seqGroup; //store data in map it2 = seqsPerGroup.find(seqGroup); if (it2 == seqsPerGroup.end()) { //if it's a new group seqsPerGroup[seqGroup] = 1; }else {//it's a group we already have seqsPerGroup[seqGroup]++; } } /************************************************************/ void TreeMap::removeSeq(string seqName) { //erase name from namesOfSeqs for (int i = 0; i < namesOfSeqs.size(); i++) { if (namesOfSeqs[i] == seqName) { namesOfSeqs.erase(namesOfSeqs.begin()+i); break; } } //decrement sequences in this group string group = treemap[seqName].groupname; seqsPerGroup[group]--; //remove seq from treemap it = treemap.find(seqName); treemap.erase(it); } /************************************************************/ int TreeMap::getNumGroups() { return namesOfGroups.size(); } /************************************************************/ int TreeMap::getNumSeqs() { return namesOfSeqs.size(); } /************************************************************/ string TreeMap::getGroup(string sequenceName) { it = treemap.find(sequenceName); if (it != treemap.end()) { //sequence name was in group file return it->second.groupname; }else { return "not found"; } } /************************************************************/ void TreeMap::setNamesOfGroups(string seqGroup) { int i, count; count = 0; for (i=0; ierrorOut(e, "TreeMap", "isValidGroup"); exit(1); } } /***********************************************************************/ void TreeMap::print(ostream& output){ try { for(it = treemap.begin(); it != treemap.end(); it++){ output << it->first << '\t' << it->second.groupname << '\t' << it->second.vectorIndex << endl; } } catch(exception& e) { m->errorOut(e, "TreeMap", "print"); exit(1); } } /************************************************************/ void TreeMap::makeSim(vector ThisnamesOfGroups) { try { //set names of groups namesOfGroups = ThisnamesOfGroups; //set names of seqs to names of groups namesOfSeqs = ThisnamesOfGroups; // make map where key and value are both the group name since that what the tree.shared command wants for (int i = 0; i < namesOfGroups.size(); i++) { treemap[namesOfGroups[i]].groupname = namesOfGroups[i]; seqsPerGroup[namesOfGroups[i]] = 1; } numGroups = namesOfGroups.size(); } catch(exception& e) { m->errorOut(e, "TreeMap", "makeSim"); exit(1); } } /************************************************************/ void TreeMap::makeSim(ListVector* list) { try { //set names of groups namesOfGroups.clear(); for(int i = 0; i < list->size(); i++) { namesOfGroups.push_back(list->get(i)); } //set names of seqs to names of groups namesOfSeqs = namesOfGroups; // make map where key and value are both the group name since that what the tree.shared command wants for (int i = 0; i < namesOfGroups.size(); i++) { treemap[namesOfGroups[i]].groupname = namesOfGroups[i]; seqsPerGroup[namesOfGroups[i]] = 1; } numGroups = namesOfGroups.size(); } catch(exception& e) { 
m->errorOut(e, "TreeMap", "makeSim"); exit(1); } } /************************************************************/ int TreeMap::getCopy(TreeMap& copy){ try { namesOfGroups = copy.getNamesOfGroups(); numGroups = copy.getNumGroups(); namesOfSeqs = copy.namesOfSeqs; seqsPerGroup = copy.seqsPerGroup; treemap = copy.treemap; return 0; } catch(exception& e) { m->errorOut(e, "TreeMap", "getCopy"); exit(1); } } /************************************************************/ vector TreeMap::getNamesSeqs(){ try { vector names; for(it = treemap.begin(); it != treemap.end(); it++){ names.push_back(it->first); } return names; } catch(exception& e) { m->errorOut(e, "TreeMap", "getNamesSeqs"); exit(1); } } /************************************************************/ vector TreeMap::getNamesSeqs(vector picked){ try { vector names; for(it = treemap.begin(); it != treemap.end(); it++){ //if you are belong to one the the groups in the picked vector add you if (util.inUsersGroups(it->second.groupname, picked)) { names.push_back(it->first); } } return names; } catch(exception& e) { m->errorOut(e, "TreeMap", "getNamesSeqs"); exit(1); } } /************************************************************/ mothur-1.48.0/source/datastructures/treemap.h000077500000000000000000000036361424121717000213170ustar00rootroot00000000000000#ifndef TREEMAP_H #define TREEMAP_H /* * treemap.h * Mothur * * Created by Sarah Westcott on 1/26/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothur.h" #include "listvector.hpp" /* This class is used by the read.tree command to build the tree container. */ struct GroupIndex { string groupname; int vectorIndex; }; class TreeMap { public: TreeMap() { m = MothurOut::getInstance(); } TreeMap(string); ~TreeMap(); int readMap(); int readMap(string); int getNumGroups(); int getNumSeqs(); //void setIndex(string, int); //sequencename, index //int getIndex(string); //returns vector index of sequence bool isValidGroup(string); //return true if string is a valid group void removeSeq(string); //removes a sequence, this is to accomadate trees that do not contain all the seqs in your groupfile string getGroup(string); void addSeq(string, string); void addGroup(string s) { setNamesOfGroups(s); } vector getNamesOfGroups() { sort(namesOfGroups.begin(), namesOfGroups.end()); return namesOfGroups; } void print(ostream&); void makeSim(vector); //takes groupmap info and fills treemap for use by tree.shared command. void makeSim(ListVector*); //takes listvector info and fills treemap for use by tree.shared command. vector getNamesSeqs(); vector getNamesSeqs(vector); //get names of seqs belonging to a group or set of groups int getCopy(TreeMap&); vector namesOfSeqs; map seqsPerGroup; //groupname, number of seqs in that group. map treemap; //sequence name and private: vector namesOfGroups; ifstream fileHandle; string groupFileName; int numGroups; map::iterator it; map::iterator it2; void setNamesOfGroups(string); MothurOut* m; Utils util; }; #endif mothur-1.48.0/source/datastructures/treenode.cpp000077500000000000000000000054531424121717000220210ustar00rootroot00000000000000/* * treenode.cpp * Mothur * * Created by Sarah Westcott on 1/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "treenode.h" /****************************************************************/ Node::Node() { m = MothurOut::getInstance(); //initialize node name = ""; branchLength = -1; parent = -1; lchild = -1; rchild = -1; length2leaf = 0.0; label = ""; } /****************************************************************/ void Node::setName(string Name) { name = Name; } /****************************************************************/ void Node::setGroup(vector groups) { group =groups; } /****************************************************************/ void Node::setBranchLength(float l) { branchLength = l; } /****************************************************************/ void Node::setLabel(string l) { label = l; } /****************************************************************/ void Node::setLengthToLeaves(float l) { length2leaf = l; } /****************************************************************/ void Node::setParent(int p) { parent = p; } /****************************************************************/ void Node::setIndex(int i) { vectorIndex = i; } /****************************************************************/ void Node::setChildren(int lc, int rc) { lchild = lc; rchild = rc; } //leftchild, rightchild /****************************************************************/ string Node::getName() { return name; } /****************************************************************/ vector Node::getGroup() { return group; } /****************************************************************/ float Node::getBranchLength() { return branchLength; } /****************************************************************/ string Node::getLabel() { return label; } /****************************************************************/ float Node::getLengthToLeaves() { return length2leaf; } /****************************************************************/ int Node::getParent() { return parent; } /****************************************************************/ int Node::getLChild() { return lchild; } /****************************************************************/ int Node::getRChild() { return rchild; } /****************************************************************/ int Node::getIndex() { return vectorIndex; } /****************************************************************/ //to be used by printTree in the Tree class to print the leaf info void Node::printNode() { try{ m->mothurOut(name + " " + toString(parent) + " " + toString(lchild) + " " + toString(rchild) + " \n"); } catch(exception& e) { m->errorOut(e, "Node", "printNode"); exit(1); } } /****************************************************************/ mothur-1.48.0/source/datastructures/treenode.h000077500000000000000000000033141424121717000214600ustar00rootroot00000000000000#ifndef TREENODE_H #define TREENODE_H /* * treenode.h * Mothur * * Created by Sarah Westcott on 1/23/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothur.h" #include "mothurout.h" /* This class represents a node on a tree. 
*/ class Node { public: Node(); //pass it the sequence name ~Node() { pGroups.clear(); pcount.clear(); }; void setName(string); void setGroup(vector); void setBranchLength(float); void setLabel(string); void setParent(int); void setChildren(int, int); //leftchild, rightchild void setIndex(int); void setLengthToLeaves(float); string getName(); vector getGroup(); float getBranchLength(); float getLengthToLeaves(); string getLabel(); int getParent(); int getLChild(); int getRChild(); int getIndex(); void printNode(); //prints out the name and the branch length //pGroup is the parsimony group info. i.e. for a leaf node it would contain 1 enter pGroup["groupname"] = 1; //but for a branch node it may contain several entries so if the nodes children are from different groups it //would have at least two entries pgroup["groupnameOfLeftChild"] = 1, pgroup["groupnameOfRightChild"] = 1. //pCount is the nodes descendant group infomation. i.e. pCount["black"] = 20 would mean that 20 of the nodes //descendant are from group black. map pGroups; //leaf nodes will only have 1 group, but branch nodes may have multiple groups. map pcount; private: string name, label; vector group; float branchLength, length2leaf; int parent; int lchild; int rchild; int vectorIndex; MothurOut* m; }; #endif mothur-1.48.0/source/display.h000077500000000000000000000016671424121717000162540ustar00rootroot00000000000000#ifndef DISPLAY_H #define DISPLAY_H #include "calculator.h" #include "fileoutput.h" /***********************************************************************/ class Display { public: virtual void update(SAbundVector& rank) = 0; virtual void update(vector shared, int numSeqs) {} virtual void update(vector& shared, int numSeqs, bool pairs, map) {} virtual void init(string) = 0; virtual void reset() = 0; virtual void close() = 0; virtual bool isCalcMultiple() = 0; virtual void setAll(bool){} virtual bool hasLciHci(){ return false; } virtual bool getAll() { bool a; return a; } virtual bool calcNeedsAll() { bool a; return a; } virtual string getName() { return ""; }; virtual ~Display() = default; Display() { m = MothurOut::getInstance(); } protected: MothurOut* m; }; /***********************************************************************/ #endif mothur-1.48.0/source/dlibshuff.cpp000077500000000000000000000040061424121717000170760ustar00rootroot00000000000000/* * DLibshuff.cpp * Mothur * * Created by Pat Schloss on 4/8/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. 
* */ #include "dlibshuff.h" /***********************************************************************/ DLibshuff::DLibshuff(FullMatrix* D, int it, float step, float co) : Libshuff(D, it, step, co){ numDXs = int(cutOff / stepSize); } /***********************************************************************/ float DLibshuff::evaluatePair(int i, int j){ return dCalculate(i,j); } /***********************************************************************/ vector > DLibshuff::evaluateAll(){ savedMins.resize(numGroups); vector > dCXYValues(numGroups); for(int i=0;igetControl_pressed()) { return sum; } minXY = getMinXY(x, y); if (m->getControl_pressed()) { return sum; } vector nx = calcN(minX); if (m->getControl_pressed()) { return sum; } vector nxy = calcN(minXY); if (m->getControl_pressed()) { return sum; } for(int i=0;i DLibshuff::calcN(vector minVector){ vector counts(numDXs,0); int precision = int(1 / stepSize); for(int i=0;i > evaluateAll(); float evaluatePair(int, int); private: int numDXs; double dCalculate(int, int); vector calcN(vector); }; #endif mothur-1.48.0/source/endiannessmacros.h000077500000000000000000000106161424121717000201350ustar00rootroot00000000000000#ifndef EDIANNESSMACROS_H #define EDIANNESSMACROS_H /* * endiannessmacros.h * Mothur * * Created by westcott on 7/9/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ /*********************************************************************/ /*********************************************************************/ // The following is copied from the staden io_lib-1.12.4 os.h - thanks! /*********************************************************************/ /*********************************************************************/ /* * Author: * MRC Laboratory of Molecular Biology * Hills Road * Cambridge CB2 2QH * United Kingdom * * Description: operating system specific type definitions * */ /* Mac FAT binaries or unknown. Auto detect based on CPU type */ #if !defined(SP_BIG_ENDIAN) && !defined(SP_LITTLE_ENDIAN) /* * x86 equivalents */ #if defined(__i386) || defined(__i386__) || defined(__ia64__) || defined(WIN32) || defined(__arm__) || (defined(__mips__) && defined(__MIPSEL__)) || defined(__SYMBIAN32__) || \ defined(__x86_64__) || defined(__x86_64) || defined(__i686__) || defined(__i686) || defined(__amd64__) || defined(__amd64) || defined(__LITTLE_ENDIAN__) #define SP_LITTLE_ENDIAN #else #define SP_BIG_ENDIAN #endif /* * SUN Sparc */ #if defined(__sparc__) || defined(__sparc) # if defined(SP_LITTLE_ENDIAN) # undef SP_LITTLE_ENDIAN # endif # define SP_BIG_ENDIAN #endif /* Some catch-alls */ #if defined(__LITTLE_ENDIAN__) || defined(__LITTLEENDIAN__) # define SP_LITTLE_ENDIAN #endif #if defined(__BIG_ENDIAN__) || defined(__BIGENDIAN__) # define SP_BIG_ENDIAN #endif #if defined(SP_BIG_ENDIAN) && defined(SP_LITTLE_ENDIAN) # error Both BIG and LITTLE endian defined. Fix os.h and/or Makefile #endif #if !defined(SP_BIG_ENDIAN) && !defined(SP_LITTLE_ENDIAN) # error Neither BIG nor LITTLE endian defined. Fix os.h and/or Makefile #endif #endif /*----------------------------------------------------------------------------- * Byte swapping macros */ /* * Our new swap runs at the same speed on Ultrix, but substantially faster * (300% for swap_int4, ~50% for swap_int2) on an Alpha (due to the lack of * decent 'char' support). * * They also have the ability to swap in situ (src == dst). Newer code now * relies on this so don't change back! 
*/ #define iswap_int8(x) \ (((x & 0x00000000000000ffLL) << 56) + \ ((x & 0x000000000000ff00LL) << 40) + \ ((x & 0x0000000000ff0000LL) << 24) + \ ((x & 0x00000000ff000000LL) << 8) + \ ((x & 0x000000ff00000000LL) >> 8) + \ ((x & 0x0000ff0000000000LL) >> 24) + \ ((x & 0x00ff000000000000LL) >> 40) + \ ((x & 0xff00000000000000LL) >> 56)) #define iswap_int4(x) \ (((x & 0x000000ff) << 24) + \ ((x & 0x0000ff00) << 8) + \ ((x & 0x00ff0000) >> 8) + \ ((x & 0xff000000) >> 24)) #define iswap_int2(x) \ (((x & 0x00ff) << 8) + \ ((x & 0xff00) >> 8)) #define swap_int8(src, dst) ((dst) = iswap_int8(src)) #define swap_int4(src, dst) ((dst) = iswap_int4(src)) #define swap_int2(src, dst) ((dst) = iswap_int2(src)) /* * Linux systems may use byteswap.h to get assembly versions of byte-swap * on intel systems. This can be as trivial as the bswap opcode, which works * out at over 2-times faster than iswap_int4 above. */ #if 0 #if defined(__linux__) # include # undef iswap_int8 # undef iswap_int4 # undef iswap_int2 # define iswap_int8 bswap_64 # define iswap_int4 bswap_32 # define iswap_int2 bswap_16 #endif #endif /* * Macros to specify that data read in is of a particular endianness. * The macros here swap to the appropriate order for the particular machine * running the macro and return the new answer. These may also be used when * writing to a file to specify that we wish to write in (eg) big endian * format. * * This leads to efficient code as most of the time these macros are * trivial. */ #ifdef SP_BIG_ENDIAN #define be_int8(x) (x) #define be_int4(x) (x) #define be_int2(x) (x) #define be_int1(x) (x) #define le_int8(x) iswap_int8((x)) #define le_int4(x) iswap_int4((x)) #define le_int2(x) iswap_int2((x)) #define le_int1(x) (x) #endif #ifdef SP_LITTLE_ENDIAN #define be_int8(x) iswap_int8((x)) #define be_int4(x) iswap_int4((x)) #define be_int2(x) iswap_int2((x)) #define be_int1(x) (x) #define le_int8(x) (x) #define le_int4(x) (x) #define le_int2(x) (x) #define le_int1(x) (x) #endif #endif mothur-1.48.0/source/engines/000077500000000000000000000000001424121717000160515ustar00rootroot00000000000000mothur-1.48.0/source/engines/batchengine.cpp000066400000000000000000000170011424121717000210230ustar00rootroot00000000000000// // batchengine.cpp // Mothur // // Created by Sarah Westcott on 10/21/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "batchengine.hpp" /***********************************************************************/ //This function opens the batchfile to be used by BatchEngine::getInput. 
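//
// A batch file is a plain-text list of mothur commands, one per line; lines starting with '#'
// are skipped as comments, and a set.logfile command switches off the default log buffering.
// A hypothetical example (file and fasta names are made up):
//
//   # example.batch
//   set.logfile(name=run1.logfile)
//   summary.seqs(fasta=final.fasta)
//   quit()
//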
BatchEngine::BatchEngine(string tpath, string batchFile, map ev) : Engine(tpath) { try { batchFile = util.removeQuotes(batchFile); ifstream inBatchTest; openedBatch = util.openInputFile(batchFile, inBatchTest, "no error"); if (!openedBatch) { if (util.checkLocations(batchFile, current->getLocations())) { openedBatch = util.openInputFile(batchFile, inBatchTest); } else { m->mothurOut("[ERROR]: unable to open " + batchFile + " batch file, please correct.\n"); } } batchFileName = batchFile; noBufferNeeded = true; if (openedBatch) { //check for set.logfile string nextcommand = "#"; //force grabbing first command while (!inBatchTest.eof()) { nextcommand = util.getline(inBatchTest); gobble(inBatchTest); if (nextcommand[0] != '#') { //skip comments int pos = nextcommand.find("set.logfile"); if (pos != string::npos) { noBufferNeeded = false; break; } } } inBatchTest.close(); openedBatch = util.openInputFile(batchFileName, inputBatchFile, "no error"); } if (noBufferNeeded) { if (m->getLogFileName() == "") { time_t ltime = time(nullptr); /* calendar time */ string outputPath = current->getOutputDir(); string logFileName = outputPath + "mothur." + toString(ltime) + ".logfile"; m->setLogFileName(logFileName, false); m->mothurOut("\n"); } } setEnvironmentVariables(ev); //inherit environmental variables from nested batch files bstart = time(nullptr); numBatches = 0; } catch(exception& e) { m->errorOut(e, "BatchEngine", "BatchEngine"); exit(1); } } /***********************************************************************/ BatchEngine::~BatchEngine(){ string batchesOutput = ""; if (numBatches != 0) { batchesOutput = " and " + toString(numBatches) + " batch file"; if (numBatches > 1) { batchesOutput += "s"; } } time_t end = time(nullptr); m->mothurOut("\n\nIt took " + toString(end-bstart) + " seconds to run " + toString(numCommandsRun) + " commands" + batchesOutput + " from " + batchFileName + " batch file.\n\n"); } /***********************************************************************/ //This Function allows the user to run a batchfile containing several commands on Dotur bool BatchEngine::getInput(){ try { //check if this is a valid batchfile if (!openedBatch) { return true; } int quitCommandCalled = 0; while(quitCommandCalled != 1){ string input = getNextCommand(inputBatchFile); CommandOptionParser parser(input); string commandName = parser.getCommandString(); string options = parser.getOptionString(); m->mothurOut("\nmothur > " + input + "\n"); if (m->getControl_pressed()) { input = "quit()"; } if (commandName != "") { numCommandsRun++; m->setExecuting(true); m->resetCommandErrors(); m->setChangedSeqNames(true); m->setChangedGroupNames(true); Command* command = cFactory->getCommand(commandName, options); quitCommandCalled = command->execute(); delete command; //if we aborted command if (quitCommandCalled == 2) { m->mothurOut("[ERROR]: did not complete " + commandName + ".\n"); } if (m->getControl_pressed()) { break; } m->setControl_pressed(false); m->setExecuting(false); }else { m->mothurOut("[ERROR]: Invalid command.\n"); } } inputBatchFile.close(); return true; } catch(exception& e) { m->errorOut(e, "BatchEngine", "getInput"); exit(1); } } /***********************************************************************/ string BatchEngine::getNextCommand(ifstream& inputBatchFile) { try { string nextcommand = "#"; //force grabbing first command while (nextcommand[0] == '#') { //skip comments if (!inputBatchFile.eof()) { nextcommand = util.getline(inputBatchFile); gobble(inputBatchFile); }else { nextcommand = 
"quit()"; break; } //end of file, quit } //allow user to omit the () on the help and quit commands if (nextcommand == "quit") { nextcommand = "quit()"; } if (nextcommand == "help") { nextcommand = "help()"; } string type = findType(nextcommand); if (type == "batch") { m->mothurOut("/*****************************************************************************/\n"); BatchEngine newBatchEngine(path, nextcommand, environmentalVariables); if (newBatchEngine.getOpenedBatch()) { bool bail = false; while(!bail) { bail = newBatchEngine.getInput(); } numBatches++; } m->mothurOut("/*****************************************************************************/\n"); nextcommand = getNextCommand(inputBatchFile); }else if (type == "environment") { //set environmental variables string key, value; value = nextcommand; util.splitAtEquals(key, value); map::iterator it = environmentalVariables.find(key); if (it == environmentalVariables.end()) { environmentalVariables[key] = value; } else { it->second = value; } m->mothurOut("Setting environment variable " + key + " to " + value + "\n"); nextcommand = getNextCommand(inputBatchFile); }else { //assume command, look for environmental variables to replace int evPos = nextcommand.find_first_of('$'); if (evPos == string::npos) { //no '$' , check for mothurhome evPos = nextcommand.find("mothurhome"); if (evPos != string::npos) { replaceVariables(nextcommand); } }else { replaceVariables(nextcommand); } } if (m->getDebug()) { double ramUsed, total; ramUsed = util.getRAMUsed(); total = util.getTotalRAM(); m->mothurOut("RAM used: " + toString(ramUsed/(double)GIG) + " Gigabytes. Total Ram: " + toString(total/(double)GIG) + " Gigabytes.\n\n"); } return nextcommand; } catch(exception& e) { m->errorOut(e, "BatchEngine", "getNextCommand"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/engines/batchengine.hpp000066400000000000000000000011621424121717000210310ustar00rootroot00000000000000// // batchengine.hpp // Mothur // // Created by Sarah Westcott on 10/21/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef batchengine_hpp #define batchengine_hpp #include "engine.hpp" class BatchEngine : public Engine { public: BatchEngine(string, string, map); ~BatchEngine(); virtual bool getInput(); bool getOpenedBatch() { return openedBatch; } private: ifstream inputBatchFile; string getNextCommand(ifstream&); string batchFileName; bool openedBatch; time_t bstart; int numBatches; }; #endif /* batchengine_hpp */ mothur-1.48.0/source/engines/engine.hpp000077500000000000000000000130351424121717000200340ustar00rootroot00000000000000#ifndef ENGINE_HPP #define ENGINE_HPP /* * engine.hpp * * * Created by Pat Schloss on 8/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. 
* */ #include "commandoptionparser.hpp" #include "command.hpp" #include "commandfactory.hpp" #include "mothurout.h" class Engine { public: Engine(string tpath) { try { cFactory = CommandFactory::getInstance(); m = MothurOut::getInstance(); current = CurrentFile::getInstance(); m->resetCommandErrors(); string temppath = tpath.substr(0, (tpath.find_last_of("othur")-5)); //this will happen if you set the path variable to contain mothur's exe location if (temppath == "") { path = util.findProgramPath("mothur"); } else { path = temppath; } if (path != "") { string lastChar = path.substr(path.length()-1); if (lastChar != PATH_SEPARATOR) { path += PATH_SEPARATOR; } path = util.getFullPathName(path); } current->setProgramPath(util.getFullPathName(path)); //if you haven't set your own location #ifdef MOTHUR_FILES #else //set default location to search for files to mothur's executable location. This will resolve issue of double-clicking on the executable which opens mothur and sets pwd to your home directory instead of the mothur directory and leads to "unable to find file" errors. if (current->getProgramPath() != "") { vector temps; temps.push_back(current->getProgramPath()); current->setDefaultPath(temps); } #endif start = time(nullptr); numCommandsRun = 0; noBufferNeeded = false; } catch(exception& e) { m->errorOut(e, "Engine", "Engine"); exit(1); } } virtual ~Engine(){} virtual bool getInput() = 0; virtual string getLogFileName() { return m->getLogFileName(); } vector getOptions() { return options; } virtual void replaceVariables(string& nextCommand) { for (map::iterator it = environmentalVariables.begin(); it != environmentalVariables.end(); it++) { size_t pos = nextCommand.find("$"+it->first); while (pos != string::npos) { //allow for multiple uses of a environmental variable in a single command nextCommand.replace(pos,it->first.length()+1,it->second); //-1 to grab $char pos = nextCommand.find("$"+it->first); } } //replace mothurhome with mothur executable location unsigned long pos = nextCommand.find("mothurhome"); while (pos != string::npos) { //allow for multiple uses of mothurhome in a single command nextCommand.replace(pos,10,current->getProgramPath()); // pos = nextCommand.find("mothurhome"); } } virtual string findType(string nextCommand) { string type = "command"; //determine if this is a command or batch file / environmental variable //we know commands must include '(' characters for search for that size_t openParen = nextCommand.find_first_of('('); if (openParen == string::npos) { //no '(' character -> assume not a command, treat as new batchfile / environmental variable //are you another batch file or an environmental variable //if no '=' sign than not an environmental variable size_t equalsSign = nextCommand.find_first_of('='); if (equalsSign == string::npos) { //no '=' character -> assume not a environmental variable, treat as new batch type = "batch"; }else { //assume environmental variable. 
filenames can contain '=' characters, but this is a rare case type = "environment"; } } return type; } virtual void setEnvironmentVariables(map ev) { environmentalVariables = ev; //set HOME path is present in environment variables string homeEnvironmentTag = "HOMEPATH"; string homeEnvironmentValue = ""; #if defined NON_WINDOWS homeEnvironmentTag = "HOME"; #endif map::iterator it = environmentalVariables.find(homeEnvironmentTag); if (it != environmentalVariables.end()) { homeEnvironmentValue = it->second; } //parse PATH to set search locations for mothur tools //set HOME path is present in environment variables string pathEnvironmentTag = "PATH"; string pathEnvironmentValue = ""; char delim = ';'; #if defined NON_WINDOWS delim = ':'; #endif it = environmentalVariables.find(pathEnvironmentTag); if (it != environmentalVariables.end()) { pathEnvironmentValue = it->second; } vector pathDirs; util.splitAtChar(pathEnvironmentValue, pathDirs, delim); current->setPaths(pathDirs); current->setHomePath(homeEnvironmentValue); } protected: vector options; CommandFactory* cFactory; MothurOut* m; CurrentFile* current; Utils util; time_t start; int numCommandsRun; bool noBufferNeeded; string path; map environmentalVariables; }; #endif mothur-1.48.0/source/engines/interactengine.cpp000066400000000000000000000143521424121717000215610ustar00rootroot00000000000000// // interactengine.cpp // Mothur // // Created by Sarah Westcott on 10/21/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "interactengine.hpp" #include "batchengine.hpp" /***********************************************************************/ InteractEngine::InteractEngine(string tpath, map ev) : Engine(tpath) { if (m->getLogFileName() == "") { time_t ltime = time(nullptr); /* calendar time */ string outputPath = current->getOutputDir(); string logFileName = outputPath + "mothur." + toString(ltime) + ".logfile"; m->setLogFileName(logFileName, false); m->mothurOut("\n"); } setEnvironmentVariables(ev); } /***********************************************************************/ InteractEngine::~InteractEngine(){} /***********************************************************************/ //This function allows the user to input commands one line at a time until they quit. //If the command is garbage it does nothing. 
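//
// Input typed at the prompt is classified by Engine::findType(): text containing '(' runs as a
// command, text without '(' but containing '=' is stored as an environment variable, and
// anything else is treated as the name of a batch file to run. Hypothetical examples:
//   mothur > cluster.split(fasta=final.fasta, taxonomy=final.taxonomy)   (command)
//   mothur > PROCESSORS=8                                                (environment variable)
//   mothur > stability.batch                                             (batch file)
//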
bool InteractEngine::getInput(){ try { string input = ""; string commandName = ""; string options = ""; int quitCommandCalled = 0; while(quitCommandCalled != 1){ input = getCommand(); if (m->getControl_pressed()) { input = "quit()"; } //allow user to omit the () on the quit command if (input == "quit") { input = "quit()"; } if (input == "help") { input = "help()"; } CommandOptionParser parser(input); commandName = parser.getCommandString(); options = parser.getOptionString(); if (commandName != "") { numCommandsRun++; m->setExecuting(true); m->resetCommandErrors(); m->setChangedSeqNames(true); m->setChangedGroupNames(true); Command* command = cFactory->getCommand(commandName, options); quitCommandCalled = command->execute(); delete command; //if we aborted command if (quitCommandCalled == 2) { m->mothurOut("[ERROR]: did not complete " + commandName + ".\n"); } m->setControl_pressed(false); m->setExecuting(false); }else { m->mothurOut("[ERROR]: Invalid.\n"); } } return true; } catch(exception& e) { m->errorOut(e, "InteractEngine", "getInput"); exit(1); } } /***********************************************************************/ string InteractEngine::getCommand() { try { string returnCommand = ""; #if defined NON_WINDOWS #ifdef USE_READLINE char* nextCommand = nullptr; nextCommand = readline("\nmothur > "); if(nextCommand != nullptr) { add_history(nextCommand); } else{ //^D causes null string and we want it to quit mothur nextCommand = strdup("quit()"); } m->mothurOutJustToLog("\nmothur > " + toString(nextCommand) + "\n"); returnCommand = nextCommand; free(nextCommand); #else m->mothurOut("\nmothur > "); getline(cin, returnCommand); m->mothurOut("\n"); m->mothurOutJustToLog("\nmothur > " + toString(returnCommand) + "\n"); #endif #else m->mothurOut("\nmothur > "); getline(cin, returnCommand); m->mothurOut("\n"); m->mothurOutJustToLog(toString(returnCommand) + "\n"); #endif //allow user to omit the () on the help and quit commands if (returnCommand == "quit") { returnCommand = "quit()"; } if (returnCommand == "help") { returnCommand = "help()"; } if (returnCommand == "") { return returnCommand; } string type = findType(returnCommand); if (type == "environment") { //set environmental variables string key, value; value = returnCommand; util.splitAtEquals(key, value); map::iterator it = environmentalVariables.find(key); if (it == environmentalVariables.end()) { environmentalVariables[key] = value; } else { it->second = value; } m->mothurOut("Setting environment variable " + key + " to " + value + "\n"); returnCommand = getCommand(); }else if (type == "batch") { m->mothurOut("/*****************************************************************************/\n"); BatchEngine newBatchEngine(path, returnCommand, environmentalVariables); if (newBatchEngine.getOpenedBatch()) { bool bail = false; while(!bail) { bail = newBatchEngine.getInput(); } } m->mothurOut("/*****************************************************************************/\n"); returnCommand = getCommand(); }else { //assume command, look for environmental variables to replace int evPos = returnCommand.find_first_of('$'); if (evPos == string::npos) { //no '$' , check for mothurhome evPos = returnCommand.find("mothurhome"); if (evPos != string::npos) { replaceVariables(returnCommand); } }else { replaceVariables(returnCommand); } } if (m->getDebug()) { double ramUsed, total; ramUsed = util.getRAMUsed(); total = util.getTotalRAM(); m->mothurOut("RAM used: " + toString(ramUsed/(double)GIG) + " Gigabytes. 
Total Ram: " + toString(total/(double)GIG) + " Gigabytes.\n\n"); } return returnCommand; } catch(exception& e) { m->errorOut(e, "InteractEngine", "getCommand"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/engines/interactengine.hpp000066400000000000000000000007021424121717000215600ustar00rootroot00000000000000// // interactengine.hpp // Mothur // // Created by Sarah Westcott on 10/21/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef interactengine_hpp #define interactengine_hpp #include "engine.hpp" class InteractEngine : public Engine { public: InteractEngine(string, map); ~InteractEngine(); virtual bool getInput(); private: string getCommand(); }; #endif /* interactengine_hpp */ mothur-1.48.0/source/engines/scriptengine.cpp000066400000000000000000000146241424121717000212560ustar00rootroot00000000000000// // scriptengine.cpp // Mothur // // Created by Sarah Westcott on 10/21/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #include "scriptengine.hpp" /***********************************************************************/ ScriptEngine::ScriptEngine(string tpath, string commandString, map ev) : Engine(tpath){ try { //remove quotes listOfCommands = commandString.substr(1, (commandString.length()-1)); noBufferNeeded = true; int pos = listOfCommands.find("set.logfile"); if (pos != string::npos) { noBufferNeeded = false; } if (noBufferNeeded) { if (m->getLogFileName() == "") { time_t ltime = time(nullptr); /* calendar time */ string outputPath = current->getOutputDir(); string logFileName = outputPath + "mothur." + toString(ltime) + ".logfile"; m->setLogFileName(logFileName, false); m->mothurOut("\n"); } } setEnvironmentVariables(ev); } catch(exception& e) { m->errorOut(e, "ScriptEngine", "ScriptEngine"); exit(1); } } /***********************************************************************/ ScriptEngine::~ScriptEngine(){ time_t end = time(nullptr); m->mothurOut("\n\nIt took " + toString(end-start) + " seconds to run " + toString(numCommandsRun) + " commands from your script.\n\n"); } /***********************************************************************/ //This Function allows the user to run a batchfile containing several commands on mothur bool ScriptEngine::getInput(){ try { string input = ""; string commandName = ""; string options = ""; int quitCommandCalled = 0; while(quitCommandCalled != 1){ input = getNextCommand(listOfCommands); if (input == "") { input = "quit()"; } CommandOptionParser parser(input); commandName = parser.getCommandString(); options = parser.getOptionString(); m->mothurOut("\nmothur > " + input + "\n"); if (m->getControl_pressed()) { input = "quit()"; } if (commandName != "") { numCommandsRun++; m->setExecuting(true); m->resetCommandErrors(); m->setChangedSeqNames(true); m->setChangedGroupNames(true); //executes valid command Command* command = cFactory->getCommand(commandName, options); quitCommandCalled = command->execute(); delete command; //if we aborted command if (quitCommandCalled == 2) { m->mothurOut("[ERROR]: did not complete " + commandName + ".\n"); } if (m->getControl_pressed()) { break; } m->setControl_pressed(false); m->setExecuting(false); }else { m->mothurOut("[ERROR]: Invalid.\n"); } } return true; } catch(exception& e) { m->errorOut(e, "ScriptEngine", "getInput"); exit(1); } } /***********************************************************************/ string ScriptEngine::getNextCommand(string& commandString) { try { string nextcommand = ""; int count = 0; 
bool ignoreSemiColons = false; //go through string until you reach ; or end while (count < commandString.length()) { //you want to ignore any ; until you reach the next ' if ((commandString[count] == '\'') && (!ignoreSemiColons)) { ignoreSemiColons = true; } else if ((commandString[count] == '\'') && (ignoreSemiColons)) { ignoreSemiColons = false; } if ((commandString[count] == ';') && (!ignoreSemiColons)) { break; } else { nextcommand += commandString[count]; } count++; } //if you are not at the end if (count != commandString.length()) { commandString = commandString.substr(count+1, commandString.length()); } else { commandString = ""; } //get rid of spaces in between commands if any if (commandString.length() > 0) { while (commandString[0] == ' ') { commandString = commandString.substr(1,commandString.length()); if (commandString.length() == 0) { break; } } } //allow user to omit the () on the quit command if (nextcommand == "quit") { nextcommand = "quit()"; } if (nextcommand == "help") { nextcommand = "help()"; } string type = findType(nextcommand); if (type == "environment") { //set environmental variables string key, value; value = nextcommand; util.splitAtEquals(key, value); map::iterator it = environmentalVariables.find(key); if (it == environmentalVariables.end()) { environmentalVariables[key] = value; } else { it->second = value; } m->mothurOut("Setting environment variable " + key + " to " + value + "\n"); nextcommand = getNextCommand(commandString); }else { //assume command, look for environmental variables to replace int evPos = nextcommand.find_first_of('$'); if (evPos == string::npos) { //no '$' , check for mothurhome evPos = nextcommand.find("mothurhome"); if (evPos != string::npos) { replaceVariables(nextcommand); } }else { replaceVariables(nextcommand); } } if (m->getDebug()) { double ramUsed, total; ramUsed = util.getRAMUsed(); total = util.getTotalRAM(); m->mothurOut("RAM used: " + toString(ramUsed/(double)GIG) + " Gigabytes. Total Ram: " + toString(total/(double)GIG) + " Gigabytes.\n\n"); } return nextcommand; } catch(exception& e) { m->errorOut(e, "ScriptEngine", "getNextCommand"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/engines/scriptengine.hpp000066400000000000000000000007571424121717000212650ustar00rootroot00000000000000// // scriptengine.hpp // Mothur // // Created by Sarah Westcott on 10/21/19. // Copyright © 2019 Schloss Lab. All rights reserved. // #ifndef scriptengine_hpp #define scriptengine_hpp #include "engine.hpp" class ScriptEngine : public Engine { public: ScriptEngine(string, string, map); ~ScriptEngine(); virtual bool getInput(); bool openedBatch; private: string listOfCommands; string getNextCommand(string&); }; #endif /* scriptengine_hpp */ mothur-1.48.0/source/fileoutput.cpp000077500000000000000000000155221424121717000173350ustar00rootroot00000000000000/* * fileoutput.cpp * Dotur * * Created by Sarah Westcott on 11/18/08. * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "fileoutput.h" /***********************************************************************/ void ThreeColumnFile::setLabelName(string label){ try { if(!firstLabel) { fileHeader += "\t" + label + "\tlci\thci"; } else { fileHeader = "numsampled\t" + label + "\tlci\thci"; } } catch(exception& e) { m->errorOut(e, "ThreeColumnFile", "setLabelName"); exit(1); } } /***********************************************************************/ void ThreeColumnFile::updateOutput(int nSeqs, vector data){ try { map::iterator it = nseqsToRow.find(nSeqs); if (it != nseqsToRow.end()) { //new column in row for next label int resultsIndex = it->second; results[resultsIndex].push_back(data[0]); results[resultsIndex].push_back(data[1]); results[resultsIndex].push_back(data[2]); }else{ //new row nseqsToRow[nSeqs] = results.size(); vector theseResults; theseResults.push_back(nSeqs); theseResults.push_back(data[0]); theseResults.push_back(data[1]); theseResults.push_back(data[2]); results.push_back(theseResults); } } catch(exception& e) { m->errorOut(e, "ThreeColumnFile", "updateOutput"); exit(1); } } /***********************************************************************/ void ColumnFile::setLabelName(string label, vector tags){ try { if(firstLabel){ fileHeader = ""; } for(int i = 0; i < tags.size(); i++) { fileHeader += label + tags[i] + '\t'; } } catch(exception& e) { m->errorOut(e, "ColumnFile", "setLabelName"); exit(1); } } /***********************************************************************/ void ColumnFile::updateOutput(vector data){ try { vector theseResults; for (size_t i = 0; i < data.size(); i++) { theseResults.push_back(data[i]); } results.push_back(theseResults); } catch(exception& e) { m->errorOut(e, "ColumnFile", "updateOutput"); exit(1); } } /***********************************************************************/ void FileOutput::printFile(){ try { ofstream outFile; util.openOutputFile(filename, outFile); outFile.setf(ios::fixed, ios::floatfield); outFile.setf(ios::showpoint); cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); outFile << fileHeader << endl; for (size_t i = 0; i < results.size(); i++) { for (size_t j = 0; j < results[i].size(); j++) { outFile << setprecision(6) << results[i][j] << '\t'; } outFile << endl; } outFile << endl; outFile.close(); } catch(exception& e) { m->errorOut(e, "FileOutput", "printFile"); exit(1); } } /***********************************************************************/ void SharedThreeColumnFile::setLabelName(string label){ try { if (groupLabel != "") { groupLabel = "_" + groupLabel; } if(!firstLabel) { fileHeader += "\t" + label + groupLabel + "\tlci" + groupLabel + "\thci"+ groupLabel; } else { fileHeader = "numsampled\t" + label + groupLabel + "\tlci" + groupLabel + "\thci"+ groupLabel; } } catch(exception& e) { m->errorOut(e, "SharedThreeColumnFile", "setLabelName"); exit(1); } } /***********************************************************************/ void SharedThreeColumnFile::updateOutput(int nSeqs, vector data){ try { map::iterator it = nseqsToRow.find(nSeqs); if (it != nseqsToRow.end()) { //new column in row for next label int resultsIndex = it->second; results[resultsIndex].push_back(data[0]); results[resultsIndex].push_back(data[1]); results[resultsIndex].push_back(data[2]); }else{ //new row nseqsToRow[nSeqs] = results.size(); vector theseResults; theseResults.push_back(numGroup); numGroup++; theseResults.push_back(data[0]); theseResults.push_back(data[1]); theseResults.push_back(data[2]); 
results.push_back(theseResults); } } catch(exception& e) { m->errorOut(e, "SharedThreeColumnFile", "output"); exit(1); } } /***********************************************************************/ void OneColumnFile::setLabelName(string label){ try { if(!firstLabel) { fileHeader += "\t" + label; } else { fileHeader = "numsampled\t" + label; } } catch(exception& e) { m->errorOut(e, "OneColumnFile", "setLabelName"); exit(1); } } /***********************************************************************/ void OneColumnFile::updateOutput(int nSeqs, vector data){ try { map::iterator it = nseqsToRow.find(nSeqs); if (it != nseqsToRow.end()) { //new column in row for next label int resultsIndex = it->second; results[resultsIndex].push_back(data[0]); }else{ //new row nseqsToRow[nSeqs] = results.size(); vector theseResults; theseResults.push_back(nSeqs); theseResults.push_back(data[0]); results.push_back(theseResults); } } catch(exception& e) { m->errorOut(e, "OneColumnFile", "updateOutput"); exit(1); } } /***********************************************************************/ void SharedOneColumnFile::setLabelName(string label){ try { if(!firstLabel) { fileHeader += "\t" + label; } else { fileHeader = "sampled\t" + label; } } catch(exception& e) { m->errorOut(e, "SharedOneColumnFile", "setLabelName"); exit(1); } } /***********************************************************************/ void SharedOneColumnFile::updateOutput(int nSeqs, vector data){ try { map::iterator it = nseqsToRow.find(nSeqs); if (it != nseqsToRow.end()) { //new column in row for next label int resultsIndex = it->second; for (int i = 0; i < data.size(); i++) { results[resultsIndex].push_back(data[i]); } }else{ //new row nseqsToRow[nSeqs] = results.size(); vector theseResults; theseResults.push_back(nSeqs); for (int i = 0; i < data.size(); i++) { theseResults.push_back(data[i]); } results.push_back(theseResults); } } catch(exception& e) { m->errorOut(e, "SharedOneColumnFile", "updateOutput"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/fileoutput.h000077500000000000000000000054751424121717000170100ustar00rootroot00000000000000#ifndef FILEOUTPUT_H #define FILEOUTPUT_H #include "mothurout.h" #include "utils.hpp" /***********************************************************************/ class FileOutput { public: FileOutput(string n){ m = MothurOut::getInstance(); fileHeader = ""; filename = n; firstLabel = true; } virtual ~FileOutput(){ printFile(); } virtual void setLabelName(string) {} virtual void updateOutput(int, vector) {} virtual void resetFile() { firstLabel = false; } virtual void setLabelName(string, vector) {} virtual void updateOutput(vector) {} protected: MothurOut* m; Utils util; string filename, fileHeader; bool firstLabel; map nseqsToRow; //maps number of seqs sampled to row in results vector< vector > results; //results[0] is the first row in output file. can contain multiple labels is 0.01 0.03 /* numsampled 0.01 0.03 1.000000 1.000000 1.00000 - results[0] 100.000000 47.000000 30.00000 - results[1] .... 
*/ void printFile(); }; /***********************************************************************/ class ThreeColumnFile : public FileOutput { public: ThreeColumnFile(string n) : FileOutput(n) { } ~ThreeColumnFile() = default; void setLabelName(string); void updateOutput(int, vector); private: }; /***********************************************************************/ class OneColumnFile : public FileOutput { public: OneColumnFile(string n) : FileOutput(n) { } ~OneColumnFile() = default; void setLabelName(string); void updateOutput(int, vector); private: }; /***********************************************************************/ class SharedOneColumnFile : public FileOutput { public: SharedOneColumnFile(string n) : FileOutput(n) {} ~SharedOneColumnFile() = default; void setLabelName(string); void updateOutput(int, vector); private: }; /***********************************************************************/ class SharedThreeColumnFile : public FileOutput { public: SharedThreeColumnFile(string n, string groups) : FileOutput(n), groupLabel(groups), numGroup(1) { } ~SharedThreeColumnFile() = default; void setLabelName(string); void updateOutput(int, vector); private: int numGroup; string groupLabel; }; /***********************************************************************/ //used by parsimony, unifrac.weighted and unifrac.unweighted class ColumnFile : public FileOutput { public: ColumnFile(string n, string i) : FileOutput(n) {} ~ColumnFile() = default; void setLabelName(string, vector); void updateOutput(vector); private: }; /***********************************************************************/ #endif mothur-1.48.0/source/gotohoverlap.cpp000077500000000000000000000073321424121717000176460ustar00rootroot00000000000000/* * gotohoverlap.cpp * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This class is an Alignment child class that implements the Gotoh pairwise alignment algorithm as described in: * * Gotoh O. 1982. An improved algorithm for matching biological sequences. J. Mol. Biol. 162:705-8. * Myers, EW & Miller, W. 1988. Optimal alignments in linear space. Comput Appl Biosci. 4:11-7. * * This method is nice because it allows for an affine gap penalty to be assessed, which is analogous to what is used * in blast and is an alternative to Needleman-Wunsch, which only charges the same penalty for each gap position. 
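 *
 * As a rough illustration (not a claim from the original authors): the affine model charges the opening
 * penalty once and the extension penalty for every additional gap column, so with gapOpen = -2.0 and
 * gapExtend = -1.0 a three-column gap costs -4.0, whereas a flat -2.0 per column would charge -6.0.
 *
 * A minimal usage sketch, assuming the aligned strings are read back through accessors on the Alignment
 * base class (the names getSeqAAln/getSeqBAln below are illustrative, not guaranteed by this file):
 *
 *     GotohOverlap aligner(-2.0, -1.0, 1.0, -1.0, 2000);   // gapOpen, gapExtend, match, mismatch, matrix size
 *     aligner.align("ACGTACGTAC", "ACGGTAC");
 *     string alignedA = aligner.getSeqAAln();
 *     string alignedB = aligner.getSeqBAln();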
* Because this method typically has problems at the ends when two sequences do not full overlap, we employ a separate * method to fix the ends (see Overlap class documentation) * */ #include "alignmentcell.hpp" #include "overlap.hpp" #include "alignment.hpp" #include "gotohoverlap.hpp" /**************************************************************************************************/ GotohOverlap::GotohOverlap(float gO, float gE, float f, float mm, int r) : gapOpen(gO), gapExtend(gE), match(f), mismatch(mm), Alignment(r) { try { for(int i=1;ierrorOut(e, "GotohOverlap", "GotohOverlap"); exit(1); } } /**************************************************************************************************/ void GotohOverlap::align(string A, string B, bool createBaseMap){ try { seqA = ' ' + A; lA = seqA.length(); // the algorithm requires that the first character be a dummy value seqB = ' ' + B; lB = seqB.length(); // the algorithm requires that the first character be a dummy value for(int i=1;i alignment[i][j].dValue){ if(alignment[i][j].iValue > diagonal){ alignment[i][j].cValue = alignment[i][j].iValue; alignment[i][j].prevCell = 'l'; } else{ alignment[i][j].cValue = diagonal; alignment[i][j].prevCell = 'd'; } } else{ if(alignment[i][j].dValue > diagonal){ alignment[i][j].cValue = alignment[i][j].dValue; alignment[i][j].prevCell = 'u'; } else{ alignment[i][j].cValue = diagonal; alignment[i][j].prevCell = 'd'; } } } } Overlap over; over.setOverlap(alignment, lA, lB, 0); // Fix the gaps at the ends of the sequences traceBack(createBaseMap); // Construct the alignment and set seqAaln and seqBaln } catch(exception& e) { m->errorOut(e, "GotohOverlap", "align"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/gotohoverlap.hpp000077500000000000000000000026451424121717000176550ustar00rootroot00000000000000#ifndef GOTOHOVERLAP_H #define GOTOHOVERLAP_H /* * gotohoverlap.h * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This class is an Alignment child class that implements the Gotoh pairwise alignment algorithm as described in: * * Gotoh O. 1982. An improved algorithm for matching biological sequences. J. Mol. Biol. 162:705-8. * Myers, EW & Miller, W. 1988. Optimal alignments in linear space. Comput Appl Biosci. 4:11-7. * * This method is nice because it allows for an affine gap penalty to be assessed, which is analogous to what is used * in blast and is an alternative to Needleman-Wunsch, which only charges the same penalty for each gap position. * Because this method typically has problems at the ends when two sequences do not full overlap, we employ a separate * method to fix the ends (see Overlap class documentation) * */ #include "mothur.h" #include "alignment.hpp" /**************************************************************************************************/ class GotohOverlap : public Alignment { public: GotohOverlap(float, float, float, float, int); void align(string, string, bool createBaseMap=false); ~GotohOverlap() = default; private: float gapOpen; float gapExtend; float match; float mismatch; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/heatmap.cpp000077500000000000000000000646311424121717000165610ustar00rootroot00000000000000/* * heatmap.cpp * Mothur * * Created by Sarah Westcott on 3/25/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
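 *
 * HeatMap renders an SVG heatmap of OTU abundances. The constructor takes the sort option (shared,
 * topotu, topgroup or none), the scaling option (log10, log2 or linear), the number of OTUs to display
 * (0 = all), a font size, an output directory and the input file name; getPic() writes a
 * *.heatmap.bin.svg file in which the red intensity of each cell encodes the OTU's relative abundance.
 *
 * A minimal sketch of how a caller might drive it (variable names are illustrative only):
 *
 *     HeatMap heatmap("topotu", "log10", 0, 24, outputDir, sharedfile);
 *     string svgName = heatmap.getPic(lookup);   // lookup: a SharedRAbundVectors* read from the shared file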
* */ #include "heatmap.h" //********************************************************************************************************************** HeatMap::HeatMap(string sort, string scale, int num, int fsize, string dir, string i){ try { m = MothurOut::getInstance(); sorted = sort; scaler = scale; outputDir = dir; numOTU = num; fontSize = fsize; inputfile = i; } catch(exception& e) { m->errorOut(e, "HeatMap", "HeatMap"); exit(1); } } //********************************************************************************************************************** string HeatMap::getPic(RAbundVector* rabund) { try { int numBinsToDisplay = rabund->getNumBins(); if (numOTU != 0) { //user want to display a portion of the otus if (numOTU < numBinsToDisplay) { numBinsToDisplay = numOTU; } } //sort lookup so shared bins are on top if (sorted != "none") { sortRabund(rabund); } float maxRelAbund = 0.0; for(int i=0;isize();i++){ float relAbund = rabund->get(i) / (float)rabund->getNumSeqs(); if(relAbund > maxRelAbund){ maxRelAbund = relAbund; } } vector scaleRelAbund(numBinsToDisplay, ""); for(int i=0;iget(i) / (float)rabund->getNumSeqs(); if (m->getControl_pressed()) { return "control"; } if (rabund->get(i) != 0) { //don't want log value of 0. if (scaler == "log10") { scaleRelAbund[i] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund))) + "0000"; }else if (scaler == "log2") { scaleRelAbund[i] = toHex(int(255 * log2(relAbund) / log2(maxRelAbund))) + "0000"; }else if (scaler == "linear") { scaleRelAbund[i] = toHex(int(255 * relAbund / maxRelAbund)) + "0000"; }else { //if user enters invalid scaler option. scaleRelAbund[i] = toHex(int(255 * log10(relAbund / log10(maxRelAbund)))) + "0000"; } } else { scaleRelAbund[i] = "FFFFFF"; } } string filenamesvg = outputDir + util.getRootName(util.getSimpleName(inputfile)) + rabund->getLabel() + ".heatmap.bin.svg"; util.openOutputFile(filenamesvg, outsvg); //svg image outsvg << "\n"; outsvg << "\n"; //white backround outsvg << ""; outsvg << "Heatmap at distance " + rabund->getLabel() + "\n"; //output legend and color labels string color; int x = 0; int y = 103 + (numBinsToDisplay*5); printLegend(y, maxRelAbund); y = 70; for (int i = 0; i < scaleRelAbund.size(); i++) { if (m->getControl_pressed()) { outsvg.close(); return "control"; } outsvg << "\n"; y += 5; } outsvg << "\n\n"; outsvg.close(); return filenamesvg; } catch(exception& e) { m->errorOut(e, "HeatMap", "getPic"); exit(1); } } //********************************************************************************************************************** string HeatMap::getPic(SharedRAbundVectors*& data) { try { vector lookup = data->getSharedRAbundVectors(); vector groups = data->getNamesGroups(); int numBinsToDisplay = lookup[0]->getNumBins(); if (numOTU != 0) { //user want to display a portion of the otus if (numOTU < numBinsToDisplay) { numBinsToDisplay = numOTU; } } //sort lookup so shared bins are on top vector sortedLabels = data->getOTUNames(); if (sorted != "none") { sortedLabels = sortSharedVectors(lookup, sortedLabels); } vector > scaleRelAbund; vector maxRelAbund(lookup.size(), 0.0); float superMaxRelAbund = 0; for(int i = 0; i < lookup.size(); i++){ for(int j=0; jsize(); j++){ float relAbund = lookup[i]->get(j) / (float)lookup[i]->getNumSeqs(); if(relAbund > maxRelAbund[i]){ maxRelAbund[i] = relAbund; } } if(maxRelAbund[i] > superMaxRelAbund){ superMaxRelAbund = maxRelAbund[i]; } } scaleRelAbund.resize(lookup.size()); for(int i=0;igetControl_pressed()) { for (int i = 0; i < lookup.size(); i++) { delete 
lookup[i]; } return "control"; } float relAbund = lookup[i]->get(j) / (float)lookup[i]->getNumSeqs(); if (lookup[i]->get(j) != 0) { //don't want log value of 0. if (scaler == "log10") { if (util.isEqual(maxRelAbund[i], 1)) { maxRelAbund[i] -= 0.001; } if (util.isEqual(relAbund, 1)) { relAbund -= 0.001; } scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund[i]))) + "0000"; }else if (scaler == "log2") { if (util.isEqual(maxRelAbund[i], 1)) { maxRelAbund[i] -= 0.001; } if (util.isEqual(relAbund, 1)) { relAbund -= 0.001; } scaleRelAbund[i][j] = toHex(int(255 * log2(relAbund) / log2(maxRelAbund[i]))) + "0000"; }else if (scaler == "linear") { scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000"; }else { //if user enters invalid scaler option. if (util.isEqual(maxRelAbund[i], 1)) { maxRelAbund[i] += 0.001; } scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund / log10(maxRelAbund[i])))) + "0000"; } }else { scaleRelAbund[i][j] = "FFFFFF"; } } } string filenamesvg = outputDir + util.getRootName(util.getSimpleName(inputfile)) + lookup[0]->getLabel() + ".heatmap.bin.svg"; util.openOutputFile(filenamesvg, outsvg); int binHeight = 20; int labelBump = 100; int binWidth = 300; //svg image outsvg << "\n"; outsvg << "\n"; //white backround outsvg << ""; outsvg << "Heatmap at distance " + lookup[0]->getLabel() + "\n"; //column labels for (int h = 0; h < lookup.size()+1; h++) { if (h == 0) { string tempLabel = "OTU"; outsvg << "" + tempLabel + "\n"; }else { outsvg << "" + groups[h-1] + "\n"; } } //output legend and color labels string color; int x = 0; int y = 103 + (numBinsToDisplay*binHeight); printLegend(y, superMaxRelAbund); y = 70; for (int i = 0; i < numBinsToDisplay; i++) { outsvg << "" + sortedLabels[i] + "\n"; x += labelBump; for (int j = 0; j < scaleRelAbund.size(); j++) { if (m->getControl_pressed()) { outsvg.close(); return "control"; } outsvg << "\n"; x += binWidth; } x = 0; y += binHeight; } outsvg << "\n\n"; outsvg.close(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return filenamesvg; } catch(exception& e) { m->errorOut(e, "HeatMap", "getPic"); exit(1); } } //********************************************************************************************************************** vector HeatMap::sortSharedVectors(vector lookup, vector currentLabels){ try { vector sortedLabels; sortedLabels.resize(currentLabels.size(), ""); vector looktemp; map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. 
map::iterator it; /****************** find order of otus **********************/ if (sorted == "shared") { place = orderShared(lookup); }else if (sorted == "topotu") { place = orderTopOtu(lookup); }else if (sorted == "topgroup") { place = orderTopGroup(lookup); }else { m->mothurOut("Error: invalid sort option.\n"); } /******************* create copy of lookup *********************/ //create and initialize looktemp as a copy of lookup for (int i = 0; i < lookup.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(*lookup[i]); temp->setLabel(lookup[i]->getLabel()); looktemp.push_back(temp); } /************************ fill lookup in order given by place *********************/ //for each bin for (int i = 0; i < looktemp[0]->getNumBins(); i++) { //place //fill lookup // 2 -> 1 for (int j = 0; j < looktemp.size(); j++) { // 3 -> 2 int newAbund = looktemp[j]->get(i); // 1 -> 3 lookup[j]->set(place[i], newAbund); //binNumber, abundance, group sortedLabels[place[i]] = currentLabels[i]; } } return sortedLabels; } catch(exception& e) { m->errorOut(e, "HeatMap", "sortSharedVectors"); exit(1); } } //********************************************************************************************************************** map HeatMap::orderShared(vector lookup){ try { map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. map::iterator it; vector sharedBins; vector uniqueBins; //for each bin for (int i = 0; i < lookup[0]->getNumBins(); i++) { int count = 0; //is this bin shared for (int j = 0; j < lookup.size(); j++) { if (lookup[j]->get(i) != 0) { count++; } } if (count < 2) { uniqueBins.push_back(i); } else { sharedBins.push_back(i); } } //fill place for (int i = 0; i < sharedBins.size(); i++) { place[sharedBins[i]] = i; } for (int i = 0; i < uniqueBins.size(); i++) { place[uniqueBins[i]] = (sharedBins.size() + i); } return place; } catch(exception& e) { m->errorOut(e, "HeatMap", "orderShared"); exit(1); } } //********************************************************************************************************************** map HeatMap::orderTopOtu(vector lookup){ try { map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. map::iterator it; vector totals; //for each bin for (int i = 0; i < lookup[0]->getNumBins(); i++) { int total = 0; for (int j = 0; j < lookup.size(); j++) { total += lookup[j]->get(i); } binCount temp(i, total); totals.push_back(temp); } sort(totals.begin(), totals.end(), comparebinCounts); //fill place for (int i = 0; i < totals.size(); i++) { place[totals[i].bin] = i; } return place; } catch(exception& e) { m->errorOut(e, "HeatMap", "orderTopOtu"); exit(1); } } //********************************************************************************************************************** map HeatMap::orderTopGroup(vector lookup){ try { map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. map::iterator it; vector < vector > totals; //totals[0] = bin totals for group 0, totals[1] = bin totals for group 1, ... 
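//after each group's totals are sorted in descending order the rows are assigned round-robin: the top OTU of every group is placed first, then each group's second best, skipping OTUs that have already been placed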
totals.resize(lookup.size()); //for each bin for (int i = 0; i < lookup[0]->getNumBins(); i++) { for (int j = 0; j < lookup.size(); j++) { binCount temp(i, (lookup[j]->get(i))); totals[j].push_back(temp); } } for (int i = 0; i < totals.size(); i++) { sort(totals[i].begin(), totals[i].end(), comparebinCounts); } //fill place //grab the top otu for each group adding it if its not already added int count = 0; for (int i = 0; i < totals[0].size(); i++) { for (int j = 0; j < totals.size(); j++) { it = place.find(totals[j][i].bin); if (it == place.end()) { //not added yet place[totals[j][i].bin] = count; count++; } } } return place; } catch(exception& e) { m->errorOut(e, "HeatMap", "orderTopGroup"); exit(1); } } //********************************************************************************************************************** void HeatMap::printLegend(int y, float maxbin) { try { //output legend and color labels //go through map and give each score a color value string color; int x = 10; //prints legend for (int i = 1; i < 255; i++) { color = toHex(int((float)(i))); outsvg << "\n"; x += 1; } //prints legend labels x = 10; for (int i = 1; i<=5; i++) { float label; if(scaler== "log10") { label = maxbin * log10(51*i) / log10(255); } else if(scaler== "log2") { label = maxbin * log2(51*i) / log2(255); } else if(scaler== "linear") { label = maxbin * 51 * i / 255; } else { label = maxbin * log10(51*i) / log10(255); } label = int(label * 1000 + 0.5); label /= 1000.0; string text = toString(label); outsvg << "" + text + "\n"; x += 60; } } catch(exception& e) { m->errorOut(e, "HeatMap", "printLegend"); exit(1); } } //********************************************************************************************************************** string HeatMap::getPic(SharedRAbundFloatVectors*& data) { try { vector lookup = data->getSharedRAbundFloatVectors(); vector groups = data->getNamesGroups(); int numBinsToDisplay = lookup[0]->getNumBins(); if (numOTU != 0) { //user want to display a portion of the otus if (numOTU < numBinsToDisplay) { numBinsToDisplay = numOTU; } } //sort lookup so shared bins are on top vector sortedLabels = data->getOTUNames(); if (sorted != "none") { sortedLabels = sortSharedVectors(lookup, sortedLabels); } vector > scaleRelAbund; vector maxRelAbund(lookup.size(), 0.0); float superMaxRelAbund = 0; for(int i = 0; i < lookup.size(); i++){ for(int j=0; jget(j); if(relAbund > maxRelAbund[i]){ maxRelAbund[i] = relAbund; } } if(maxRelAbund[i] > superMaxRelAbund){ superMaxRelAbund = maxRelAbund[i]; } } scaleRelAbund.resize(lookup.size()); for(int i=0;igetControl_pressed()) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return "control"; } float relAbund = lookup[i]->get(j); if (!util.isEqual(relAbund, 0)) { //don't want log value of 0. if (scaler == "log10") { if (util.isEqual(maxRelAbund[i], 1)) { maxRelAbund[i] -= 0.001; } if (util.isEqual(relAbund, 1)) { relAbund -= 0.001; } scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund[i]))) + "0000"; }else if (scaler == "log2") { if (util.isEqual(maxRelAbund[i], 1)) { maxRelAbund[i] -= 0.001; } if (util.isEqual(relAbund, 1)) { relAbund -= 0.001; } scaleRelAbund[i][j] = toHex(int(255 * log2(relAbund) / log2(maxRelAbund[i]))) + "0000"; }else if (scaler == "linear") { scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000"; }else { //if user enters invalid scaler option. 
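//when the scaler string is not recognized, fall back to a log10-style transform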
scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund / log10(maxRelAbund[i])))) + "0000"; } }else { scaleRelAbund[i][j] = "FFFFFF"; } } } string filenamesvg = outputDir + util.getRootName(util.getSimpleName(inputfile)) + lookup[0]->getLabel() + ".heatmap.bin.svg"; util.openOutputFile(filenamesvg, outsvg); int binHeight = 20; int labelBump = 100; int binWidth = 300; //svg image outsvg << "\n"; outsvg << "\n"; //white backround outsvg << ""; outsvg << "Heatmap at distance " + lookup[0]->getLabel() + "\n"; //column labels for (int h = 0; h < lookup.size()+1; h++) { if (h == 0) { string tempLabel = "OTU"; outsvg << "" + tempLabel + "\n"; }else { outsvg << "" + groups[h-1] + "\n"; } } //output legend and color labels string color; int x = 0; int y = 103 + (numBinsToDisplay*binHeight); printLegend(y, superMaxRelAbund); y = 70; for (int i = 0; i < numBinsToDisplay; i++) { outsvg << "" + sortedLabels[i] + "\n"; x += labelBump; for (int j = 0; j < scaleRelAbund.size(); j++) { if (m->getControl_pressed()) { outsvg.close(); return "control"; } outsvg << "\n"; x += binWidth; } x = 0; y += binHeight; } outsvg << "\n\n"; outsvg.close(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return filenamesvg; } catch(exception& e) { m->errorOut(e, "HeatMap", "getPic"); exit(1); } } //********************************************************************************************************************** vector HeatMap::sortSharedVectors(vector lookup, vector currentLabels){ try { vector looktemp; map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. map::iterator it; vector sortedLabels; sortedLabels.resize(currentLabels.size(), ""); /****************** find order of otus **********************/ if (sorted == "shared") { place = orderShared(lookup); }else if (sorted == "topotu") { place = orderTopOtu(lookup); }else if (sorted == "topgroup") { place = orderTopGroup(lookup); }else { m->mothurOut("Error: invalid sort option.\n"); return sortedLabels; } /******************* create copy of lookup *********************/ //create and initialize looktemp as a copy of lookup for (int i = 0; i < lookup.size(); i++) { SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(*lookup[i]); temp->setLabel(lookup[i]->getLabel()); looktemp.push_back(temp); } /************************ fill lookup in order given by place *********************/ //for each bin for (int i = 0; i < looktemp[0]->size(); i++) { //place //fill lookup // 2 -> 1 for (int j = 0; j < looktemp.size(); j++) { // 3 -> 2 float newAbund = looktemp[j]->get(i); // 1 -> 3 lookup[j]->set(place[i], newAbund); //binNumber, abundance, group sortedLabels[place[i]] = currentLabels[i]; } } return sortedLabels; } catch(exception& e) { m->errorOut(e, "HeatMap", "sortSharedVectors"); exit(1); } } //********************************************************************************************************************** int HeatMap::sortRabund(RAbundVector* r){ try { map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. 
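//single-sample case: the OTUs are simply reordered from most to least abundant before drawing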
map::iterator it; /****************** find order of otus **********************/ vector totals; //for each bin for (int i = 0; i < r->getNumBins(); i++) { binCount temp(i, r->get(i)); totals.push_back(temp); } sort(totals.begin(), totals.end(), comparebinCounts); //fill place for (int i = 0; i < totals.size(); i++) { place[totals[i].bin] = i; } /******************* create copy of lookup *********************/ //create and initialize rtemp as a copy of r RAbundVector* rtemp = new RAbundVector(r->getNumBins()); for (int i = 0; i < r->size(); i++) { rtemp->set(i, r->get(i)); } rtemp->setLabel(r->getLabel()); /************************ fill lookup in order given by place *********************/ //for each bin for (int i = 0; i < rtemp->size(); i++) { //place //fill lookup // 2 -> 1 // 3 -> 2 int newAbund = rtemp->get(i); // 1 -> 3 r->set(place[i], newAbund); //binNumber, abundance } return 0; } catch(exception& e) { m->errorOut(e, "HeatMap", "sortRabund"); exit(1); } } //********************************************************************************************************************** map HeatMap::orderShared(vector lookup){ try { map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. map::iterator it; vector sharedBins; vector uniqueBins; //for each bin for (int i = 0; i < lookup[0]->getNumBins(); i++) { int count = 0; //is this bin shared for (int j = 0; j < lookup.size(); j++) { if (!util.isEqual(lookup[j]->get(i), 0)) { count++; } } if (count < 2) { uniqueBins.push_back(i); } else { sharedBins.push_back(i); } } //fill place for (int i = 0; i < sharedBins.size(); i++) { place[sharedBins[i]] = i; } for (int i = 0; i < uniqueBins.size(); i++) { place[uniqueBins[i]] = (sharedBins.size() + i); } return place; } catch(exception& e) { m->errorOut(e, "HeatMap", "orderShared"); exit(1); } } //********************************************************************************************************************** map HeatMap::orderTopOtu(vector lookup){ try { map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. map::iterator it; vector totals; //for each bin for (int i = 0; i < lookup[0]->size(); i++) { int total = 0; for (int j = 0; j < lookup.size(); j++) { total += lookup[j]->get(i); } binCountFloat temp(i, total); totals.push_back(temp); } sort(totals.begin(), totals.end(), comparebinFloatCounts); //fill place for (int i = 0; i < totals.size(); i++) { place[totals[i].bin] = i; } return place; } catch(exception& e) { m->errorOut(e, "HeatMap", "orderTopOtu"); exit(1); } } //********************************************************************************************************************** map HeatMap::orderTopGroup(vector lookup){ try { map place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2. map::iterator it; vector < vector > totals; //totals[0] = bin totals for group 0, totals[1] = bin totals for group 1, ... 
totals.resize(lookup.size()); //for each bin for (int i = 0; i < lookup[0]->size(); i++) { for (int j = 0; j < lookup.size(); j++) { binCountFloat temp(i, (lookup[j]->get(i))); totals[j].push_back(temp); } } for (int i = 0; i < totals.size(); i++) { sort(totals[i].begin(), totals[i].end(), comparebinFloatCounts); } //fill place //grab the top otu for each group adding it if its not already added int count = 0; for (int i = 0; i < totals[0].size(); i++) { for (int j = 0; j < totals.size(); j++) { it = place.find(totals[j][i].bin); if (it == place.end()) { //not added yet place[totals[j][i].bin] = count; count++; } } } return place; } catch(exception& e) { m->errorOut(e, "HeatMap", "orderTopGroup"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/heatmap.h000077500000000000000000000042711424121717000162200ustar00rootroot00000000000000#ifndef HEATMAP_H #define HEATMAP_H /* * heatmap.h * Mothur * * Created by Sarah Westcott on 3/25/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "sharedrabundvectors.hpp" #include "rabundvector.hpp" #include "sharedrabundfloatvectors.hpp" #include "utils.hpp" /***********************************************************************/ struct binCount { int bin; int abund; binCount(int i, int j) : bin(i), abund(j) {} }; /***********************************************************************/ struct binCountFloat { int bin; float abund; binCountFloat(int i, float j) : bin(i), abund(j) {} }; /***********************************************************************/ //sorts highest abund to lowest inline bool comparebinCounts(binCount left, binCount right){ return (left.abund > right.abund); } /***********************************************************************/ //sorts highest abund to lowest inline bool comparebinFloatCounts(binCountFloat left, binCountFloat right){ return (left.abund > right.abund); } /***********************************************************************/ class HeatMap { public: HeatMap(string, string, int, int, string, string); ~HeatMap(){}; string getPic(RAbundVector*); string getPic(SharedRAbundVectors*&); string getPic(SharedRAbundFloatVectors*&); private: vector sortSharedVectors(vector, vector); vector sortSharedVectors(vector, vector); int sortRabund(RAbundVector*); void printLegend(int, float); string format, sorted, groupComb, scaler, outputDir, inputfile; ofstream outsvg; MothurOut* m; Utils util; int numOTU, fontSize; map orderTopGroup(vector); map orderTopOtu(vector); map orderShared(vector); map orderTopGroup(vector); map orderTopOtu(vector); map orderShared(vector); }; /***********************************************************************/ #endif mothur-1.48.0/source/heatmapsim.cpp000077500000000000000000000212701424121717000172620ustar00rootroot00000000000000/* * heatmapsim.cpp * Mothur * * Created by Sarah Westcott on 6/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
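 *
 * HeatMapSim draws an SVG similarity matrix: for every pair of groups it takes 1.0 minus the first value
 * returned by each selected calculator as the similarity, shades the corresponding cell red in proportion
 * to the largest similarity observed, and writes one *.heatmap.sim.svg file per calculator.
 *
 * A minimal sketch of a call (variable and calculator names are illustrative only):
 *
 *     HeatMapSim heatmapSim(outputDir, sharedfile, 24);   // output dir, input file, font size
 *     vector<string> svgNames = heatmapSim.getPic(lookup, sumCalculators, lookup->getNamesGroups());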
* */ #include "heatmapsim.h" #include "sharedjabund.h" #include "sharedsorabund.h" #include "sharedjclass.h" #include "sharedsorclass.h" #include "sharedjest.h" #include "sharedsorest.h" #include "sharedthetayc.h" #include "sharedthetan.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" #include "sharedrabundvectors.hpp" //********************************************************************************************************************** HeatMapSim::HeatMapSim(string dir, string i, int f) : outputDir(dir), inputfile(i), fontSize(f) { m = MothurOut::getInstance(); } //********************************************************************************************************************** vector HeatMapSim::getPic(SharedRAbundVectors*& allLookup, vector calcs, vector groups) { try { EstOutput data; vector sims; vector outputNames; vector lookup = allLookup->getSharedRAbundVectors(); //make file for each calculator selected for (int k = 0; k < calcs.size(); k++) { if (m->getControl_pressed()) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return outputNames; } string filenamesvg = outputDir + util.getRootName(util.getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + calcs[k]->getName() + ".heatmap.sim.svg"; util.openOutputFile(filenamesvg, outsvg); outputNames.push_back(filenamesvg); //svg image outsvg << "\n"; outsvg << "\n"; //white backround outsvg << ""; outsvg << "Heatmap at distance " + lookup[0]->getLabel() + "\n"; //column labels for (int h = 0; h < lookup.size(); h++) { outsvg << "" + groups[h] + "\n"; outsvg << "" + groups[h] + "\n"; } sims.clear(); double biggest = 0; double smallest = 10000000; //float scaler; //get sim for each comparison and save them so you can find the relative similairity for(int i = 0; i < (lookup.size()-1); i++){ for(int j = (i+1); j < lookup.size(); j++){ if (m->getControl_pressed()) { outsvg.close(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return outputNames; } vector subset; subset.push_back(lookup[i]); subset.push_back(lookup[j]); //get similairity between groups data = calcs[k]->getValues(subset); sims.push_back(1.0 - data[0]); //save biggest similairity to set relative sim if ((1.0 - data[0]) > biggest) { biggest = (1.0 - data[0]); } //save smalllest similairity to set relative sim if ((1.0 - data[0]) < smallest) { smallest = (1.0 - data[0]); } } } //map biggest similairity found to red float scalerBig = 255.0 / biggest; int count = 0; //output similairites to file for(int i = 0; i < (lookup.size()-1); i++){ for(int j = (i+1); j < lookup.size(); j++){ //find relative color int color = scalerBig * sims[count]; //draw box outsvg << "\n"; count++; } } int y = ((lookup.size() * 150) + 120); printLegend(y, biggest, smallest); outsvg << "\n\n"; outsvg.close(); } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return outputNames; } catch(exception& e) { m->errorOut(e, "HeatMapSim", "getPic"); exit(1); } } //********************************************************************************************************************** string HeatMapSim::getPic(vector< vector > dists, vector groups) { try { vector sims; string filenamesvg = outputDir + util.getRootName(util.getSimpleName(inputfile)) + "heatmap.sim.svg"; util.openOutputFile(filenamesvg, outsvg); //svg image outsvg << "\n"; outsvg << "\n"; //white backround outsvg << ""; outsvg << "Heatmap for " + inputfile + "\n"; //column labels for (int h = 0; h < groups.size(); h++) { outsvg << "" + groups[h] + "\n"; outsvg << "" + groups[h] + 
"\n"; } double biggest = -1; double smallest = 10000000; float scaler; //get sim for each comparison and save them so you can find the relative similairity for(int i = 0; i < (dists.size()-1); i++){ for(int j = (i+1); j < dists.size(); j++){ if (m->getControl_pressed()) { outsvg.close(); return filenamesvg; } float sim = 1.0 - dists[i][j]; sims.push_back(sim); //save biggest similairity to set relative sim if (sim > biggest) { biggest = sim; } //save smalllest similairity to set relative sim if (sim < smallest) { smallest = sim; } } } //map biggest similairity found to red scaler = 255.0 / biggest; int count = 0; //output similairites to file for(int i = 0; i < (dists.size()-1); i++){ for(int j = (i+1); j < dists.size(); j++){ //find relative color int color = scaler * sims[count]; //draw box outsvg << "\n"; count++; } } int y = ((dists.size() * 150) + 120); printLegend(y, biggest, smallest); outsvg << "\n\n"; outsvg.close(); return filenamesvg; } catch(exception& e) { m->errorOut(e, "HeatMapSim", "getPic"); exit(1); } } //********************************************************************************************************************** void HeatMapSim::printLegend(int y, float maxSim, float minSim) { try { //output legend and color labels //go through map and give each score a color value string color; int x = 10; //prints legend for (int i = 1; i < 255; i++) { color = toHex(int((float)(i))); outsvg << "\n"; x += 3; } float scaler = (maxSim-minSim) / 5.0; //prints legend labels x = 0; for (int i = 0; i<=5; i++) { float label = scaler*i; label = int(label * 1000 + 0.5); label /= 1000.0; string text = toString(label); outsvg << "" + text + "\n"; x += 153; } } catch(exception& e) { m->errorOut(e, "HeatMapSim", "printLegend"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/heatmapsim.h000077500000000000000000000015021424121717000167230ustar00rootroot00000000000000#ifndef HEATMAPSIM_H #define HEATMAPSIM_H /* * heatmapsim.h * Mothur * * Created by Sarah Westcott on 6/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "calculator.h" #include "utils.hpp" class SharedRAbundVectors; /***********************************************************************/ class HeatMapSim { public: HeatMapSim(string, string, int); ~HeatMapSim(){}; vector getPic(SharedRAbundVectors*&, vector, vector); string getPic(vector< vector >, vector); private: void printLegend(int, float, float); string format, groupComb, outputDir, inputfile; int fontSize; ofstream outsvg; MothurOut* m; Utils util; }; /***********************************************************************/ #endif mothur-1.48.0/source/inputdata.cpp000077500000000000000000000736761424121717000171440ustar00rootroot00000000000000/* * inputdata.cpp * Dotur * * Created by Sarah Westcott on 11/18/08. * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "inputdata.h" #include "ordervector.hpp" #include "listvector.hpp" #include "rabundvector.hpp" #include "sharedrabundvectors.hpp" #include "sharedclrvectors.hpp" /***********************************************************************/ InputData::InputData(string fName, string f, vector userGroups) : format(f){ m = MothurOut::getInstance(); util.openInputFile(fName, fileHandle); filename = fName; nextDistanceLabel = ""; groups = userGroups; otuTag = util.getTag(fName); } /***********************************************************************/ InputData::~InputData(){ fileHandle.close(); nextDistanceLabel = ""; } /***********************************************************************/ InputData::InputData(string fName, string orderFileName, string f) : format(f){ try { m = MothurOut::getInstance(); ifstream ofHandle; util.openInputFile(orderFileName, ofHandle); string name; int count = 0; while(ofHandle){ ofHandle >> name; orderMap[name] = count; count++; gobble(ofHandle); } ofHandle.close(); util.openInputFile(fName, fileHandle); nextDistanceLabel = ""; otuTag = util.getTag(fName); } catch(exception& e) { m->errorOut(e, "InputData", "InputData"); exit(1); } } /***********************************************************************/ ListVector* InputData::getListVector(){ try { if(!fileHandle.eof()){ if(format == "list") { list = new ListVector(fileHandle, nextDistanceLabel, otuTag); if (list != nullptr) { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = list->getLabels(); } else { list->setLabels(currentLabels); } } }else{ list = nullptr; } gobble(fileHandle); return list; } else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getListVector"); exit(1); } } /***********************************************************************/ ListVector* InputData::getListVector(string label){ try { ifstream in; util.openInputFile(filename, in); nextDistanceLabel = ""; if(in){ if (format == "list") { while (!in.eof()) { list = new ListVector(in, nextDistanceLabel, otuTag); nextDistanceLabel = list->getLabel(); if (list != nullptr) { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = list->getLabels(); } else { list->setLabels(currentLabels); } } //if you are at the last label if (nextDistanceLabel == label) { break; } //so you don't loose this memory else { delete list; } gobble(in); } }else{ list = nullptr; } in.close(); return list; } else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getListVector"); exit(1); } } /***********************************************************************/ ListVector* InputData::getListVector(string label, bool resetFP){ try { fileHandle.clear(); fileHandle.seekg(0); nextDistanceLabel = ""; if(fileHandle){ if (format == "list") { while (fileHandle.eof() != true) { list = new ListVector(fileHandle, nextDistanceLabel, otuTag); gobble(fileHandle); nextDistanceLabel = list->getLabel(); if (list != nullptr) { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = list->getLabels(); } else { list->setLabels(currentLabels); } } //if you are at the label you want if (nextDistanceLabel == label) { return list; } else { delete list; } //so you don't loose this memory } }else{ return nullptr; } } return nullptr; } catch(exception& e) { m->errorOut(e, "InputData", "getListVector"); exit(1); } } /***********************************************************************/ SharedListVector* 
InputData::getSharedListVector(){ try { if(fileHandle){ if (format == "shared") { SharedList = new SharedListVector(fileHandle, groups, nextDistanceLabel, otuTag); if (SharedList != nullptr) { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedList->getLabels(); } else { SharedList->setLabels(currentLabels); } } }else{ SharedList = nullptr; } gobble(fileHandle); return SharedList; } else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getSharedListVector"); exit(1); } } /***********************************************************************/ SharedListVector* InputData::getSharedListVector(string label){ try { string thisLabel; ifstream in; util.openInputFile(filename, in); nextDistanceLabel = ""; if(in){ if (format == "shared") { while (!in.eof()) { SharedList = new SharedListVector(in, groups, nextDistanceLabel, otuTag); thisLabel = SharedList->getLabel(); if (SharedList != nullptr) { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedList->getLabels(); } else { SharedList->setLabels(currentLabels); } } //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete SharedList; } gobble(in); } }else{ SharedList = nullptr; } in.close(); return SharedList; }else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getSharedListVector"); exit(1); } } /***********************************************************************/ SharedOrderVector* InputData::getSharedOrderVector(){ try { if(fileHandle){ if (format == "sharedfile") { SharedOrder = new SharedOrderVector(fileHandle, groups, nextDistanceLabel); if (SharedOrder->getNumBins() == 0) { delete SharedOrder; SharedOrder = nullptr; } //no valid groups }else{ SharedOrder = nullptr; } gobble(fileHandle); return SharedOrder; }else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getSharedOrderVector"); exit(1); } } /***********************************************************************/ SharedOrderVector* InputData::getSharedOrderVector(string label){ try { string thisLabel; ifstream in; util.openInputFile(filename, in); nextDistanceLabel = ""; if(in){ if (format == "sharedfile") { while (!in.eof()) { SharedOrder = new SharedOrderVector(in, groups, nextDistanceLabel); thisLabel = SharedOrder->getLabel(); if (SharedOrder->getNumBins() == 0) { delete SharedOrder; SharedOrder = nullptr; break; } //no valid groups //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete SharedOrder; } gobble(in); } }else{ SharedOrder = nullptr; } in.close(); return SharedOrder; }else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getSharedOrderVector"); exit(1); } } /***********************************************************************/ OrderVector* InputData::getOrderVector(){ try { if(fileHandle){ if((format == "list") || (format == "listorder")) { input = new ListVector(fileHandle, nextDistanceLabel, otuTag); } else if (format == "shared") { input = new SharedListVector(fileHandle, groups, nextDistanceLabel, otuTag); } else if(format == "rabund"){ input = new RAbundVector(fileHandle); } else if(format == "order"){ input = new OrderVector(fileHandle); } else if(format == "sabund"){ input = new SAbundVector(fileHandle); } gobble(fileHandle); output = new OrderVector(); *output = (input->getOrderVector()); return output; } else{ return nullptr; } } catch(exception& e) { 
m->errorOut(e, "InputData", "getOrderVector"); exit(1); } } /***********************************************************************/ OrderVector* InputData::getOrderVector(string label){ try { string thisLabel; ifstream in; util.openInputFile(filename, in); nextDistanceLabel = ""; if(in){ if((format == "list") || (format == "listorder")) { nextDistanceLabel = ""; while (!in.eof()) { input = new ListVector(in, nextDistanceLabel, otuTag); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if (format == "shared") { nextDistanceLabel = ""; while (!in.eof()) { input = new SharedListVector(in, groups, nextDistanceLabel, otuTag); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if(format == "rabund"){ while (!in.eof()) { input = new RAbundVector(in); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if(format == "order"){ while (!in.eof()) { input = new OrderVector(in); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if(format == "sabund"){ while (!in.eof()) { input = new SAbundVector(in); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } in.close(); output = new OrderVector(); *output = (input->getOrderVector()); return output; } else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getOrderVector"); exit(1); } } /***********************************************************************/ SharedRAbundVectors* InputData::getSharedRAbundVectors(){ try { if(fileHandle){ if (format == "sharedfile") { SharedRAbundVectors* shared = new SharedRAbundVectors(fileHandle, groups, nextDistanceLabel, otuTag); if (shared != nullptr) { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = shared->getOTUNames(); } else { shared->setOTUNames(currentLabels); } if (shared->getNumBins() == 0) { delete shared; shared = nullptr; } //no valid groups } return shared; }else if (format == "shared") { SharedList = new SharedListVector(fileHandle, groups, nextDistanceLabel, otuTag); if (SharedList != nullptr) { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedList->getLabels(); } else { SharedList->setLabels(currentLabels); } return SharedList->getSharedRAbundVector(); } } gobble(fileHandle); } //this is created to signal to calling function that the input file is at eof SharedRAbundVectors* null; null = nullptr; return null; } catch(exception& e) { m->errorOut(e, "InputData", "getSharedRAbundVectors"); exit(1); } } /***********************************************************************/ SharedRAbundVectors* InputData::getSharedRAbundVectors(string label){ try { string thisLabel; ifstream in; util.openInputFile(filename, in); nextDistanceLabel = ""; if(in){ if (format == "sharedfile") { while (!in.eof()) { SharedRAbundVectors* SharedRAbund = new SharedRAbundVectors(in, groups, nextDistanceLabel, otuTag); if (SharedRAbund != nullptr) { thisLabel = SharedRAbund->getLabel(); if (SharedRAbund->getNumBins() == 0) { 
delete SharedRAbund; SharedRAbund = nullptr; break; } //no valid groups //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedRAbund->getOTUNames(); } else { SharedRAbund->setOTUNames(currentLabels); } //if you are at the last label if (thisLabel == label) { in.close(); return SharedRAbund; } else { delete SharedRAbund; } }else{ break; } gobble(in); } }else if (format == "shared") { while (!in.eof()) { SharedList = new SharedListVector(in, groups, nextDistanceLabel, otuTag); if (SharedList != nullptr) { thisLabel = SharedList->getLabel(); //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedList->getLabels(); } else { SharedList->setLabels(currentLabels); } //if you are at the last label if (thisLabel == label) { in.close(); return SharedList->getSharedRAbundVector(); } else { //so you don't loose this memory delete SharedList; } }else{ break; } gobble(in); } } } //this is created to signal to calling function that the input file is at eof SharedRAbundVectors* null; null = (nullptr); in.close(); return null; } catch(exception& e) { m->errorOut(e, "InputData", "getSharedRAbundVectors"); exit(1); } } /***********************************************************************/ //this is used when you don't need the order vector SharedRAbundFloatVectors* InputData::getSharedRAbundFloatVectors(){ try { if(fileHandle){ if (format == "relabund") { SharedRAbundFloatVectors* SharedRelAbund = new SharedRAbundFloatVectors(fileHandle, groups, nextDistanceLabel, otuTag); if (SharedRelAbund->getNumBins() == 0) { delete SharedRelAbund; SharedRelAbund = nullptr; } //no valid groups else { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedRelAbund->getOTUNames(); } else { SharedRelAbund->setOTUNames(currentLabels); } } return SharedRelAbund; }else if (format == "sharedfile") { SharedRAbundVectors* SharedRAbund = new SharedRAbundVectors(fileHandle, groups, nextDistanceLabel, otuTag); if (SharedRAbund != nullptr) { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedRAbund->getOTUNames(); } else { SharedRAbund->setOTUNames(currentLabels); } vector lookup = SharedRAbund->getSharedRAbundFloatVectors(); SharedRAbundFloatVectors* SharedRelAbund = new SharedRAbundFloatVectors(); SharedRelAbund->setOTUNames(currentLabels); for (int i = 0; i < lookup.size(); i++) { SharedRelAbund->push_back(lookup[i]); } delete SharedRAbund; return SharedRelAbund; } } gobble(fileHandle); } //this is created to signal to calling function that the input file is at eof SharedRAbundFloatVectors* null; null = (nullptr); return null; } catch(exception& e) { m->errorOut(e, "InputData", "getSharedRAbundFloatVectors"); exit(1); } } /***********************************************************************/ SharedRAbundFloatVectors* InputData::getSharedRAbundFloatVectors(string label){ try { string thisLabel; ifstream in; util.openInputFile(filename, in); nextDistanceLabel = ""; if(in){ if (format == "relabund") { while (!in.eof()) { SharedRAbundFloatVectors* SharedRelAbund = new SharedRAbundFloatVectors(in, groups, nextDistanceLabel, otuTag); if (SharedRelAbund != nullptr) { thisLabel = SharedRelAbund->getLabel(); if (SharedRelAbund->getNumBins() == 0) { delete SharedRelAbund; SharedRelAbund = nullptr; break; } //no valid groups //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedRelAbund->getOTUNames(); } else { 
SharedRelAbund->setOTUNames(currentLabels); } //if you are at the last label if (thisLabel == label) { in.close(); return SharedRelAbund; } else { delete SharedRelAbund; } }else{ break; } gobble(in); } }else if (format == "sharedfile") { while (!in.eof()) { SharedRAbundVectors* SharedRAbund = new SharedRAbundVectors(in, groups, nextDistanceLabel, otuTag); if (SharedRAbund != nullptr) { thisLabel = SharedRAbund->getLabel(); //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedRAbund->getOTUNames(); } else { SharedRAbund->setOTUNames(currentLabels); } //if you are at the last label if (thisLabel == label) { in.close(); vector lookup = SharedRAbund->getSharedRAbundFloatVectors(); SharedRAbundFloatVectors* SharedRelAbund = new SharedRAbundFloatVectors(); SharedRelAbund->setOTUNames(currentLabels); for (int i = 0; i < lookup.size(); i++) { SharedRelAbund->push_back(lookup[i]); } delete SharedRAbund; return SharedRelAbund; }else { delete SharedRAbund; } }else{ break; } gobble(in); } } } //this is created to signal to calling function that the input file is at eof SharedRAbundFloatVectors* null; null = (nullptr); in.close(); return null; } catch(exception& e) { m->errorOut(e, "InputData", "getSharedRAbundFloatVectors"); exit(1); } } /***********************************************************************/ //this is used when you don't need the order vector SharedCLRVectors* InputData::getSharedCLRVectors(){ try { if(fileHandle){ if (format == "clrfile") { SharedCLRVectors* SharedCLR = new SharedCLRVectors(fileHandle, groups, nextDistanceLabel, otuTag); if (SharedCLR->getNumBins() == 0) { delete SharedCLR; SharedCLR = nullptr; } //no valid groups else { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedCLR->getOTUNames(); } else { SharedCLR->setOTUNames(currentLabels); } } return SharedCLR; } gobble(fileHandle); } //this is created to signal to calling function that the input file is at eof SharedCLRVectors* null; null = (nullptr); return null; } catch(exception& e) { m->errorOut(e, "InputData", "getSharedCLRVectors"); exit(1); } } /***********************************************************************/ SharedCLRVectors* InputData::getSharedCLRVectors(string label){ try { string thisLabel; ifstream in; util.openInputFile(filename, in); nextDistanceLabel = ""; if(in){ if (format == "clrfile") { while (!in.eof()) { SharedCLRVectors* SharedCLR = new SharedCLRVectors(in, groups, nextDistanceLabel, otuTag); if (SharedCLR != nullptr) { thisLabel = SharedCLR->getLabel(); if (SharedCLR->getNumBins() == 0) { delete SharedCLR; SharedCLR = nullptr; break; } //no valid groups //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = SharedCLR->getOTUNames(); } else { SharedCLR->setOTUNames(currentLabels); } //if you are at the last label if (thisLabel == label) { in.close(); return SharedCLR; } else { delete SharedCLR; } }else{ break; } gobble(in); } } } //this is created to signal to calling function that the input file is at eof SharedCLRVectors* null; null = (nullptr); in.close(); return null; } catch(exception& e) { m->errorOut(e, "InputData", "getSharedCLRVectors"); exit(1); } } /***********************************************************************/ SAbundVector* InputData::getSAbundVector(){ try { if(fileHandle){ if (format == "list") { input = new ListVector(fileHandle, nextDistanceLabel, otuTag); } else if (format == "shared") { input = new SharedListVector(fileHandle, groups, 
nextDistanceLabel, otuTag); } else if(format == "rabund"){ input = new RAbundVector(fileHandle); } else if(format == "order"){ input = new OrderVector(fileHandle); } else if(format == "sabund"){ input = new SAbundVector(fileHandle); } gobble(fileHandle); sabund = new SAbundVector(); *sabund = (input->getSAbundVector()); return sabund; } else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getSAbundVector"); exit(1); } } /***********************************************************************/ SAbundVector* InputData::getSAbundVector(string label){ try { string thisLabel; ifstream in; util.openInputFile(filename, in); nextDistanceLabel = ""; if(in){ if (format == "list") { nextDistanceLabel = ""; while (!in.eof()) { input = new ListVector(in, nextDistanceLabel, otuTag); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if (format == "shared") { nextDistanceLabel = ""; while (!in.eof()) { input = new SharedListVector(in, groups, nextDistanceLabel, otuTag); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if(format == "rabund"){ while (!in.eof()) { input = new RAbundVector(in); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if(format == "order"){ while (!in.eof()) { input = new OrderVector(in); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if(format == "sabund"){ while (!in.eof()) { input = new SAbundVector(in); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } in.close(); sabund = new SAbundVector(); *sabund = (input->getSAbundVector()); return sabund; } else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getSAbundVector"); exit(1); } } /***********************************************************************/ RAbundVector* InputData::getRAbundVector(){ try { if(fileHandle){ if (format == "list") { input = new ListVector(fileHandle, nextDistanceLabel, otuTag); } else if (format == "shared") { input = new SharedListVector(fileHandle, groups, nextDistanceLabel, otuTag); } else if(format == "rabund"){ input = new RAbundVector(fileHandle); } else if(format == "order"){ input = new OrderVector(fileHandle); } else if(format == "sabund"){ input = new SAbundVector(fileHandle); }else if (format == "sharedfile") { SharedRAbundVectors* shared = new SharedRAbundVectors(fileHandle, groups, nextDistanceLabel, otuTag); if (shared != nullptr) { //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = shared->getOTUNames(); } else { shared->setOTUNames(currentLabels); } if (shared->getNumBins() == 0) { delete shared; shared = nullptr; return nullptr; } //no valid groups } gobble(fileHandle); rabund = new RAbundVector(); *rabund = (shared->getRAbundVector()); delete shared; return rabund; } gobble(fileHandle); rabund = new RAbundVector(); *rabund = (input->getRAbundVector()); delete input; return rabund; } else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getRAbundVector"); exit(1); } } 
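//note: the label-specific overloads, such as getRAbundVector(label) below, reopen the input file and read vectors until the requested label is reached, deleting each intermediate vector so that only the matching one is returned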
/***********************************************************************/ RAbundVector* InputData::getRAbundVector(string label){ try { string thisLabel; ifstream in; util.openInputFile(filename, in); nextDistanceLabel = ""; if(in){ if (format == "list") { nextDistanceLabel = ""; while (!in.eof()) { input = new ListVector(in, nextDistanceLabel, otuTag); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if (format == "shared") { nextDistanceLabel = ""; while (!in.eof()) { input = new SharedListVector(in, groups, nextDistanceLabel, otuTag); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if(format == "rabund"){ while (!in.eof()) { input = new RAbundVector(in); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if(format == "order"){ while (!in.eof()) { input = new OrderVector(in); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } } else if(format == "sabund"){ while (!in.eof()) { input = new SAbundVector(in); thisLabel = input->getLabel(); //if you are at the last label if (thisLabel == label) { break; } //so you don't loose this memory else { delete input; } gobble(in); } }else if (format == "sharedfile") { while (!in.eof()) { SharedRAbundVectors* shared = new SharedRAbundVectors(in, groups, nextDistanceLabel, otuTag); if (shared != nullptr) { thisLabel = shared->getLabel(); if (shared->getNumBins() == 0) { delete shared; shared = nullptr; in.close(); return nullptr; } //no valid groups //pass labels to others distances in file if (currentLabels.size() == 0) { currentLabels = shared->getOTUNames(); } else { shared->setOTUNames(currentLabels); } //if you are at the last label if (thisLabel == label) { in.close(); rabund = new RAbundVector(); *rabund = (shared->getRAbundVector()); delete shared; return rabund; } else { delete shared; } }else{ in.close(); return nullptr; } gobble(in); } } in.close(); rabund = new RAbundVector(); *rabund = (input->getRAbundVector()); return rabund; } else{ return nullptr; } } catch(exception& e) { m->errorOut(e, "InputData", "getRAbundVector"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/inputdata.h000077500000000000000000000036171424121717000165750ustar00rootroot00000000000000#ifndef INPUTDATA_H #define INPUTDATA_H #include "mothur.h" #include "ordervector.hpp" #include "sharedlistvector.h" #include "sharedordervector.h" #include "listvector.hpp" #include "sharedrabundvectors.hpp" #include "sharedrabundfloatvectors.hpp" #include "utils.hpp" class InputData { public: InputData(string, string, vector); InputData(string, string, string); ~InputData(); ListVector* getListVector(); ListVector* getListVector(string); //pass the label you want ListVector* getListVector(string, bool); //pass the label you want, reset filepointer SharedListVector* getSharedListVector(); SharedListVector* getSharedListVector(string); //pass the label you want OrderVector* getOrderVector(); OrderVector* getOrderVector(string); //pass the label you want SharedOrderVector* getSharedOrderVector(); SharedOrderVector* getSharedOrderVector(string); 
//pass the label you want SAbundVector* getSAbundVector(); SAbundVector* getSAbundVector(string); //pass the label you want RAbundVector* getRAbundVector(); RAbundVector* getRAbundVector(string); //pass the label you want SharedRAbundVectors* getSharedRAbundVectors(); SharedRAbundVectors* getSharedRAbundVectors(string); //pass the label you want SharedRAbundFloatVectors* getSharedRAbundFloatVectors(); SharedRAbundFloatVectors* getSharedRAbundFloatVectors(string); //pass the label you want SharedCLRVectors* getSharedCLRVectors(); SharedCLRVectors* getSharedCLRVectors(string); //pass the label you want private: Utils util; string format; ifstream fileHandle; DataVector* input; ListVector* list; SharedListVector* SharedList; OrderVector* output; SharedOrderVector* SharedOrder; SAbundVector* sabund; RAbundVector* rabund; map orderMap; string filename; MothurOut* m; vector currentLabels; vector groups; string nextDistanceLabel; string otuTag; }; #endif mothur-1.48.0/source/libshuff.cpp000077500000000000000000000070521424121717000167360ustar00rootroot00000000000000/* * libshuffform.cpp * Mothur * * Created by Pat Schloss on 4/8/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "libshuff.h" /***********************************************************************/ void swap(int& i,int& j){ int t = i; i = j; j = t; } /***********************************************************************/ Libshuff::Libshuff(FullMatrix* D, int it, float step, float co) : matrix(D), iters(it), stepSize(step), cutOff(co){ try{ m = MothurOut::getInstance(); groupNames = matrix->getGroups(); groupSizes = matrix->getSizes(); numGroups = matrix->getNumGroups(); initializeGroups(matrix); } catch(exception& e) { m->errorOut(e, "Libshuff", "Libshuff"); exit(1); } } /***********************************************************************/ void Libshuff::initializeGroups(FullMatrix* matrix){ try{ groups.resize(numGroups); savedGroups.resize(numGroups); savedGroups.resize(numGroups); for(int i=0;ierrorOut(e, "Libshuff", "initializeGroups"); exit(1); } } /***********************************************************************/ vector > > Libshuff::getSavedMins(){ return savedMins; } /***********************************************************************/ vector Libshuff::getMinX(int x){ try{ vector minX(groupSizes[x], 0); for(int i=0;i 1 ? (i==0 ? 
matrix->get(groups[x][0], groups[x][1]) : matrix->get(groups[x][i], groups[x][0])) : 0.0); //get the first value in row i of this block //minX[i] = matrix->get(groups[x][i], groups[x][0]); for(int j=0;jget(groups[x][i], groups[x][j]); if(dx < minX[i]){ minX[i] = dx; } } } } return minX; } catch(exception& e) { m->errorOut(e, "Libshuff", "getMinX"); exit(1); } } /***********************************************************************/ vector Libshuff::getMinXY(int x, int y){ try{ vector minXY(groupSizes[x], 0); for(int i=0;iget(groups[x][i], groups[y][0]); for(int j=0;jget(groups[x][i], groups[y][j]); if(dxyerrorOut(e, "Libshuff", "getMinXY"); exit(1); } } /***********************************************************************/ void Libshuff::randomizeGroups(int x, int y){ try{ int nv = groupSizes[x]+groupSizes[y]; vector v(nv); int index=0; for(int k=0;k0;k--){ int z = util.getRandomIndex(k); swap(v[z],v[k]); } index=0; for(int k=0;kerrorOut(e, "Libshuff", "randomizeGroups"); exit(1); } } /***********************************************************************/ void Libshuff::resetGroup(int x){ for(int k=0;k > evaluateAll() = 0; virtual float evaluatePair(int,int) = 0; void randomizeGroups(int, int); void resetGroup(int); vector > > getSavedMins(); protected: void initializeGroups(FullMatrix*); vector getMinX(int); vector getMinXY(int, int); vector > > savedMins; MothurOut* m; FullMatrix* matrix; vector groupSizes; vector groupNames; vector > groups; vector > savedGroups; vector minX; vector minXY; float cutOff, stepSize; int iters, numGroups; Utils util; }; #endif mothur-1.48.0/source/linearalgebra.cpp000077500000000000000000002370031424121717000177250ustar00rootroot00000000000000/* * linearalgebra.cpp * mothur * * Created by westcott on 1/7/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "linearalgebra.h" #include "wilcox.h" #define PI 3.1415926535897932384626433832795 // This class references functions used from "Numerical Recipes in C++" // /*********************************************************************************************************************************/ inline double SQR(const double a) { return a*a; } /*********************************************************************************************************************************/ inline double SIGN(const double a, const double b) { return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a); } /*********************************************************************************************************************************/ //NUmerical recipes pg. 245 - Returns the complementary error function erfc(x) with fractional error everywhere less than 1.2 × 10−7. double LinearAlgebra::erfcc(double x){ try { double t,z,ans; z=fabs(x); t=1.0/(1.0+0.5*z); ans=t*exp(-z*z-1.26551223+t*(1.00002368+t*(0.37409196+t*(0.09678418+ t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+ t*(-0.82215223+t*0.17087277))))))))); return (x >= 0.0 ? ans : 2.0 - ans); } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "betai"); exit(1); } } /*********************************************************************************************************************************/ //Numerical Recipes pg. 
232 double LinearAlgebra::betai(const double a, const double b, const double x) { try { double bt; double result = 0.0; if (x < 0.0 || x > 1.0) { m->mothurOut("[ERROR]: bad x in betai.\n"); m->setControl_pressed(true); return 0.0; } if (util.isEqual(x,0.0) || util.isEqual(x,1.0)) { bt = 0.0; } else { bt = exp(gammln(a+b)-gammln(a)-gammln(b)+a*log(x)+b*log(1.0-x)); } if (x < (a+1.0) / (a+b+2.0)) { result = bt*betacf(a,b,x)/a; } else { result = 1.0-bt*betacf(b,a,1.0-x)/b; } return result; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "betai"); exit(1); } } /*********************************************************************************************************************************/ //Numerical Recipes pg. 219 double LinearAlgebra::gammln(const double xx) { try { int j; double x,y,tmp,ser; static const double cof[6]={76.18009172947146,-86.50532032941677,24.01409824083091, -1.231739572450155,0.120858003e-2,-0.536382e-5}; y=x=xx; tmp=x+5.5; tmp -= (x+0.5)*log(tmp); ser=1.0; for (j=0;j<6;j++) { ser += cof[j]/++y; } return -tmp+log(2.5066282746310005*ser/x); } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "gammln"); exit(1); } } /*********************************************************************************************************************************/ //Numerical Recipes pg. 223 double LinearAlgebra::gammp(const double a, const double x) { try { double gamser,gammcf,gln; if (x < 0.0 || a <= 0.0) { m->mothurOut("[ERROR]: Invalid arguments in routine GAMMP\n"); m->setControl_pressed(true); return 0.0; } if (x < (a+1.0)) { gser(gamser,a,x,gln); return gamser; } else { gcf(gammcf,a,x,gln); return 1.0-gammcf; } return 0; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "gammp"); exit(1); } } /*********************************************************************************************************************************/ //Numerical Recipes pg. 224 double LinearAlgebra::gcf(double& gammcf, const double a, const double x, double& gln){ try { const int ITMAX=100; const double EPS=numeric_limits::epsilon(); const double FPMIN=numeric_limits::min()/EPS; int i; double an,b,c,d,del,h; gln=gammln(a); b=x+1.0-a; c=1.0/FPMIN; d=1.0/b; h=d; for (i=1;i<=ITMAX;i++) { an = -i*(i-a); b += 2.0; d=an*d+b; if (fabs(d) < FPMIN) { d=FPMIN; } c=b+an/c; if (fabs(c) < FPMIN) { c=FPMIN; } d=1.0/d; del=d*c; h *= del; if (fabs(del-1.0) <= EPS) break; } if (i > ITMAX) { m->mothurOut("[ERROR]: " + toString(a) + " too large, ITMAX=100 too small in gcf\n"); m->setControl_pressed(true); } gammcf=exp(-x+a*log(x)-gln)*h; return 0.0; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "gcf"); exit(1); } } /*********************************************************************************************************************************/ //Numerical Recipes pg. 
223 double LinearAlgebra::gser(double& gamser, const double a, const double x, double& gln) { try { int n; double sum,del,ap; const double EPS = numeric_limits::epsilon(); gln=gammln(a); if (x <= 0.0) { if (x < 0.0) { m->mothurOut("[ERROR]: x less than 0 in routine GSER\n"); m->setControl_pressed(true); } gamser=0.0; return 0.0; } else { ap=a; del=sum=1.0/a; for (n=0;n<100;n++) { ++ap; del *= x/ap; sum += del; if (fabs(del) < fabs(sum)*EPS) { gamser=sum*exp(-x+a*log(x)-gln); return 0.0; } } m->mothurOut("[ERROR]: a too large, ITMAX too small in routine GSER\n"); return 0.0; } return 0; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "gser"); exit(1); } } /*********************************************************************************************************************************/ //Numerical Recipes pg. 233 double LinearAlgebra::betacf(const double a, const double b, const double x) { try { const int MAXIT = 100; const double EPS = numeric_limits::epsilon(); const double FPMIN = numeric_limits::min() / EPS; int m1, m2; double aa, c, d, del, h, qab, qam, qap; qab=a+b; qap=a+1.0; qam=a-1.0; c=1.0; d=1.0-qab*x/qap; if (fabs(d) < FPMIN) d=FPMIN; d=1.0/d; h=d; for (m1=1;m1<=MAXIT;m1++) { m2=2*m1; aa=m1*(b-m1)*x/((qam+m2)*(a+m2)); d=1.0+aa*d; if (fabs(d) < FPMIN) d=FPMIN; c=1.0+aa/c; if (fabs(c) < FPMIN) c=FPMIN; d=1.0/d; h *= d*c; aa = -(a+m1)*(qab+m1)*x/((a+m2)*(qap+m2)); d=1.0+aa*d; if (fabs(d) < FPMIN) d=FPMIN; c=1.0+aa/c; if (fabs(c) < FPMIN) c=FPMIN; d=1.0/d; del=d*c; h *= del; if (fabs(del-1.0) < EPS) break; } if (m1 > MAXIT) { m->mothurOut("[ERROR]: a or b too big or MAXIT too small in betacf.\n"); m->setControl_pressed(true); } return h; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "betacf"); exit(1); } } /*********************************************************************************************************************************/ //[3][4] * [4][5] - columns in first must match rows in second, returns matrix[3][5] vector > LinearAlgebra::matrix_mult(vector > first, vector > second){ try { vector > product; int first_rows = first.size(); int first_cols = first[0].size(); int second_cols = second[0].size(); product.resize(first_rows); for(int i=0;igetControl_pressed()) { return product; } product[i][j] = 0.0; for(int k=0;kerrorOut(e, "LinearAlgebra", "matrix_mult"); exit(1); } } /*********************************************************************************************************************************/ vector > LinearAlgebra::transpose(vector >matrix){ try { vector > trans; trans.resize(matrix[0].size()); for (int i = 0; i < trans.size(); i++) { for (int j = 0; j < matrix.size(); j++) { trans[i].push_back(matrix[j][i]); } } return trans; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "transpose"); exit(1); } } /*********************************************************************************************************************************/ void LinearAlgebra::recenter(double offset, vector > D, vector >& G){ try { int rank = D.size(); vector > A(rank); vector > C(rank); for(int i=0;ierrorOut(e, "LinearAlgebra", "recenter"); exit(1); } } /*********************************************************************************************************************************/ // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 
479 int LinearAlgebra::tred2(vector >& a, vector& d, vector& e){ try { double scale, hh, h, g, f; int n = a.size(); d.resize(n); e.resize(n); for(int i=n-1;i>0;i--){ int l=i-1; h = scale = 0.0000; if(l>0){ for(int k=0;k= 0.0 ? -sqrt(h) : sqrt(h)); e[i] = scale * g; h -= f * g; a[i][l] = f - g; f = 0.0; for(int j=0;jerrorOut(e, "LinearAlgebra", "tred2"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::pythag(double a, double b) { return(pow(a*a+b*b,0.5)); } /*********************************************************************************************************************************/ // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479 int LinearAlgebra::qtli(vector& d, vector& e, vector >& z) { try { int myM, i, iter; double s, r, p, g, f, dd, c, b; int n = d.size(); for(int i=1;i<=n;i++){ e[i-1] = e[i]; } e[n-1] = 0.0000; for(int l=0;l=l;i--){ f = s * e[i]; b = c * e[i]; e[i+1] = (r=pythag(f,g)); if(util.isEqual(r,0.0)){ d[i+1] -= p; e[myM] = 0.0000; break; } s = f / r; c = g / r; g = d[i+1] - p; r = (d[i] - g) * s + 2.0 * c * b; d[i+1] = g + ( p = s * r); g = c * r - b; for(int k=0;k= l) continue; d[l] -= p; e[l] = g; e[myM] = 0.0; } } while (myM != l); } int k; for(int i=0;i= p){ p=d[k=j]; } } if(k!=i){ d[k]=d[i]; d[i]=p; for(int j=0;jerrorOut(e, "LinearAlgebra", "qtli"); exit(1); } } /*********************************************************************************************************************************/ //groups by dimension vector< vector > LinearAlgebra::calculateEuclidianDistance(vector< vector >& axes, int dimensions){ try { //make square matrix vector< vector > dists; dists.resize(axes.size()); for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes.size(), 0.0); } if (dimensions == 1) { //one dimension calc = abs(x-y) for (int i = 0; i < dists.size(); i++) { if (m->getControl_pressed()) { return dists; } for (int j = 0; j < i; j++) { dists[i][j] = abs(axes[i][0] - axes[j][0]); dists[j][i] = dists[i][j]; } } }else if (dimensions > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)... for (int i = 0; i < dists.size(); i++) { if (m->getControl_pressed()) { return dists; } for (int j = 0; j < i; j++) { double sum = 0.0; for (int k = 0; k < dimensions; k++) { sum += ((axes[i][k] - axes[j][k]) * (axes[i][k] - axes[j][k])); } dists[i][j] = sqrt(sum); dists[j][i] = dists[i][j]; } } } return dists; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance"); exit(1); } } /*********************************************************************************************************************************/ //returns groups by dimensions from dimensions by groups vector< vector > LinearAlgebra::calculateEuclidianDistance(vector< vector >& axes){ try { //make square matrix vector< vector > dists; dists.resize(axes[0].size()); for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes[0].size(), 0.0); } if (axes.size() == 1) { //one dimension calc = abs(x-y) for (int i = 0; i < dists.size(); i++) { if (m->getControl_pressed()) { return dists; } for (int j = 0; j < i; j++) { dists[i][j] = abs(axes[0][i] - axes[0][j]); dists[j][i] = dists[i][j]; } } }else if (axes.size() > 1) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)... 
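//in general, with axes indexed as [dimension][group], the value filled in below is
//    d(i,j) = sqrt( sum over k of (axes[k][i] - axes[k][j])^2 )
//only the lower triangle is computed; dists[j][i] is then mirrored from dists[i][j].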
for (int i = 0; i < dists[0].size(); i++) { if (m->getControl_pressed()) { return dists; } for (int j = 0; j < i; j++) { double sum = 0.0; for (int k = 0; k < axes.size(); k++) { sum += ((axes[k][i] - axes[k][j]) * (axes[k][i] - axes[k][j])); } dists[i][j] = sqrt(sum); dists[j][i] = dists[i][j]; } } } return dists; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance"); exit(1); } } /*********************************************************************************************************************************/ //assumes both matrices are square and the same size double LinearAlgebra::calcPearson(vector< vector >& euclidDists, vector< vector >& userDists){ try { //find average for - X int count = 0; float averageEuclid = 0.0; for (int i = 0; i < euclidDists.size(); i++) { for (int j = 0; j < i; j++) { averageEuclid += euclidDists[i][j]; count++; } } averageEuclid = averageEuclid / (float) count; //find average for - Y count = 0; float averageUser = 0.0; for (int i = 0; i < userDists.size(); i++) { for (int j = 0; j < i; j++) { averageUser += userDists[i][j]; count++; } } averageUser = averageUser / (float) count; double numerator = 0.0; double denomTerm1 = 0.0; double denomTerm2 = 0.0; for (int i = 0; i < euclidDists.size(); i++) { for (int k = 0; k < i; k++) { //just lt dists float Yi = userDists[i][k]; float Xi = euclidDists[i][k]; numerator += ((Xi - averageEuclid) * (Yi - averageUser)); denomTerm1 += ((Xi - averageEuclid) * (Xi - averageEuclid)); denomTerm2 += ((Yi - averageUser) * (Yi - averageUser)); } } double denom = (sqrt(denomTerm1) * sqrt(denomTerm2)); double r = numerator / denom; //divide by zero error if (isnan(r) || isinf(r)) { r = 0.0; } return r; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcPearson"); exit(1); } } /*********************************************************************************************************************************/ //assumes both matrices are square and the same size double LinearAlgebra::calcSpearman(vector< vector >& euclidDists, vector< vector >& userDists){ try { double r; //format data map tableX; map::iterator itTable; vector scores; for (int i = 0; i < euclidDists.size(); i++) { for (int j = 0; j < i; j++) { spearmanRank member(toString(scores.size()), euclidDists[i][j]); scores.push_back(member); //count number of repeats itTable = tableX.find(euclidDists[i][j]); if (itTable == tableX.end()) { tableX[euclidDists[i][j]] = 1; }else { tableX[euclidDists[i][j]]++; } } } //sort scores sort(scores.begin(), scores.end(), compareSpearman); //calc LX double Lx = 0.0; for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) { double tx = (double) itTable->second; Lx += ((pow(tx, 3.0) - tx) / 12.0); } //find ranks of xi map rankEuclid; vector ties; int rankTotal = 0; for (int j = 0; j < scores.size(); j++) { rankTotal += (j+1); ties.push_back(scores[j]); if (j != (scores.size()-1)) { // you are not the last so you can look ahead if (scores[j].score != scores[j+1].score) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankEuclid[ties[k].name] = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankEuclid[ties[k].name] = thisrank; } } } //format data map tableY; scores.clear(); for (int i = 0; i < userDists.size(); i++) { for (int j = 0; j < i; j++) { spearmanRank 
member(toString(scores.size()), userDists[i][j]); scores.push_back(member); //count number of repeats itTable = tableY.find(userDists[i][j]); if (itTable == tableY.end()) { tableY[userDists[i][j]] = 1; }else { tableY[userDists[i][j]]++; } } } //sort scores sort(scores.begin(), scores.end(), compareSpearman); //calc LX double Ly = 0.0; for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) { double ty = (double) itTable->second; Ly += ((pow(ty, 3.0) - ty) / 12.0); } //find ranks of yi map rankUser; ties.clear(); rankTotal = 0; for (int j = 0; j < scores.size(); j++) { rankTotal += (j+1); ties.push_back(scores[j]); if (j != (scores.size()-1)) { // you are not the last so you can look ahead if (!util.isEqual(scores[j].score, scores[j+1].score)) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankUser[ties[k].name] = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankUser[ties[k].name] = thisrank; } } } double di = 0.0; int count = 0; for (int i = 0; i < userDists.size(); i++) { for (int j = 0; j < i; j++) { float xi = rankEuclid[toString(count)]; float yi = rankUser[toString(count)]; di += ((xi - yi) * (xi - yi)); count++; } } double n = (double) count; double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx; double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly; r = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2))); //divide by zero error if (isnan(r) || isinf(r)) { r = 0.0; } return r; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcSpearman"); exit(1); } } /*********************************************************************************************************************************/ //assumes both matrices are square and the same size double LinearAlgebra::calcKendall(vector< vector >& euclidDists, vector< vector >& userDists){ try { double r; //format data vector scores; for (int i = 0; i < euclidDists.size(); i++) { for (int j = 0; j < i; j++) { spearmanRank member(toString(scores.size()), euclidDists[i][j]); scores.push_back(member); } } //sort scores sort(scores.begin(), scores.end(), compareSpearman); //find ranks of xi map rankEuclid; vector ties; int rankTotal = 0; for (int j = 0; j < scores.size(); j++) { rankTotal += (j+1); ties.push_back(scores[j]); if (j != (scores.size()-1)) { // you are not the last so you can look ahead if (!util.isEqual(scores[j].score, scores[j+1].score)) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankEuclid[ties[k].name] = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankEuclid[ties[k].name] = thisrank; } } } vector scoresUser; for (int i = 0; i < userDists.size(); i++) { for (int j = 0; j < i; j++) { spearmanRank member(toString(scoresUser.size()), userDists[i][j]); scoresUser.push_back(member); } } //sort scores sort(scoresUser.begin(), scoresUser.end(), compareSpearman); //find ranks of yi map rankUser; ties.clear(); rankTotal = 0; for (int j = 0; j < scoresUser.size(); j++) { rankTotal += (j+1); ties.push_back(scoresUser[j]); if (j != (scoresUser.size()-1)) { // you are not the last so you can look ahead if (!util.isEqual(scoresUser[j].score, scoresUser[j+1].score)) { // you are done with ties, rank them and continue for (int k 
= 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankUser[ties[k].name] = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); rankUser[ties[k].name] = thisrank; } } } int numCoor = 0; int numDisCoor = 0; //order user ranks vector user; for (int l = 0; l < scores.size(); l++) { spearmanRank member(scores[l].name, rankUser[scores[l].name]); user.push_back(member); } int count = 0; for (int l = 0; l < scores.size(); l++) { int numWithHigherRank = 0; int numWithLowerRank = 0; float thisrank = user[l].score; for (int u = l+1; u < scores.size(); u++) { if (user[u].score > thisrank) { numWithHigherRank++; } else if (user[u].score < thisrank) { numWithLowerRank++; } count++; } numCoor += numWithHigherRank; numDisCoor += numWithLowerRank; } r = (numCoor - numDisCoor) / (float) count; //divide by zero error if (isnan(r) || isinf(r)) { r = 0.0; } return r; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcKendall"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::calcKendall(vector& x, vector& y, double& sig){ try { if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch.\n"); return 0.0; } //format data vector xscores; for (int i = 0; i < x.size(); i++) { spearmanRank member(toString(i), x[i]); xscores.push_back(member); } //sort xscores sort(xscores.begin(), xscores.end(), compareSpearman); //convert scores to ranks of x vector ties; int rankTotal = 0; for (int j = 0; j < xscores.size(); j++) { rankTotal += (j+1); ties.push_back(&(xscores[j])); if (j != xscores.size()-1) { // you are not the last so you can look ahead if (!util.isEqual(xscores[j].score, xscores[j+1].score)) { // you are done with ties, rank them and continue for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); (*ties[k]).score = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); (*ties[k]).score = thisrank; } } } //format data vector yscores; for (int j = 0; j < y.size(); j++) { spearmanRank member(toString(j), y[j]); yscores.push_back(member); } //sort yscores sort(yscores.begin(), yscores.end(), compareSpearman); //convert to ranks map rank; vector yties; rankTotal = 0; for (int j = 0; j < yscores.size(); j++) { rankTotal += (j+1); yties.push_back(yscores[j]); if (j != yscores.size()-1) { // you are not the last so you can look ahead if (!util.isEqual(yscores[j].score, yscores[j+1].score)) { // you are done with ties, rank them and continue for (int k = 0; k < yties.size(); k++) { float thisrank = rankTotal / (float) yties.size(); rank[yties[k].name] = thisrank; } yties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < yties.size(); k++) { float thisrank = rankTotal / (float) yties.size(); rank[yties[k].name] = thisrank; } } } int numCoor = 0; int numDisCoor = 0; //associate x and y vector otus; for (int l = 0; l < xscores.size(); l++) { spearmanRank member(xscores[l].name, rank[xscores[l].name]); otus.push_back(member); } int count = 0; for (int l = 0; l < xscores.size(); l++) { int numWithHigherRank = 0; int numWithLowerRank = 0; float thisrank = otus[l].score; for (int u = l+1; u < xscores.size(); u++) { if (otus[u].score > thisrank) { numWithHigherRank++; } else if 
(otus[u].score < thisrank) { numWithLowerRank++; } count++; } numCoor += numWithHigherRank; numDisCoor += numWithLowerRank; } double p = (numCoor - numDisCoor) / (float) count; sig = calcKendallSig(x.size(), p); return p; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcKendall"); exit(1); } } double LinearAlgebra::ran0(int& idum) { const int IA=16807,IM=2147483647,IQ=127773; const int IR=2836,MASK=123459876; const double AM=1.0/double(IM); int k; double ans; idum ^= MASK; k=idum/IQ; idum=IA*(idum-k*IQ)-IR*k; if (idum < 0) idum += IM; ans=AM*idum; idum ^= MASK; return ans; } double LinearAlgebra::ran1(int &idum) { const int IA=16807,IM=2147483647,IQ=127773,IR=2836,NTAB=32; const int NDIV=(1+(IM-1)/NTAB); const double EPS=3.0e-16,AM=1.0/IM,RNMX=(1.0-EPS); static int iy=0; static vector iv(NTAB); int j,k; double temp; if (idum <= 0 || !iy) { if (-idum < 1) idum=1; else idum = -idum; for (j=NTAB+7;j>=0;j--) { k=idum/IQ; idum=IA*(idum-k*IQ)-IR*k; if (idum < 0) idum += IM; if (j < NTAB) iv[j] = idum; } iy=iv[0]; } k=idum/IQ; idum=IA*(idum-k*IQ)-IR*k; if (idum < 0) idum += IM; j=iy/NDIV; iy=iv[j]; iv[j] = idum; if ((temp=AM*iy) > RNMX) return RNMX; else return temp; } double LinearAlgebra::ran2(int &idum) { const int IM1=2147483563,IM2=2147483399; const int IA1=40014,IA2=40692,IQ1=53668,IQ2=52774; const int IR1=12211,IR2=3791,NTAB=32,IMM1=IM1-1; const int NDIV=1+IMM1/NTAB; const double EPS=3.0e-16,RNMX=1.0-EPS,AM=1.0/double(IM1); static int idum2=123456789,iy=0; static vector iv(NTAB); int j,k; double temp; if (idum <= 0) { idum=(idum==0 ? 1 : -idum); idum2=idum; for (j=NTAB+7;j>=0;j--) { k=idum/IQ1; idum=IA1*(idum-k*IQ1)-k*IR1; if (idum < 0) idum += IM1; if (j < NTAB) iv[j] = idum; } iy=iv[0]; } k=idum/IQ1; idum=IA1*(idum-k*IQ1)-k*IR1; if (idum < 0) idum += IM1; k=idum2/IQ2; idum2=IA2*(idum2-k*IQ2)-k*IR2; if (idum2 < 0) idum2 += IM2; j=iy/NDIV; iy=iv[j]-idum2; iv[j] = idum; if (iy < 1) iy += IMM1; if ((temp=AM*iy) > RNMX) return RNMX; else return temp; } double LinearAlgebra::ran3(int &idum) { static int inext,inextp; static int iff=0; const int MBIG=1000000000,MSEED=161803398,MZ=0; const double FAC=(1.0/MBIG); static vector ma(56); int i,ii,k,mj,mk; if (idum < 0 || iff == 0) { iff=1; mj=labs(MSEED-labs(idum)); mj %= MBIG; ma[55]=mj; mk=1; for (i=1;i<=54;i++) { ii=(21*i) % 55; ma[ii]=mk; mk=mj-mk; if (mk < int(MZ)) mk += MBIG; mj=ma[ii]; } for (k=0;k<4;k++) for (i=1;i<=55;i++) { ma[i] -= ma[1+(i+30) % 55]; if (ma[i] < int(MZ)) ma[i] += MBIG; } inext=0; inextp=31; idum=1; } if (++inext == 56) inext=1; if (++inextp == 56) inextp=1; mj=ma[inext]-ma[inextp]; if (mj < int(MZ)) mj += MBIG; ma[inext]=mj; return mj*FAC; } double LinearAlgebra::ran4(int &idum) { #if defined(vax) || defined(_vax_) || defined(__vax__) || defined(VAX) static const unsigned long jflone = 0x00004080; static const unsigned long jflmsk = 0xffff007f; #else static const unsigned long jflone = 0x3f800000; static const unsigned long jflmsk = 0x007fffff; #endif unsigned long irword,itemp,lword; static int idums = 0; if (idum < 0) { idums = -idum; idum=1; } irword=idum; lword=idums; psdes(lword,irword); itemp=jflone | (jflmsk & irword); ++idum; return (*(float *)&itemp)-1.0; } void LinearAlgebra::psdes(unsigned long &lword, unsigned long &irword) { const int NITER=4; static const unsigned long c1[NITER]={ 0xbaa96887L, 0x1e17d32cL, 0x03bcdc3cL, 0x0f33d1b2L}; static const unsigned long c2[NITER]={ 0x4b0f3b58L, 0xe874f0c3L, 0x6955c5a6L, 0x55a7ca46L}; unsigned long i,ia,ib,iswap,itmph=0,itmpl=0; for (i=0;i> 16; 
ib=itmpl*itmpl+ ~(itmph*itmph); irword=lword ^ (((ia = (ib >> 16) | ((ib & 0xffff) << 16)) ^ c2[i])+itmpl*itmph); lword=iswap; } } /*********************************************************************************************************************************/ double LinearAlgebra::calcKendallSig(double n, double r){ try { double sig = 0.0; double svar=(4.0*n+10.0)/(9.0*n*(n-1.0)); double z= r/sqrt(svar); sig=erfcc(fabs(z)/1.4142136); if (isnan(sig) || isinf(sig)) { sig = 0.0; } return sig; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcKendallSig"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::calcKruskalWallis(vector& values, double& pValue){ try { double H; set treatments; //rank values sort(values.begin(), values.end(), compareSpearman); vector ties; int rankTotal = 0; vector TIES; for (int j = 0; j < values.size(); j++) { treatments.insert(values[j].name); rankTotal += (j+1); ties.push_back(&(values[j])); if (j != values.size()-1) { // you are not the last so you can look ahead if (!util.isEqual(values[j].score, values[j+1].score)) { // you are done with ties, rank them and continue if (ties.size() > 1) { TIES.push_back(ties.size()); } for (int k = 0; k < ties.size(); k++) { double thisrank = rankTotal / (double) ties.size(); (*ties[k]).score = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one if (ties.size() > 1) { TIES.push_back(ties.size()); } for (int k = 0; k < ties.size(); k++) { double thisrank = rankTotal / (double) ties.size(); (*ties[k]).score = thisrank; } } } // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1) map sums; map counts; for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; } for (int j = 0; j < values.size(); j++) { sums[values[j].name] += values[j].score; counts[values[j].name]+= 1.0; } double middleTerm = 0.0; for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { middleTerm += ((sums[*it]*sums[*it])/counts[*it]); } double firstTerm = 12 / (double) (values.size()*(values.size()+1)); double lastTerm = 3 * (values.size()+1); H = (firstTerm * middleTerm) - lastTerm; H = (int)(H*1000.0)/(1000.0); //resolves floating point issue //adjust for ties if (TIES.size() != 0) { double sum = 0.0; for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); } long long valuesDenom = ((values.size()*values.size()*values.size())-values.size()); double result = 1.0 - (sum / (double) valuesDenom); H /= result; } if (isnan(H) || isinf(H)) { H = 0; } //Numerical Recipes pg221 pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0)); return H; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcKruskalWallis"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::normalvariate(double mean, double standardDeviation) { try { double u1 = util.getRandomDouble0to1(); double u2 = util.getRandomDouble0to1(); return cos(8.*atan(1.)*u2)*sqrt(-2.*log(u1)); } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "normalvariate"); exit(1); } } /*********************************************************************************************************************************/ //thanks http://www.johndcook.com/cpp_phi.html double LinearAlgebra::pnorm(double x){ try { // constants double a1 = 0.254829592; double a2 = 
-0.284496736; double a3 = 1.421413741; double a4 = -1.453152027; double a5 = 1.061405429; double p = 0.3275911; // Save the sign of x int sign = 1; if (x < 0) sign = -1; x = fabs(x)/sqrt(2.0); // A&S formula 7.1.26 double t = 1.0/(1.0 + p*x); double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x); return 0.5*(1.0 + sign*y); } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "pnorm"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::calcWilcoxon(vector& x, vector& y, double& sig){ try { double W = 0.0; sig = 0.0; vector ranks; for (int i = 0; i < x.size(); i++) { if (m->getControl_pressed()) { return W; } spearmanRank member("x", x[i]); ranks.push_back(member); } for (int i = 0; i < y.size(); i++) { if (m->getControl_pressed()) { return W; } spearmanRank member("y", y[i]); ranks.push_back(member); } //sort values sort(ranks.begin(), ranks.end(), compareSpearman); //convert scores to ranks of x vector ties; int rankTotal = 0; vector TIES; for (int j = 0; j < ranks.size(); j++) { if (m->getControl_pressed()) { return W; } rankTotal += (j+1); ties.push_back(&(ranks[j])); if (j != ranks.size()-1) { // you are not the last so you can look ahead if (!util.isEqual(ranks[j].score, ranks[j+1].score)) { // you are done with ties, rank them and continue if (ties.size() > 1) { TIES.push_back(ties.size()); } for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); (*ties[k]).score = thisrank; } ties.clear(); rankTotal = 0; } }else { // you are the last one if (ties.size() > 1) { TIES.push_back(ties.size()); } for (int k = 0; k < ties.size(); k++) { float thisrank = rankTotal / (float) ties.size(); (*ties[k]).score = thisrank; } } } //from R wilcox.test function //STATISTIC <- sum(r[seq_along(x)]) - n.x * (n.x + 1)/2 double sumRanks = 0.0; for (int i = 0; i < ranks.size(); i++) { if (m->getControl_pressed()) { return W; } if (ranks[i].name == "x") { sumRanks += ranks[i].score; } } W = sumRanks - x.size() * ((double)(x.size() + 1)) / 2.0; //exact <- (n.x < 50) && (n.y < 50) bool findExact = false; if ((x.size() < 50) && (y.size() < 50)) { findExact = true; } if (findExact && (TIES.size() == 0)) { //find exact and no ties //PVAL <- switch(alternative, two.sided = { //p <- if (STATISTIC > (n.x * n.y/2)) PWilcox wilcox; double pval = 0.0; if (W > ((double)x.size()*y.size()/2.0)) { //pwilcox(STATISTIC-1, n.x, n.y, lower.tail = FALSE) pval = wilcox.pwilcox(W-1, x.size(), y.size(), false); }else { //pwilcox(STATISTIC,n.x, n.y) pval = wilcox.pwilcox(W, x.size(), y.size(), true); } sig = 2.0 * pval; if (1.0 < sig) { sig = 1.0; } }else { //z <- STATISTIC - n.x * n.y/2 double z = W - (double)(x.size() * y.size()/2.0); //NTIES <- table(r) double sum = 0.0; for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); } //SIGMA <- sqrt((n.x * n.y/12) * ((n.x + n.y + 1) - //sum(NTIES^3 - NTIES)/((n.x + n.y) * (n.x + n.y - //1)))) double sigma = 0.0; double firstTerm = (double)(x.size() * y.size()/12.0); double secondTerm = (double)(x.size() + y.size() + 1) - sum / (double)((x.size() + y.size()) * (x.size() + y.size() - 1)); sigma = sqrt(firstTerm * secondTerm); //CORRECTION <- switch(alternative, two.sided = sign(z) * 0.5, greater = 0.5, less = -0.5) double CORRECTION = 0.0; if (z < 0) { CORRECTION = -1.0; } else if (z > 0) { CORRECTION = 1.0; } CORRECTION *= 0.5; z = (z - CORRECTION)/sigma; //PVAL <- switch(alternative, 
two.sided = 2 * min(pnorm(z), pnorm(z, lower.tail = FALSE))) sig = pnorm(z); if ((1.0-sig) < sig) { sig = 1.0 - sig; } sig *= 2; } return W; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcWilcoxon"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::choose(double n, double k){ try { n = floor(n + 0.5); k = floor(k + 0.5); double lchoose = gammln(n + 1.0) - gammln(k + 1.0) - gammln(n - k + 1.0); return (floor(exp(lchoose) + 0.5)); } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "choose"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::calcSpearman(vector& x, vector& y, double& sig){ try { if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch.\n"); return 0.0; } //format data double sf = 0.0; //f^3 - f where f is the number of ties in x; double sg = 0.0; //f^3 - f where f is the number of ties in y; map tableX; map::iterator itTable; vector xscores; for (int i = 0; i < x.size(); i++) { spearmanRank member(toString(i), x[i]); xscores.push_back(member); //count number of repeats itTable = tableX.find(x[i]); if (itTable == tableX.end()) { tableX[x[i]] = 1; }else { tableX[x[i]]++; } } //calc LX double Lx = 0.0; for (itTable = tableX.begin(); itTable != tableX.end(); itTable++) { double tx = (double) itTable->second; Lx += ((pow(tx, 3.0) - tx) / 12.0); } //sort x sort(xscores.begin(), xscores.end(), compareSpearman); //convert scores to ranks of x //convert to ranks map rankx; vector xties; int rankTotal = 0; for (int j = 0; j < xscores.size(); j++) { rankTotal += (j+1); xties.push_back(xscores[j]); if (j != xscores.size()-1) { // you are not the last so you can look ahead if (!util.isEqual(xscores[j].score, xscores[j+1].score)) { // you are done with ties, rank them and continue for (int k = 0; k < xties.size(); k++) { float thisrank = rankTotal / (float) xties.size(); rankx[xties[k].name] = thisrank; } int t = xties.size(); sf += (t*t*t-t); xties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < xties.size(); k++) { float thisrank = rankTotal / (float) xties.size(); rankx[xties[k].name] = thisrank; } } } //format x vector yscores; map tableY; for (int j = 0; j < y.size(); j++) { spearmanRank member(toString(j), y[j]); yscores.push_back(member); itTable = tableY.find(member.score); if (itTable == tableY.end()) { tableY[member.score] = 1; }else { tableY[member.score]++; } } //calc Ly double Ly = 0.0; for (itTable = tableY.begin(); itTable != tableY.end(); itTable++) { double ty = (double) itTable->second; Ly += ((pow(ty, 3.0) - ty) / 12.0); } sort(yscores.begin(), yscores.end(), compareSpearman); //convert to ranks map rank; vector yties; rankTotal = 0; for (int j = 0; j < yscores.size(); j++) { rankTotal += (j+1); yties.push_back(yscores[j]); if (j != yscores.size()-1) { // you are not the last so you can look ahead if (!util.isEqual(yscores[j].score, yscores[j+1].score)) { // you are done with ties, rank them and continue for (int k = 0; k < yties.size(); k++) { float thisrank = rankTotal / (float) yties.size(); rank[yties[k].name] = thisrank; } int t = yties.size(); sg += (t*t*t-t); yties.clear(); rankTotal = 0; } }else { // you are the last one for (int k = 0; k < yties.size(); k++) { float thisrank = rankTotal / (float) yties.size(); rank[yties[k].name] = thisrank; } } } double di = 0.0; for 
(int k = 0; k < x.size(); k++) { float xi = rankx[toString(k)]; float yi = rank[toString(k)]; di += ((xi - yi) * (xi - yi)); } double p = 0.0; double n = (double) x.size(); double SX2 = ((pow(n, 3.0) - n) / 12.0) - Lx; double SY2 = ((pow(n, 3.0) - n) / 12.0) - Ly; p = (SX2 + SY2 - di) / (2.0 * sqrt((SX2*SY2))); //Numerical Recipes 646 sig = calcSpearmanSig(n, sf, sg, di); return p; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcSpearman"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::calcSpearmanSig(double n, double sf, double sg, double d){ try { double sig = 0.0; double probrs = 0.0; double en=n; double en3n=en*en*en-en; double aved=en3n/6.0-(sf+sg)/12.0; double fac=(1.0-sf/en3n)*(1.0-sg/en3n); double vard=((en-1.0)*en*en*SQR(en+1.0)/36.0)*fac; double zd=(d-aved)/sqrt(vard); double probd=erfcc(fabs(zd)/1.4142136); double rs=(1.0-(6.0/en3n)*(d+(sf+sg)/12.0))/sqrt(fac); fac=(rs+1.0)*(1.0-rs); if (fac > 0.0) { double t=rs*sqrt((en-2.0)/fac); double df=en-2.0; probrs=betai(0.5*df,0.5,df/(df+t*t)); }else { probrs = 0.0; } //smaller of probd and probrs is sig sig = probrs; if (probd < probrs) { sig = probd; } if (isnan(sig) || isinf(sig)) { sig = 0.0; } return sig; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcSpearmanSig"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::calcPearson(vector& x, vector& y, double& sig){ try { if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch.\n"); return 0.0; } //find average X float averageX = 0.0; for (int i = 0; i < x.size(); i++) { averageX += x[i]; } averageX = averageX / (float) x.size(); //find average Y float sumY = 0.0; for (int j = 0; j < y.size(); j++) { sumY += y[j]; } float Ybar = sumY / (float) y.size(); double r = 0.0; double numerator = 0.0; double denomTerm1 = 0.0; double denomTerm2 = 0.0; for (int j = 0; j < x.size(); j++) { float Yi = y[j]; float Xi = x[j]; numerator += ((Xi - averageX) * (Yi - Ybar)); denomTerm1 += ((Xi - averageX) * (Xi - averageX)); denomTerm2 += ((Yi - Ybar) * (Yi - Ybar)); } double denom = (sqrt(denomTerm1) * sqrt(denomTerm2)); r = numerator / denom; //Numerical Recipes pg.644 sig = calcPearsonSig(x.size(), r); return r; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcPearson"); exit(1); } } /*********************************************************************************************************************************/ double LinearAlgebra::calcPearsonSig(double n, double r){ try { double sig = 0.0; const double TINY = 1.0e-20; double z = 0.5*log((1.0+r+TINY)/(1.0-r+TINY)); //Fisher's z transformation //code below was giving an error in betacf with sop files //int df = n-2; //double t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY))); //sig = betai(0.5+df, 0.5, df/(df+t*t)); //Numerical Recipes says code below gives approximately the same result sig = erfcc(fabs(z*sqrt(n-1.0))/1.4142136); if (isnan(sig) || isinf(sig)) { sig = 0.0; } return sig; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "calcPearsonSig"); exit(1); } } /*********************************************************************************************************************************/ vector > LinearAlgebra::getObservedEuclideanDistance(vector >& relAbundData){ try { int numSamples = relAbundData.size(); int numOTUs = relAbundData[0].size(); vector > 
dMatrix(numSamples); for(int i=0;igetControl_pressed()) { return dMatrix; } double d = 0; for(int k=0;kerrorOut(e, "LinearAlgebra", "getObservedEuclideanDistance"); exit(1); } } /*********************************************************************************************************************************/ vector LinearAlgebra::solveEquations(vector > A, vector b){ try { int length = (int)b.size(); vector x(length, 0); vector index(length); for(int i=0;igetControl_pressed()) { return b; } lubksb(A, index, b); return b; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "solveEquations"); exit(1); } } /*********************************************************************************************************************************/ vector LinearAlgebra::solveEquations(vector > A, vector b){ try { int length = (int)b.size(); vector x(length, 0); vector index(length); for(int i=0;igetControl_pressed()) { return b; } lubksb(A, index, b); return b; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "solveEquations"); exit(1); } } /*********************************************************************************************************************************/ void LinearAlgebra::ludcmp(vector >& A, vector& index, double& d){ try { double tiny = 1e-20; int n = (int)A.size(); vector vv(n, 0.0); int imax = 0; d = 1.0; for(int i=0;i big) { big = thisValue; } } if(util.isEqual(big,(float) 0.0)){ m->mothurOut("[WARNING]: " + toString(i) + " produces singular matrix in routine ludcmp\n"); vv[i] = 0.0; } else { vv[i] = 1.0/big; } } for(int j=0;jgetControl_pressed()) { break; } for(int i=0;i= big){ big = dum; imax = i; } } if(j != imax){ for(int k=0;kerrorOut(e, "LinearAlgebra", "ludcmp"); exit(1); } } /*********************************************************************************************************************************/ void LinearAlgebra::lubksb(vector >& A, vector& index, vector& b){ try { double total; int n = (int)A.size(); int ii = 0; for(int i=0;igetControl_pressed()) { break; } int ip = index[i]; total = b[ip]; b[ip] = b[i]; if (ii != 0) { for(int j=ii-1;j=0;i--){ total = b[i]; for(int j=i+1;jerrorOut(e, "LinearAlgebra", "lubksb"); exit(1); } } /*********************************************************************************************************************************/ void LinearAlgebra::ludcmp(vector >& A, vector& index, float& d){ try { double tiny = 1e-20; int n = (int)A.size(); vector vv(n, 0.0); int imax = 0; d = 1.0; for(int i=0;i big) { big = thisValue; } } if(util.isEqual(big,(float) 0.0)){ m->mothurOut("[WARNING]: " + toString(i) + " produces singular matrix in routine ludcmp\n"); vv[i] = 0.0; } else { vv[i] = 1.0/big; } } for(int j=0;jgetControl_pressed()) { break; } for(int i=0;i= big){ big = dum; imax = i; } } if(j != imax){ for(int k=0;kerrorOut(e, "LinearAlgebra", "ludcmp"); exit(1); } } /*********************************************************************************************************************************/ void LinearAlgebra::lubksb(vector >& A, vector& index, vector& b){ try { float total; int n = (int)A.size(); int ii = 0; for(int i=0;igetControl_pressed()) { break; } int ip = index[i]; total = b[ip]; b[ip] = b[i]; if (ii != 0) { for(int j=ii-1;j=0;i--){ total = b[i]; for(int j=i+1;jerrorOut(e, "LinearAlgebra", "lubksb"); exit(1); } } /*********************************************************************************************************************************/ vector > LinearAlgebra::getInverse(vector > matrix){ try { int n = 
(int)matrix.size(); vector > inverse(n); for(int i=0;i column(n, 0.0000); vector index(n, 0); double dummy; ludcmp(matrix, index, dummy); for(int j=0;jgetControl_pressed()) { break; } column.assign(n, 0); column[j] = 1.0000; lubksb(matrix, index, column); for(int i=0;ierrorOut(e, "LinearAlgebra", "getInverse"); exit(1); } } /*********************************************************************************************************************************/ //modelled R lda function - MASS:::lda.default vector< vector > LinearAlgebra::lda(vector< vector >& a, vector groups, vector< vector >& means, bool& ignore) { try { set uniqueGroups; for (int i = 0; i < groups.size(); i++) { uniqueGroups.insert(groups[i]); } int numGroups = uniqueGroups.size(); map quickIndex; //className to index. hoping to save counts, proportions and means in vectors to save time. This map will allow us to know index 0 in counts refers to group1. int count = 0; for (set::iterator it = uniqueGroups.begin(); it != uniqueGroups.end(); it++) { quickIndex[*it] = count; count++; } int numSampled = groups.size(); //number of sampled groups int numOtus = a.size(); //number of flagged bins //counts <- as.vector(table(g)) //number of samples from each class in random sampling vector counts; counts.resize(numGroups, 0); for (int i = 0; i < groups.size(); i++) { counts[quickIndex[groups[i]]]++; } vector proportions; proportions.resize(numGroups, 0.0); for (int i = 0; i < numGroups; i++) { proportions[i] = counts[i] / (double) numSampled; } means.clear(); //means[0] -> means[0][0] average for [group0][OTU0]. means.resize(numGroups); for (int i = 0; i < means.size(); i++) { means[i].resize(numOtus, 0.0); } for (int j = 0; j < numSampled; j++) { //total for each class for each OTU for (int i = 0; i < numOtus; i++) { means[quickIndex[groups[j]]][i] += a[i][j]; } } //average for each class for each OTU for (int j = 0; j < numGroups; j++) { for (int i = 0; i < numOtus; i++) { means[j][i] /= counts[j]; } } //randCov <- x - group.means[g, ] vector< vector > randCov; //randCov[0][0] -> (random sample value0 for OTU0 - average for samples group in OTU0). example OTU0, random sample 0.01 from class early. average of class early for OTU0 is 0.005. 
randCov[0][0] = (0.01-0.005) for (int i = 0; i < numOtus; i++) { //for each flagged OTU vector tempRand; for (int j = 0; j < numSampled; j++) { tempRand.push_back(a[i][j] - means[quickIndex[groups[j]]][i]); } randCov.push_back(tempRand); } //find variance and std for each OTU //f1 <- sqrt(diag(var(x - group.means[g, ]))) vector stdF1; vector ave; for (int i = 0; i < numOtus; i++) { stdF1.push_back(0.0); ave.push_back(util.getAverage(randCov[i])); } for (int i = 0; i < numOtus; i++) { for (int j = 0; j < numSampled; j++) { stdF1[i] += ((randCov[i][j] - ave[i]) * (randCov[i][j] - ave[i])); } } //fac <- 1/(n - ng) double fac = 1 / (double) (numSampled-numGroups); for (int i = 0; i < stdF1.size(); i++) { stdF1[i] /= (double) (numSampled-1); stdF1[i] = sqrt(stdF1[i]); } vector< vector > scaling; //[numOTUS][numOTUS] for (int i = 0; i < numOtus; i++) { vector temp; for (int j = 0; j < numOtus; j++) { if (i == j) { temp.push_back(1.0/stdF1[i]); } else { temp.push_back(0.0); } } scaling.push_back(temp); } //X <- sqrt(fac) * ((x - group.means[g, ]) %*% scaling) vector< vector > X = randCov; //[numOTUS][numSampled] //((x - group.means[g, ]) %*% scaling) //matrix multiplication of randCov and scaling LinearAlgebra linear; X = linear.matrix_mult(scaling, randCov); //[numOTUS][numOTUS] * [numOTUS][numSampled] = [numOTUS][numSampled] fac = sqrt(fac); for (int i = 0; i < X.size(); i++) { for (int j = 0; j < X[i].size(); j++) { X[i][j] *= fac; } } vector d; vector< vector > v; vector< vector > Xcopy; //X = [numOTUS][numSampled] bool transpose = false; //svd requires rows < columns, so if they are not then I need to transpose and look for the results in v. if (X.size() < X[0].size()) { Xcopy = linear.transpose(X); transpose=true; } else { Xcopy = X; } linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below, because R's version is [numSampled][numOTUS] int rank = 0; set goodColumns; for (int i = 0; i < d.size(); i++) { if (d[i] > 0.0000000001) { rank++; goodColumns.insert(i); } } if (rank == 0) { ignore=true; //m->mothurOut("[ERROR]: rank = 0: variables are numerically const\n"); m->setControl_pressed(true); return scaling; } //scaling <- scaling %*% X.s$v[, 1L:rank] %*% diag(1/X.s$d[1L:rank], , rank) //X.s$v[, 1L:rank] = columns in Xcopy that correspond to "good" d values //diag(1/X.s$d[1L:rank], , rank) = matrix size rank * rank where the diagonal is 1/"good" dvalues /*example: d [1] 3.721545e+00 3.034607e+00 2.296649e+00 7.986927e-16 6.922408e-16 [6] 5.471102e-16 $v [,1] [,2] [,3] [,4] [,5] [,6] [1,] 0.31122175 0.10944725 0.20183340 -0.30136820 0.60786235 -0.13537095 [2,] -0.29563726 -0.20568893 0.11233366 -0.05073289 0.48234270 0.21965978 ... [1] "X.s$v[, 1L:rank]" [,1] [,2] [,3] [1,] 0.31122175 0.10944725 0.20183340 [2,] -0.29563726 -0.20568893 0.11233366 ... 
[1] "1/X.s$d[1L:rank]" [1] 0.2687056 0.3295320 0.4354170 [1] "diag(1/X.s$d[1L:rank], , rank)" [,1] [,2] [,3] [1,] 0.2687056 0.000000 0.000000 [2,] 0.0000000 0.329532 0.000000 [3,] 0.0000000 0.000000 0.435417 */ if (transpose) { Xcopy = linear.transpose(v); } v.clear(); //store "good" columns - X.s$v[, 1L:rank] v.resize(Xcopy.size()); //[numOTUS]["good" columns] for (set::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) { for (int i = 0; i < Xcopy.size(); i++) { v[i].push_back(Xcopy[i][*it]); } } vector< vector > diagRanks; diagRanks.resize(rank); for (int i = 0; i < rank; i++) { diagRanks[i].resize(rank, 0.0); } count = 0; for (set::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) { diagRanks[count][count] = 1.0 / d[*it]; count++; } scaling = linear.matrix_mult(linear.matrix_mult(scaling, v), diagRanks); //([numOTUS][numOTUS]*[numOTUS]["good" columns]) = [numOTUS]["good" columns] then ([numOTUS]["good" columns] * ["good" columns]["good" columns] = scaling = [numOTUS]["good" columns] //Note: linear.matrix_mult [1][numGroups] * [numGroups][numOTUs] - columns in first must match rows in second, returns matrix[1][numOTUs] vector< vector > prior; prior.push_back(proportions); vector< vector > xbar = linear.matrix_mult(prior, means); vector xBar = xbar[0]; //length numOTUs //fac <- 1/(ng - 1) fac = 1 / (double) (numGroups-1); //scale(group.means, center = xbar, scale = FALSE) %*% scaling vector< vector > scaledMeans = means; //[numGroups][numOTUs] for (int i = 0; i < numGroups; i++) { for (int j = 0; j < numOtus; j++) { scaledMeans[i][j] -= xBar[j]; } } scaledMeans = linear.matrix_mult(scaledMeans, scaling); //[numGroups][numOTUS]*[numOTUS]["good"columns] = [numGroups]["good"columns] //sqrt((n * prior) * fac) vector temp = proportions; //[numGroups] for (int i = 0; i < temp.size(); i++) { temp[i] *= numSampled * fac; temp[i] = sqrt(temp[i]); } //X <- sqrt((n * prior) * fac) * (scale(group.means, center = xbar, scale = FALSE) %*% scaling) //X <- temp * scaledMeans X.clear(); X = scaledMeans; //[numGroups]["good"columns] for (int i = 0; i < X.size(); i++) { for (int j = 0; j < X[i].size(); j++) { X[i][j] *= temp[j]; } } d.clear(); v.clear(); //we want to transpose so results are in Xcopy, but if that makes rows > columns then we don't since svd requires rows < cols. 
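//note: this second decomposition is of the scaled, centered group means (the
//between-class structure), so its rank is at most numGroups-1; the right singular
//vectors that survive the tolerance check become the discriminant axes that
//scaling is projected onto below, following the same steps as MASS:::lda.default in R.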
transpose=false; if (X.size() > X[0].size()) { Xcopy = X; transpose=true; } else { Xcopy = linear.transpose(X); } linear.svd(Xcopy, d, v); //Xcopy gets the results we want for v below //rank <- sum(X.s$d > tol * X.s$d[1L]) //X.s$d[1L] = larger value in d vector double largeD = util.max(d); rank = 0; goodColumns.clear(); for (int i = 0; i < d.size(); i++) { if (d[i] > (0.0000000001*largeD)) { rank++; goodColumns.insert(i); } } if (rank == 0) { ignore=true;//m->mothurOut("[ERROR]: rank = 0: class means are numerically identical.\n"); m->setControl_pressed(true); return scaling; } if (transpose) { Xcopy = linear.transpose(v); } //scaling <- scaling %*% X.s$v[, 1L:rank] - scaling * "good" columns v.clear(); //store "good" columns - X.s$v[, 1L:rank] v.resize(Xcopy.size()); //Xcopy = ["good"columns][numGroups] for (set::iterator it = goodColumns.begin(); it != goodColumns.end(); it++) { for (int i = 0; i < Xcopy.size(); i++) { v[i].push_back(Xcopy[i][*it]); } } scaling = linear.matrix_mult(scaling, v); //[numOTUS]["good" columns] * ["good"columns][new "good" columns] ignore=false; return scaling; } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "lda"); exit(1); } } /*********************************************************************************************************************************/ //Singular value decomposition (SVD) - adapted from http://svn.lirec.eu/libs/magicsquares/src/SVD.cpp /* * svdcomp - SVD decomposition routine. * Takes an mxn matrix a and decomposes it into udv, where u,v are * left and right orthogonal transformation matrices, and d is a * diagonal matrix of singular values. * * This routine is adapted from svdecomp.c in XLISP-STAT 2.1 which is * code from Numerical Recipes adapted by Luke Tierney and David Betz. * * Input to dsvd is as follows: * a = mxn matrix to be decomposed, gets overwritten with u * m = row dimension of a * n = column dimension of a * w = returns the vector of singular values of a * v = returns the right orthogonal transformation matrix */ int LinearAlgebra::svd(vector< vector >& a, vector& w, vector< vector >& v) { try { int flag, i, its, j, jj, k, l, nm; double c, f, h, s, x, y, z; double anorm = 0.0, g = 0.0, scale = 0.0; int numRows = a.size(); if (numRows == 0) { return 0; } int numCols = a[0].size(); if (numCols == 0) { return 0; } w.resize(numCols, 0.0); v.resize(numCols); for (int i = 0; i < numCols; i++) { v[i].resize(numRows, 0.0); } vector rv1; rv1.resize(numCols, 0.0); if (numRows < numCols){ m->mothurOut("[ERROR]: numRows < numCols\n"); m->setControl_pressed(true); return 0; } /* Householder reduction to bidiagonal form */ for (i = 0; i < numCols; i++) { /* left-hand reduction */ l = i + 1; rv1[i] = scale * g; g = s = scale = 0.0; if (i < numRows) { for (k = i; k < numRows; k++) scale += fabs((double)a[k][i]); if (scale) { for (k = i; k < numRows; k++) { a[k][i] = (double)((double)a[k][i]/scale); s += ((double)a[k][i] * (double)a[k][i]); } f = (double)a[i][i]; g = -SIGN(sqrt(s), f); h = f * g - s; a[i][i] = (double)(f - g); if (i != numCols - 1) { for (j = l; j < numCols; j++) { for (s = 0.0, k = i; k < numRows; k++) s += ((double)a[k][i] * (double)a[k][j]); f = s / h; for (k = i; k < numRows; k++) a[k][j] += (double)(f * (double)a[k][i]); } } for (k = i; k < numRows; k++) a[k][i] = (double)((double)a[k][i]*scale); } } w[i] = (double)(scale * g); /* right-hand reduction */ g = s = scale = 0.0; if (i < numRows && i != numCols - 1) { for (k = l; k < numCols; k++) scale += fabs((double)a[i][k]); if (scale) { for (k = l; k < 
numCols; k++) { a[i][k] = (double)((double)a[i][k]/scale); s += ((double)a[i][k] * (double)a[i][k]); } f = (double)a[i][l]; g = -SIGN(sqrt(s), f); h = f * g - s; a[i][l] = (double)(f - g); for (k = l; k < numCols; k++) rv1[k] = (double)a[i][k] / h; if (i != numRows - 1) { for (j = l; j < numRows; j++) { for (s = 0.0, k = l; k < numCols; k++) s += ((double)a[j][k] * (double)a[i][k]); for (k = l; k < numCols; k++) a[j][k] += (double)(s * rv1[k]); } } for (k = l; k < numCols; k++) a[i][k] = (double)((double)a[i][k]*scale); } } anorm = max(anorm, (fabs((double)w[i]) + fabs(rv1[i]))); } /* accumulate the right-hand transformation */ for (i = numCols - 1; i >= 0; i--) { if (i < numCols - 1) { if (g) { for (j = l; j < numCols; j++) v[j][i] = (double)(((double)a[i][j] / (double)a[i][l]) / g); /* double division to avoid underflow */ for (j = l; j < numCols; j++) { for (s = 0.0, k = l; k < numCols; k++) s += ((double)a[i][k] * (double)v[k][j]); for (k = l; k < numCols; k++) v[k][j] += (double)(s * (double)v[k][i]); } } for (j = l; j < numCols; j++) v[i][j] = v[j][i] = 0.0; } v[i][i] = 1.0; g = rv1[i]; l = i; } /* accumulate the left-hand transformation */ for (i = numCols - 1; i >= 0; i--) { l = i + 1; g = (double)w[i]; if (i < numCols - 1) for (j = l; j < numCols; j++) a[i][j] = 0.0; if (g) { g = 1.0 / g; if (i != numCols - 1) { for (j = l; j < numCols; j++) { for (s = 0.0, k = l; k < numRows; k++) s += ((double)a[k][i] * (double)a[k][j]); f = (s / (double)a[i][i]) * g; for (k = i; k < numRows; k++) a[k][j] += (double)(f * (double)a[k][i]); } } for (j = i; j < numRows; j++) a[j][i] = (double)((double)a[j][i]*g); } else { for (j = i; j < numRows; j++) a[j][i] = 0.0; } ++a[i][i]; } /* diagonalize the bidiagonal form */ for (k = numCols - 1; k >= 0; k--) { /* loop over singular values */ for (its = 0; its < 30; its++) { /* loop over allowed iterations */ flag = 1; for (l = k; l >= 0; l--) { /* test for splitting */ nm = l - 1; if (util.isEqual((fabs(rv1[l]) + anorm), anorm)) { flag = 0; break; } if (util.isEqual((fabs((double)w[nm]) + anorm), anorm)) break; } if (flag) { c = 0.0; s = 1.0; for (i = l; i <= k; i++) { f = s * rv1[i]; if (!util.isEqual((fabs(f) + anorm), anorm)) { g = (double)w[i]; h = pythag(f, g); w[i] = (double)h; h = 1.0 / h; c = g * h; s = (- f * h); for (j = 0; j < numRows; j++) { y = (double)a[j][nm]; z = (double)a[j][i]; a[j][nm] = (double)(y * c + z * s); a[j][i] = (double)(z * c - y * s); } } } } z = (double)w[k]; if (l == k) { /* convergence */ if (z < 0.0) { /* make singular value nonnegative */ w[k] = (double)(-z); for (j = 0; j < numCols; j++) v[j][k] = (-v[j][k]); } break; } if (its >= 30) { m->mothurOut("No convergence after 30,000! 
iterations \n"); m->setControl_pressed(true); return(0); } /* shift from bottom 2 x 2 minor */ x = (double)w[l]; nm = k - 1; y = (double)w[nm]; g = rv1[nm]; h = rv1[k]; f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y); g = pythag(f, 1.0); f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x; /* next QR transformation */ c = s = 1.0; for (j = l; j <= nm; j++) { i = j + 1; g = rv1[i]; y = (double)w[i]; h = s * g; g = c * g; z = pythag(f, h); rv1[j] = z; c = f / z; s = h / z; f = x * c + g * s; g = g * c - x * s; h = y * s; y = y * c; for (jj = 0; jj < numCols; jj++) { x = (double)v[jj][j]; z = (double)v[jj][i]; v[jj][j] = (float)(x * c + z * s); v[jj][i] = (float)(z * c - x * s); } z = pythag(f, h); w[j] = (float)z; if (z) { z = 1.0 / z; c = f * z; s = h * z; } f = (c * g) + (s * y); x = (c * y) - (s * g); for (jj = 0; jj < numRows; jj++) { y = (double)a[jj][j]; z = (double)a[jj][i]; a[jj][j] = (double)(y * c + z * s); a[jj][i] = (double)(z * c - y * s); } } rv1[l] = 0.0; rv1[k] = f; w[k] = (double)x; } } return(0); } catch(exception& e) { m->errorOut(e, "LinearAlgebra", "svd"); exit(1); } } mothur-1.48.0/source/linearalgebra.h000077500000000000000000000072641424121717000173760ustar00rootroot00000000000000#ifndef LINEARALGEBRA #define LINEARALGEBRA /* * linearalgebra.h * mothur * * Created by westcott on 1/7/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "mothurout.h" #include "utils.hpp" class LinearAlgebra { public: LinearAlgebra() { m = MothurOut::getInstance(); } ~LinearAlgebra() = default; vector > matrix_mult(vector >, vector >); vector >transpose(vector >); void recenter(double, vector >, vector >&); //eigenvectors int tred2(vector >&, vector&, vector&); int qtli(vector&, vector&, vector >&); vector< vector > calculateEuclidianDistance(vector >&, int); //pass in axes and number of dimensions vector< vector > calculateEuclidianDistance(vector >&); //pass in axes vector > getObservedEuclideanDistance(vector >&); double calcPearson(vector >&, vector >&); double calcSpearman(vector >&, vector >&); double calcKendall(vector >&, vector >&); double calcKruskalWallis(vector&, double&); double calcWilcoxon(vector&, vector&, double&); double calcPearson(vector&, vector&, double&); double calcSpearman(vector&, vector&, double&); double calcKendall(vector&, vector&, double&); double calcSpearmanSig(double, double, double, double); //length, f^3 - f where f is the number of ties in x, f^3 - f where f is the number of ties in y, sum of squared diffs in ranks. - designed to find the sif of one score. double calcPearsonSig(double, double); //length, coeff. double calcKendallSig(double, double); //length, coeff. vector solveEquations(vector >, vector); vector solveEquations(vector >, vector); vector > getInverse(vector >); double choose(double, double); double normalvariate(double mu, double sigma); vector< vector > lda(vector< vector >& a, vector groups, vector< vector >& means, bool&); //Linear discriminant analysis - a is [features][valuesFromGroups] groups indicates which group each sampling comes from. For example if groups = early, late, mid, early, early. a[0][0] = value for feature0 from groupEarly. 
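// A minimal sketch of calling LinearAlgebra::svd() on its own, following the
// description above: 'a' must have at least as many rows as columns, it is
// overwritten with U, 'w' receives the singular values and 'v' the right-hand
// orthogonal matrix. The 3x2 matrix here is made up for illustration.
#include "linearalgebra.h"

void svdExample() {
    LinearAlgebra linear;

    vector< vector<double> > a = { {1.0, 0.0},
                                   {0.0, 2.0},
                                   {0.0, 0.0} };   // 3 rows >= 2 columns, as svd() requires
    vector<double> w;                              // singular values
    vector< vector<double> > v;                    // right orthogonal transformation

    linear.svd(a, w, v);                           // on return, a holds U

    for (size_t i = 0; i < w.size(); i++) {
        cout << "singular value " << i << " = " << w[i] << endl;
    }
}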
int svd(vector< vector >& a, vector& w, vector< vector >& v); //Singular value decomposition private: MothurOut* m; Utils util; double pythag(double, double); double betacf(const double, const double, const double); double betai(const double, const double, const double); double gammln(const double); //double gammq(const double, const double); double gser(double&, const double, const double, double&); double gcf(double&, const double, const double, double&); double erfcc(double); double gammp(const double, const double); double pnorm(double x); double ran0(int&); //for testing double ran1(int&); //for testing double ran2(int&); //for testing double ran3(int&); //for testing double ran4(int&); //for testing void psdes(unsigned long &, unsigned long &); //for testing void ludcmp(vector >&, vector&, double&); void lubksb(vector >&, vector&, vector&); void ludcmp(vector >&, vector&, float&); void lubksb(vector >&, vector&, vector&); }; #endif mothur-1.48.0/source/metastats/000077500000000000000000000000001424121717000164265ustar00rootroot00000000000000mothur-1.48.0/source/metastats/mothurfisher.cpp000077500000000000000000000120731424121717000216570ustar00rootroot00000000000000/* * mothurfisher.cpp * Mothur * * Created by westcott on 7/8/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ //translated to c++ using source code http://www.langsrud.com/stat/fisher.htm as a reference #include "mothurfisher.h" /***********************************************************/ double MothurFisher::fexact(double n11_, double n12_, double n21_, double n22_, string o) { try { sleft = 0.0; sright = 0.0; sless = 0.0; slarg = 0.0; otuLabel = o; if(n11_<0) n11_ *= -1; if(n12_<0) n12_ *= -1; if(n21_<0) n21_ *= -1; if(n22_<0) n22_ *= -1; double n1_ = n11_+n12_; double n_1 = n11_+n21_; double n = n11_ +n12_ +n21_ +n22_; if (m->getDebug()) { m->mothurOut("[DEBUG]: fisher:fexact n11_, n1_, n_1, n " + toString(n11_) + " " + toString(n1_) + " " + toString(n_1) + " " + toString(n) + " \n"); } exact(n11_,n1_,n_1,n); double twotail = sleft+sright; if(twotail>1) twotail=1; double result = twotail; return result; }catch(exception& e) { m->errorOut(e, "MothurFisher", "fexact"); exit(1); } } /***********************************************************/ double MothurFisher::lngamm(double z) { // Reference: "Lanczos, C. 'A precision approximation // of the gamma function', J. SIAM Numer. Anal., B, 1, 86-96, 1964." 
// Translation of Alan Miller's FORTRAN-implementation // See http://lib.stat.cmu.edu/apstat/245 try { double x = 0; x += 0.1659470187408462e-06/(z+7); x += 0.9934937113930748e-05/(z+6); x -= 0.1385710331296526 /(z+5); x += 12.50734324009056 /(z+4); x -= 176.6150291498386 /(z+3); x += 771.3234287757674 /(z+2); x -= 1259.139216722289 /(z+1); x += 676.5203681218835 /(z); x += 0.9999999999995183; return(log(x)-5.58106146679532777-z+(z-0.5)*log(z+6.5)); }catch(exception& e) { m->errorOut(e, "MothurFisher", "lngamm"); exit(1); } } /***********************************************************/ double MothurFisher::lnfact(double n){ try { if(n <= 1) return(0); return(lngamm(n+1)); }catch(exception& e) { m->errorOut(e, "MothurFisher", "lnfact"); exit(1); } } /***********************************************************/ double MothurFisher::lnbico(double n, double k){ try { return(lnfact(n)-lnfact(k)-lnfact(n-k)); }catch(exception& e) { m->errorOut(e, "MothurFisher", "lnbico"); exit(1); } } /***********************************************************/ double MothurFisher::hyper_323(double n11, double n1_, double n_1, double n){ try { return(exp(lnbico(n1_,n11)+lnbico(n-n1_,n_1-n11)-lnbico(n,n_1))); }catch(exception& e) { m->errorOut(e, "MothurFisher", "hyper_323"); exit(1); } } /***********************************************************/ double MothurFisher::myhyper(double n11){ try { double hyper0Result = hyper0(n11,0,0,0); return hyper0Result; }catch(exception& e) { m->errorOut(e, "MothurFisher", "myhyper"); exit(1); } } /***********************************************************/ double MothurFisher::hyper0(double n11i, double n1_i, double n_1i, double ni) { try { if (!( !util.isEqual(n1_i, 0) && !util.isEqual(n_1i,0) && !util.isEqual(ni, 0) )) { if(!(((int)n11i % 10) == 0)){ if(util.isEqual(n11i,sn11+1)) { sprob *= ((sn1_-sn11)/(n11i))*((sn_1-sn11)/(n11i+sn-sn1_-sn_1)); sn11 = n11i; return sprob; } if(util.isEqual(n11i,sn11-1)) { sprob *= ((sn11)/(sn1_-n11i))*((sn11+sn-sn1_-sn_1)/(sn_1-n11i)); sn11 = n11i; return sprob; } } sn11 = n11i; }else{ sn11 = n11i; sn1_=n1_i; sn_1=n_1i; sn=ni; } sprob = hyper_323(sn11,sn1_,sn_1,sn); return sprob; }catch(exception& e) { m->errorOut(e, "MothurFisher", "hyper0"); exit(1); } } /***********************************************************/ double MothurFisher::exact(double n11, double n1_, double n_1, double n){ try { double p,i,j,prob; double max=n1_; if(n_1 max) { m->mothurOut("[WARNING]: i value too high. Take a closer look at the pvalue for " + otuLabel + ".\n"); break; } } i--; if(p<1.00000001*prob) sleft += p; else i--; sright=0; p=myhyper(max); for(j=max-1; p<0.99999999*prob; j--) { sright += p; p=myhyper(j); if (j < 0) { m->mothurOut("[WARNING]: j value too low. Take a closer look at the pvalue for " + otuLabel + ".\n"); break; } } j++; if(p<1.00000001*prob) sright += p; else j++; if(abs(i-n11)errorOut(e, "MothurFisher", "exact"); exit(1); } } /***********************************************************/ mothur-1.48.0/source/metastats/mothurfisher.h000077500000000000000000000014271424121717000213250ustar00rootroot00000000000000#ifndef MOTHUR_FISHER #define MOTHUR_FISHER /* * mothurfisher.h * Mothur * * Created by westcott on 7/8/11. * Copyright 2011 Schloss Lab. All rights reserved. 
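// A minimal sketch of MothurFisher on a single 2x2 contingency table, using the
// same cell order runMetastats later passes to fexact(): presence/absence counts
// of one OTU in two groups. The counts and the OTU label are made up; per the
// code above, the return value is the two-tailed p-value (sleft + sright,
// capped at 1).
#include "mothurfisher.h"

void fisherExample() {
    MothurFisher fisher;

    //                 group 1   group 2
    //  in the OTU        8         2        -> n11, n12
    //  not in the OTU   92        98        -> n21, n22
    double pvalue = fisher.fexact(8, 2, 92, 98, "Otu001");
    cout << "two-tailed p = " << pvalue << endl;
}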
* */ #include "mothurout.h" #include "utils.hpp" class MothurFisher { public: MothurFisher(){otuLabel = ""; m = MothurOut::getInstance(); } ~MothurFisher(){} double fexact(double, double, double, double, string); private: MothurOut* m; Utils util; double sleft, sright, sless, slarg; double sn11,sn1_,sn_1,sn,sprob; double lngamm(double); double lnfact(double); double lnbico(double, double); double hyper_323(double, double, double, double); double myhyper(double); double hyper0(double, double, double, double); double exact(double, double, double, double); string otuLabel; }; #endif mothur-1.48.0/source/metastats/mothurmetastats.cpp000077500000000000000000000470421424121717000224100ustar00rootroot00000000000000/* * mothurmetastats.cpp * Mothur * * Created by westcott on 7/6/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "mothurmetastats.h" #include "mothurfisher.h" #include "utils.hpp" /***********************************************************/ MothurMetastats::MothurMetastats(double t, int n) { try { m = MothurOut::getInstance(); threshold = t; numPermutations = n; }catch(exception& e) { m->errorOut(e, "MothurMetastats", "MothurMetastats"); exit(1); } } /***********************************************************/ MothurMetastats::~MothurMetastats() = default; /***********************************************************/ //main metastats function int MothurMetastats::runMetastats(string outputFileName, vector< vector >& data, int secGroupingStart, vector currentLabels, bool fillProps) { try { numOTUs = data.size(); //numBins numSamples = data[0].size(); //numGroups in subset secondGroupingStart = secGroupingStart; //g number of samples in group 1 vector< vector > Pmatrix; Pmatrix.resize(numOTUs); for (int i = 0; i < numOTUs; i++) { Pmatrix[i].resize(numSamples, 0.0); } // the relative proportion matrix vector< vector > C1; C1.resize(numOTUs); for (int i = 0; i < numOTUs; i++) { C1[i].resize(3, 0.0); } // statistic profiles for class1 and class 2 vector< vector > C2; C2.resize(numOTUs); // mean[1], variance[2], standard error[3] for (int i = 0; i < numOTUs; i++) { C2[i].resize(3, 0.0); } vector T_statistics; T_statistics.resize(numOTUs, 1); // a place to store the true t-statistics vector pvalues; pvalues.resize(numOTUs, 1); // place to store pvalues //************************************* // convert to proportions // generate Pmatrix //************************************* vector totals; totals.resize(numSamples, 0); // sum of numSampless / samples -> numSeqs for each sample //total[i] = total abundance for group[i] for (int i = 0; i < numSamples; i++) { //each sample for (int j = 0; j < numOTUs; j++) { //each otu totals[i] += data[j][i]; } } for (int i = 0; i < numSamples; i++) { //sample for (int j = 0; j < numOTUs; j++) { //otu if (fillProps) { Pmatrix[j][i] = data[j][i]/totals[i]; } else { Pmatrix[j][i] = data[j][i]; } } } //#******************************************************************************** //# ************************** STATISTICAL TESTING ******************************** //#******************************************************************************** if (numSamples == 2){ //# then we have a two sample comparison //#************************************************************ //# generate p values fisher's exact test //#************************************************************ double total1, total2; total1 = 0; total2 = 0; //total for first grouping for (int i = 0; i < secondGroupingStart; i++) { total1 += totals[i]; } //total for second 
grouping for (int i = secondGroupingStart; i < numSamples; i++) { total2 += totals[i]; } vector fish; fish.resize(numOTUs, 0.0); vector fish2; fish2.resize(numOTUs, 0.0); //vector currentLabels = m->getCurrentSharedBinLabels(); for(int i = 0; i < numOTUs; i++){ //numBins for(int j = 0; j < secondGroupingStart; j++) { fish[i] += data[i][j]; } for(int j = secondGroupingStart; j < numSamples; j++) { fish2[i] += data[i][j]; } double f11, f12, f21, f22; f11 = fish[i]; f12 = fish2[i]; f21 = total1 - fish[i]; f22 = total2 - fish2[i]; if (fillProps) { f11 = floor(f11); f12 = floor(f12); f21 = floor(f21); f22 = floor(f22); } MothurFisher fisher; double pre = fisher.fexact(f11, f12, f21, f22, currentLabels[i]); if (pre > 0.999999999) { pre = 1.0; } if (m->getControl_pressed()) { return 1; } pvalues[i] = pre; } }else { //we have multiple subjects per population //#************************************* //# generate statistics mean, var, stderr //#************************************* for(int i = 0; i < numOTUs; i++){ // for each taxa //# find the mean of each group double g1Total = 0.0; double g2Total = 0.0; for (int j = 0; j < secondGroupingStart; j++) { g1Total += Pmatrix[i][j]; } C1[i][0] = g1Total/(double)(secondGroupingStart); for (int j = secondGroupingStart; j < numSamples; j++) { g2Total += Pmatrix[i][j]; } C2[i][0] = g2Total/(double)(numSamples-secondGroupingStart); //# find the variance of each group double g1Var = 0.0; double g2Var = 0.0; for (int j = 0; j < secondGroupingStart; j++) { g1Var += pow((Pmatrix[i][j]-C1[i][0]), 2); } C1[i][1] = g1Var/(double)(secondGroupingStart-1); for (int j = secondGroupingStart; j < numSamples; j++) { g2Var += pow((Pmatrix[i][j]-C2[i][0]), 2); } C2[i][1] = g2Var/(double)(numSamples-secondGroupingStart-1); //# find the std error of each group -std err^2 (will change to std err at end) C1[i][2] = C1[i][1]/(double)(secondGroupingStart); C2[i][2] = C2[i][1]/(double)(numSamples-secondGroupingStart); } //#************************************* //# two sample t-statistics //#************************************* for(int i = 0; i < numOTUs; i++){ // # for each taxa double xbar_diff = C1[i][0] - C2[i][0]; double denom = sqrt(C1[i][2] + C2[i][2]); T_statistics[i] = xbar_diff/denom; // calculate two sample t-statistic } if (m->getDebug()) { for (int i = 0; i < numOTUs; i++) { for (int j = 0; j < 3; j++) { cout << "C1[" << i+1 << "," << j+1 << "]=" << C1[i][j] << ";" << endl; cout << "C2[" << i+1 << "," << j+1 << "]=" << C2[i][j] << ";" << endl; } cout << "T_statistics[" << i+1 << "]=" << T_statistics[i] << ";" << endl; } for (int i = 0; i < numOTUs; i++) { for (int j = 0; j < numSamples; j++) { cout << "Fmatrix[" << i+1 << "," << j+1 << "]=" << data[i][j] << ";" << endl; } } } //#************************************* //# generate initial permuted p-values //#************************************* pvalues = permuted_pvalues(Pmatrix, T_statistics, data); if (m->getDebug()) { for (int i = 0; i < numOTUs; i++) { m->mothurOut("[DEBUG]: " + currentLabels[i] + " pvalue = " + toString(pvalues[i]) + "\n"); } } //#************************************* //# generate p values for sparse data //# using fisher's exact test //#************************************* double total1, total2; total1 = 0; total2 = 0; //total for first grouping for (int i = 0; i < secondGroupingStart; i++) { total1 += totals[i]; } //total all seqs in first set //total for second grouping for (int i = secondGroupingStart; i < numSamples; i++) { total2 += totals[i]; } //total all seqs in second set vector fish; 
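// A small worked illustration of the unpooled two-sample t-statistic computed
// above (the numbers are made up): with group 1 mean 0.30 over 5 samples and
// variance 0.02, and group 2 mean 0.10 over 4 samples and variance 0.01,
//
//   stderr1^2 = 0.02 / 5 = 0.0040
//   stderr2^2 = 0.01 / 4 = 0.0025
//   t = (0.30 - 0.10) / sqrt(0.0040 + 0.0025) = 0.20 / 0.0806 ~ 2.48
//
// The permutation machinery below compares |t| for each OTU against the same
// statistic recomputed on group-label-shuffled data.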
fish.resize(numOTUs, 0.0); vector fish2; fish2.resize(numOTUs, 0.0); for(int i = 0; i < numOTUs; i++){ //numBins for(int j = 0; j < secondGroupingStart; j++) { fish[i] += data[i][j]; } for(int j = secondGroupingStart; j < numSamples; j++) { fish2[i] += data[i][j]; } if ((fish[i] < secondGroupingStart) && (fish2[i] < (numSamples-secondGroupingStart))) { double f11, f12, f21, f22; f11 = fish[i]; if (f11 < 0) { f11 *= -1.0; } f11 = floor(f11); f12 = fish2[i]; if (f12 < 0) { f12 *= -1.0; } f12 = floor(f11); f21 = total1 - fish[i]; if (f21 < 0) { f21 *= -1.0; } f21 = floor(f21); f22 = total2 - fish2[i]; if (f22 < 0) { f22 *= -1.0; } f22 = floor(f22); if (fillProps) { f11 = floor(f11); f12 = floor(f12); f21 = floor(f21); f22 = floor(f22); } MothurFisher fisher; if (m->getDebug()) { m->mothurOut("[DEBUG]: about to run fisher for Otu " + currentLabels[i] + " F11, F12, F21, F22 = " + toString(f11) + " " + toString(f12) + " " + toString(f21) + " " + toString(f22) + " " + "\n"); } double pre = fisher.fexact(f11, f12, f21, f22, currentLabels[i]); if (m->getDebug()) { m->mothurOut("[DEBUG]: about to completed fisher for Otu " + currentLabels[i] + " pre = " + toString(pre) + "\n"); } if (pre > 0.999999999) { pre = 1.0; } if (m->getControl_pressed()) { return 1; } pvalues[i] = pre; } } //#************************************* //# convert stderr^2 to std error //#************************************* for(int i = 0; i < numOTUs; i++){ C1[i][2] = sqrt(C1[i][2]); C2[i][2] = sqrt(C2[i][2]); } } // And now we write the files to a text file. struct tm *local; time_t t; t = time(nullptr); local = localtime(&t); ofstream out; Utils util; util.openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); out << "Local time and date of test: " << asctime(local) << endl; out << "# numOTUss = " << numOTUs << ", # col = " << numSamples << ", g = " << secondGroupingStart << endl << endl; out << numPermutations << " permutations" << endl << endl; //output numSamples headings - not really sure... documentation labels 9 numSampless, there are 10 in the output file //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293 out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tp-value\n"; for(int i = 0; i < numOTUs; i++){ if (m->getControl_pressed()) { out.close(); return 0; } //if there are binlabels use them otherwise count. 
if (i < currentLabels.size()) { out << currentLabels[i] << '\t'; } else { out << (i+1) << '\t'; } out << C1[i][0] << '\t' << C1[i][1] << '\t' << C1[i][2] << '\t' << C2[i][0] << '\t' << C2[i][1] << '\t' << C2[i][2] << '\t' << pvalues[i] << endl; //if (pvalues[i] < 0.05) { cout << currentLabels[i] << endl; } } out << endl << endl; out.close(); return 0; }catch(exception& e) { m->errorOut(e, "MothurMetastats", "runMetastats"); exit(1); } } /***********************************************************/ vector MothurMetastats::permuted_pvalues(vector< vector >& Imatrix, vector& tstats, vector< vector >& Fmatrix) { try { //# matrix stores tstats for each taxa(numOTUs) for each permuted trial(numSamples) vector ps; ps.resize(numOTUs, 0.0); //# to store the pvalues vector< vector > permuted_ttests; permuted_ttests.resize(numPermutations); for (int i = 0; i < numPermutations; i++) { permuted_ttests[i].resize(numOTUs, 0.0); } //# calculate null version of tstats using B permutations. for (int i = 0; i < numPermutations; i++) { permuted_ttests[i] = permute_and_calc_ts(Imatrix); } //# calculate each pvalue using the null ts if ((secondGroupingStart) < 8 || (numSamples-secondGroupingStart) < 8){ vector< vector > cleanedpermuted_ttests; cleanedpermuted_ttests.resize(numPermutations); //# the array pooling just the frequently observed ts //# then pool the t's together! //# count how many high freq taxa there are int hfc = 1; for (int i = 0; i < numOTUs; i++) { // # for each taxa double group1Total = 0.0; double group2Total = 0.0; for(int j = 0; j < secondGroupingStart; j++) { group1Total += Fmatrix[i][j]; } for(int j = secondGroupingStart; j < numSamples; j++) { group2Total += Fmatrix[i][j]; } if (group1Total >= secondGroupingStart || group2Total >= (numSamples-secondGroupingStart)){ hfc++; for (int j = 0; j < numPermutations; j++) { cleanedpermuted_ttests[j].push_back(permuted_ttests[j][i]); } } } //#now for each taxa for (int i = 0; i < numOTUs; i++) { //number of cleanedpermuted_ttests greater than tstat[i] int numGreater = 0; for (int j = 0; j < numPermutations; j++) { for (int k = 0; k < cleanedpermuted_ttests[j].size(); k++) { if (cleanedpermuted_ttests[j][k] > abs(tstats[i])) { numGreater++; } } } ps[i] = (1/(double)(numPermutations*hfc))*numGreater; } }else{ for (int i = 0; i < numOTUs; i++) { //number of permuted_ttests[i] greater than tstat[i] //(sum(permuted_ttests[i,] > abs(tstats[i]))+1) int numGreater = 1; for (int j = 0; j < numPermutations; j++) { if (permuted_ttests[j][i] > abs(tstats[i])) { numGreater++; } } ps[i] = (1/(double)(numPermutations+1))*numGreater; } } return ps; }catch(exception& e) { m->errorOut(e, "MothurMetastats", "permuted_pvalues"); exit(1); } } /***********************************************************/ vector MothurMetastats::permute_and_calc_ts(vector< vector >& Imatrix) { try { vector< vector > permutedMatrix = Imatrix; //randomize numSampless, ie group abundances. 
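// A short worked illustration of the permutation p-values produced by
// permuted_pvalues() above (numbers made up): with B = 1000 permutations and,
// say, 24 permuted t-statistics exceeding |t_i| for a well-sampled OTU,
//
//   p_i = (1 + 24) / (1000 + 1) ~ 0.025
//
// When either group has fewer than 8 samples, the permuted statistics of the
// frequently observed OTUs are pooled instead, and the exceedance count is
// divided by (B * hfc), where hfc is the high-frequency OTU tally computed in
// that branch.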
map randomMap; vector randoms; for (int i = 0; i < numSamples; i++) { randoms.push_back(i); } util.mothurRandomShuffle(randoms); for (int i = 0; i < randoms.size(); i++) { randomMap[i] = randoms[i]; } //calc ts vector< vector > C1; C1.resize(numOTUs); for (int i = 0; i < numOTUs; i++) { C1[i].resize(3, 0.0); } // statistic profiles for class1 and class 2 vector< vector > C2; C2.resize(numOTUs); // mean[1], variance[2], standard error[3] for (int i = 0; i < numOTUs; i++) { C2[i].resize(3, 0.0); } vector Ts; Ts.resize(numOTUs, 0.0); // a place to store the true t-statistics //#************************************* //# generate statistics mean, var, stderr //#************************************* for(int i = 0; i < numOTUs; i++){ // for each taxa //# find the mean of each group double g1Total = 0.0; double g2Total = 0.0; for (int j = 0; j < secondGroupingStart; j++) { g1Total += permutedMatrix[i][randomMap[j]]; } C1[i][0] = g1Total/(double)(secondGroupingStart); for (int j = secondGroupingStart; j < numSamples; j++) { g2Total += permutedMatrix[i][randomMap[j]]; } C2[i][0] = g2Total/(double)(numSamples-secondGroupingStart); //# find the variance of each group double g1Var = 0.0; double g2Var = 0.0; for (int j = 0; j < secondGroupingStart; j++) { g1Var += pow((permutedMatrix[i][randomMap[j]]-C1[i][0]), 2); } C1[i][1] = g1Var/(double)(secondGroupingStart-1); for (int j = secondGroupingStart; j < numSamples; j++) { g2Var += pow((permutedMatrix[i][randomMap[j]]-C2[i][0]), 2); } C2[i][1] = g2Var/(double)(numSamples-secondGroupingStart-1); //# find the std error of each group -std err^2 (will change to std err at end) C1[i][2] = C1[i][1]/(double)(secondGroupingStart); C2[i][2] = C2[i][1]/(double)(numSamples-secondGroupingStart); } //#************************************* //# two sample t-statistics //#************************************* for(int i = 0; i < numOTUs; i++){ // # for each taxa double xbar_diff = C1[i][0] - C2[i][0]; double denom = sqrt(C1[i][2] + C2[i][2]); Ts[i] = abs(xbar_diff/denom); // calculate two sample t-statistic } return Ts; }catch(exception& e) { m->errorOut(e, "MothurMetastats", "permuted_ttests"); exit(1); } } /***********************************************************/ int MothurMetastats::OrderPValues(int low, int high, vector& p, vector& order) { try { if (low < high) { int i = low+1; int j = high; int pivot = (low+high) / 2; swapElements(low, pivot, p, order); //puts pivot in final spot /* compare value */ double key = p[low]; /* partition */ while(i <= j) { /* find member above ... */ while((i <= high) && (p[i] <= key)) { i++; } /* find element below ... 
*/ while((j >= low) && (p[j] > key)) { j--; } if(i < j) { swapElements(i, j, p, order); } } swapElements(low, j, p, order); /* recurse */ OrderPValues(low, j-1, p, order); OrderPValues(j+1, high, p, order); } return 0; }catch(exception& e) { m->errorOut(e, "MothurMetastats", "OrderPValues"); exit(1); } } /***********************************************************/ int MothurMetastats::swapElements(int i, int j, vector& p, vector& order) { try { double z = p[i]; p[i] = p[j]; p[j] = z; int temp = order[i]; order[i] = order[j]; order[j] = temp; return 0; }catch(exception& e) { m->errorOut(e, "MothurMetastats", "swapElements"); exit(1); } } /***********************************************************/ vector MothurMetastats::getSequence(int start, int end, int length) { try { vector sequence; double increment = (end-start) / (double) (length-1); sequence.push_back(start); for (int i = 1; i < length-1; i++) { sequence.push_back(int(i*increment)); } sequence.push_back(end); return sequence; }catch(exception& e) { m->errorOut(e, "MothurMetastats", "getSequence"); exit(1); } } /***********************************************************/ mothur-1.48.0/source/metastats/mothurmetastats.h000077500000000000000000000031311424121717000220440ustar00rootroot00000000000000#ifndef MOTHUR_METASTATS #define MOTHUR_METASTATS /* * mothurmetastats.h * Mothur * * Created by westcott on 7/6/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "mothurout.h" #include "utils.hpp" class MothurMetastats { public: MothurMetastats(double, int); //threshold, numPermutations ~MothurMetastats(); int runMetastats(string, vector< vector >&, int, vector, bool); //outputFileName, data, secondGroupingStart, otuNames, fillPMatrix (if using clr file, abundances are already a proportion so we don't want to redo that step) private: MothurOut* m; int numOTUs, numSamples, numPermutations, secondGroupingStart; double threshold; Utils util; vector permuted_pvalues(vector< vector >&, vector&, vector< vector >&); vector permute_and_calc_ts(vector< vector >&); int start(vector&, int, vector&, vector< vector >&); //Find the initial values for the matrix int meanvar(vector&, int, vector&); int testp(vector&, vector&, vector&, int, vector&, vector&); int permute_matrix(vector&, vector&, int, vector&, vector&, vector&); int permute_array(vector&); int calc_twosample_ts(vector&, int, vector&, vector&, vector&); int OrderPValues(int, int, vector&, vector&); int swapElements(int, int, vector&, vector&); vector getSequence(int, int, int); }; #endif mothur-1.48.0/source/mothur.cpp000077500000000000000000000120211424121717000164420ustar00rootroot00000000000000/* * interface.cpp * * * Created by Pat Schloss on 8/14/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. 
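// A minimal sketch of driving MothurMetastats directly, assuming the argument
// layout suggested by mothurmetastats.h above: data is [numOTUs][numSamples],
// the first secondGroupingStart columns belong to group 1, and fillProps=true
// makes runMetastats convert the raw counts to per-sample proportions. The
// output file name, counts and OTU labels are made up for illustration.
#include "mothurmetastats.h"

void metastatsExample() {
    vector< vector<double> > data = { {10, 12,  9,  1,  0,  2},    // Otu001
                                      { 0,  1,  0,  8,  7,  9},    // Otu002
                                      { 5,  4,  6,  5,  6,  4} };  // Otu003
    vector<string> otuLabels = { "Otu001", "Otu002", "Otu003" };

    int secondGroupingStart = 3;            // columns 0-2 are group 1, columns 3-5 are group 2
    MothurMetastats metastats(0.05, 1000);  // threshold, number of permutations
    metastats.runMetastats("example.metastats", data, secondGroupingStart, otuLabels, true);
}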
* */ #include "batchengine.hpp" #include "scriptengine.hpp" #include "interactengine.hpp" #include "mothurout.h" /**************************************************************************************************/ CommandFactory* CommandFactory::_uniqueInstance = 0; MothurOut* MothurOut::_uniqueInstance = 0; CurrentFile* CurrentFile::instance = 0; /***********************************************************************/ volatile int ctrlc_pressed = 0; void ctrlc_handler ( int sig ) { MothurOut* m = MothurOut::getInstance(); ctrlc_pressed = 1; m->setControl_pressed(ctrlc_pressed); if (m->getExecuting()) { //if mid command quit execution, else quit mothur m->mothurOut("\nquitting command...\n"); }else{ m->mothurOut("quitting mothur\n"); exit(1); } } /***********************************************************************/ int main(int argc, char *argv[], char *envp[]){ MothurOut* m = MothurOut::getInstance(); try { CurrentFile* current = CurrentFile::getInstance(); Utils util; bool createLogFile = true; signal(SIGINT, ctrlc_handler ); string mothurVersion, releaseDate, OS; vector defaultPath, toolsPath; util.mothurInitialPrep(defaultPath, toolsPath, mothurVersion, releaseDate, OS); current->setReleaseDate(releaseDate); current->setVersion(mothurVersion); #ifdef MOTHUR_FILES current->setDefaultPath(defaultPath); #endif #ifdef MOTHUR_TOOLS current->setToolsPath(toolsPath); #endif if (argc>1) { if (argc > 2) { //is one of these -q for quiet mode? if (argc > 3) { m->mothurOut("[ERROR]: mothur only allows command inputs and the -q command line options.\n i.e. ./mothur \"#summary.seqs(fasta=final.fasta);\" -q\n or ./mothur -q \"#summary.seqs(fasta=final.fasta);\"\n"); return 0; } else { string argv1 = argv[1]; string argv2 = argv[2]; if ((argv1 == "--quiet") || (argv1 == "-q")) { m->setQuietMode(true); argv[1] = argv[2]; }else if ((argv2 == "--quiet") || (argv2 == "-q")) { m->setQuietMode(true); }else { m->mothurOut("[ERROR]: mothur only allows command inputs and the -q command line options.\n"); m->mothurOut("[ERROR]: Unrecognized options: " + argv1 + " " + argv2 + "\n"); return 0; } } } } map environmentalVariables; for (char **env = envp; *env != 0; env++){ string thisEvn = *env; string key, value; value = thisEvn; util.splitAtEquals(key, value); map::iterator it = environmentalVariables.find(key); if (it == environmentalVariables.end()) { environmentalVariables[key] = value; } else { it->second = value; } //m->mothurOut("[DEBUG]: Setting environment variable " + key + " to " + value + "\n"); if (m->getDebug()) { m->mothurOut("[DEBUG]: Setting environment variable " + key + " to " + value + "\n"); } } Engine* mothur = nullptr; bool bail = false; string input; if(argc>1){ input = argv[1]; if (input[0] == '#') { m->mothurOut("Script Mode\n\n"); mothur = new ScriptEngine(argv[0], argv[1], environmentalVariables); }else if ((input == "--version") || (input == "-v")) { cout << (OS + "\nMothur version=" + mothurVersion + "\nRelease Date=" + releaseDate + "\n\n"); return 0; }else if ((input == "--help") || (input == "-h")) { createLogFile = false; m->mothurOut("Script Mode\n\n"); string helpQuit = "#help();quit();"; argv[1] = util.mothurConvert(helpQuit); mothur = new ScriptEngine(argv[0], argv[1], environmentalVariables); }else{ m->mothurOut("Batch Mode\n\n"); mothur = new BatchEngine(argv[0], argv[1], environmentalVariables); } }else{ m->mothurOut("Interactive Mode\n\n"); mothur = new InteractEngine(argv[0], environmentalVariables); } while(!bail) { bail = mothur->getInput(); } string newlogFileName 
= mothur->getLogFileName(); if (!createLogFile) { util.mothurRemove(newlogFileName); } if (mothur != nullptr) { delete mothur; } int returnCode = 0; if (m->getNumErrors() != 0) { returnCode = 1; } m->closeLog(); return returnCode; } catch(exception& e) { m->errorOut(e, "mothur", "main"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/mothur.h000066400000000000000000000444141424121717000161170ustar00rootroot00000000000000#ifndef MOTHUR_H #define MOTHUR_H /* * mothur.h * Mothur * * Created by Sarah Westcott on 2/19/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ /* This file contains all the standard incudes we use in the project as well as some common utilities. */ //#include //boost libraries #ifdef USE_BOOST #include #include #include #endif #ifdef USE_HDF5 #include "H5Cpp.h" #endif //io libraries #include #include #include #include #include //exception #include #include #include //containers #include #include #include #include #include #include #include #include //math #include #include #include #include #include #include //misc #include #include #include #include #include /*GSL includes*/ #ifdef USE_GSL #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /***********************************************************************/ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #include #include #include #include #include #include #ifdef USE_READLINE #include #include #endif #else #include //allows unbuffered screen capture from stdin #include //get cwd #include #include #include #endif using namespace std; #define exp(x) (exp((double) x)) #define sqrt(x) (sqrt((double) x)) #define log10(x) (log10((double) x)) #define log2(x) (log10(x)/log10(2)) #define isnan(x) ((x) != (x)) #define isinf(x) (fabs(x) == std::numeric_limits::infinity()) #define GIG 1073741824 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #define PATH_SEPARATOR "/" #define EXECUTABLE_EXT "" #define NON_WINDOWS #define RESET "\033[0m" #define BLACK "\033[30m" /* Black */ #define RED "\033[31m" /* Red */ #define GREEN "\033[32m" /* Green */ #define YELLOW "\033[33m" /* Yellow */ #define BLUE "\033[34m" /* Blue */ #define MAGENTA "\033[35m" /* Magenta */ #define CYAN "\033[36m" /* Cyan */ #define WHITE "\033[37m" /* White */ #define BOLDBLACK "\033[1m\033[30m" /* Bold Black */ #define BOLDRED "\033[1m\033[31m" /* Bold Red */ #define BOLDGREEN "\033[1m\033[32m" /* Bold Green */ #define BOLDYELLOW "\033[1m\033[33m" /* Bold Yellow */ #define BOLDBLUE "\033[1m\033[34m" /* Bold Blue */ #define BOLDMAGENTA "\033[1m\033[35m" /* Bold Magenta */ #define BOLDCYAN "\033[1m\033[36m" /* Bold Cyan */ #define BOLDWHITE "\033[1m\033[37m" /* Bold White */ #undef WINDOWS #if defined (__APPLE__) || (__MACH__) #define OSX #else #undef OSX #endif #else #define PATH_SEPARATOR "\\" #define EXECUTABLE_EXT ".exe" #define WINDOWS #undef NON_WINDOWS #define M_PI 3.14159265358979323846264338327950288 #endif #define MOTHURMAX 1e6 typedef unsigned long ull; typedef unsigned short intDist; const vector nullVector; //used to pass blank vector const vector nullIntVector; //used to pass blank ints const vector nullCharVector; //used to pass blank char const map nullIntMap; const pair nullStringPair("", ""); 
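// A summary of how the command-line arguments handled in main() above select a
// run mode; the batch file name is only an example:
//
//   ./mothur                                       interactive mode
//   ./mothur "#summary.seqs(fasta=final.fasta);"   script mode (argument starts with '#')
//   ./mothur stability.batch                       batch mode (argument is a file of commands)
//   ./mothur --version | -v                        print version and release date, then exit
//   ./mothur --help | -h                           run help() and quit, without keeping a logfile
//   ./mothur -q "#..."                             quiet mode (-q/--quiet), combined with one command argument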
/**************************************************************************************************/ // trim from start (in place) static inline void ltrim(string &s) { s.erase(s.begin(), find_if(s.begin(), s.end(), [](unsigned char ch) { return !isspace(ch); })); } // trim from end (in place) static inline void rtrim(string &s) { s.erase(find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !isspace(ch); }).base(), s.end()); } // trim from both ends (in place) static inline void trimWhiteSpace(string &s) { ltrim(s); rtrim(s); } //skip over white space template static inline void gobble(In& f) { char d; while(isspace(d=f.get())) { ;} if(!f.eof()) { f.putback(d); } } /**************************************************************************************************/ template void split(const string &s, char delim, Out result) { istringstream iss(s); string item; while (getline(iss, item, delim)) { if (!item.empty()) { //ignore white space *result++ = item; } } } /**************************************************************************************************/ template void split(const string &s, Out result) { istringstream iss(s); string item; char d; while (iss) { iss >> item; while(isspace(d=iss.get())) {;} if(!iss.eof()) { iss.putback(d); } if (!item.empty()) { //ignore white space *result++ = item; } } } /**************************************************************************************************/ static inline void toUpper(string &s) { for_each(s.begin(), s.end(), [](char & c) { c = ::toupper(c); }); } static inline void toLower(string &s) { for_each(s.begin(), s.end(), [](char & c) { c = ::tolower(c); }); } /**************************************************************************************************/ struct classifierOTU { vector > otuData; //otuData[0] -> vector of first characters from each sequence in the OTU, otuData[1] -> vector of second characters from each sequence in the OTU int numSeqs; /* otuData.size = num columns in seq's alignment otuData[i].size() = numSeqs in otu seq1 > atgcaag seq2 > gacctga seq3 > cctgacg otuData[0] > {a,g,c} otuData[1] > {t,a,c} otuData[2] > {g,c,t} otuData[i] > {charInAllCols} if all chars in otuData[i] are identical. ie, ignore column otuData[i] > {a} all seqs contain 'a' in column i of alignment */ classifierOTU(){ numSeqs = 0; } classifierOTU(string aligned) { for (int i = 0; i < aligned.length(); i++) { vector thisSpot; thisSpot.push_back(aligned[i]); otuData.push_back(thisSpot); } numSeqs = 1; } classifierOTU(vector otu) { readSeqs(otu); } classifierOTU(vector > otu, int num) : otuData(otu), numSeqs(num) {} void readSeqs(vector > otu, int num) { otuData = otu; numSeqs = num; } //for shortcut files void readSeqs(vector otu) { auto alignedLength = 0; bool error = false; if (otu.size() != 0) { alignedLength = otu[0].length(); } for (int j = 0; j < otu.size(); j++) { if (otu[j].length() != alignedLength) { error = true;} } if (!error) { for (int i = 0; i < alignedLength; i++) { vector thisSpot; set thisChars; for (int j = 0; j < otu.size(); j++) { thisSpot.push_back(otu[j][i]); thisChars.insert(otu[j][i]); } if (thisChars.size() == 1) { thisSpot.clear(); thisSpot.push_back(*thisChars.begin()); }// all same, reduce to 1. 
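// A minimal sketch of the string helpers defined above: split() copies the
// delimited (or whitespace-separated) pieces of a string through any output
// iterator, gobble() skips whitespace on a stream without consuming the next
// token, and toUpper()/toLower() work in place. The strings are made up.
#include <iterator>
#include "mothur.h"

void stringHelperExample() {
    vector<string> pieces;
    split("Otu001\tOtu002\tOtu003", '\t', back_inserter(pieces)); // {"Otu001","Otu002","Otu003"}

    istringstream iss("   42 abc");
    gobble(iss);             // skip the leading spaces, leave "42 abc" on the stream
    int x; iss >> x;         // x == 42

    string s = "acgt";
    toUpper(s);              // s == "ACGT"
}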
otuData.push_back(thisSpot); } numSeqs = otu.size(); }else { numSeqs = 0; } } }; //****************************************************** struct mcmcSample { double alpha, beta; //dmDash, dV double dNu; int ns; mcmcSample()=default; mcmcSample(double a, double b, double d, int n) : alpha(a), beta(b), dNu(d), ns(n) {} }; typedef struct s_Params { long lSeed; string szOutFileStub; double dSigmaX; //dSigmaM, dSigmaA double dSigmaY; //dSigmaV, dSigmaB double dSigmaN; double dSigmaS; int nIter; } t_Params; typedef struct s_Data { int nNA; int **aanAbund; int nL; int nJ; }t_Data; typedef struct s_LNParams { double dMDash; double dV; int n; } t_LNParams; typedef struct s_LSParams { double dMDash; double dV; double dNu; double dC; int n; } t_LSParams; typedef struct s_IGParams //s_SIParams { int nS; /*number of species in community*/ double dAlpha; double dBeta; double dC; //dGamma int n; } t_IGParams; #ifdef USE_GSL typedef struct s_MetroInit { t_Params *ptParams; t_Data *ptData; gsl_vector* ptX; int nAccepted; long lSeed; int nThread; } t_MetroInit; #endif //*********************************************************************** struct IntNode { int lvalue; int rvalue; int lcoef; int rcoef; IntNode* left; IntNode* right; IntNode(int lv, int rv, IntNode* l, IntNode* r) : lvalue(lv), rvalue(rv), left(l), right(r) {}; IntNode() =default; }; struct ThreadNode { int* pid; IntNode* left; IntNode* right; }; struct diffPair { float prob; float reverseProb; diffPair() { prob = 0; reverseProb = 0; } diffPair(float p, float rp) { prob = p; reverseProb = rp; } }; struct intPair { int abund; int group; intPair() { abund = 0; group = -1; } intPair(int a, int g) : abund(a), group(g) {} ~intPair() = default; }; struct kmerCount { int kmerNumber; int kCount; kmerCount(int kmer, int count) : kmerNumber(kmer), kCount(count) {} }; struct weightedSeq { long long name; long long weight; weightedSeq(long long n, long long w) { name = n; weight = w; } }; struct PCell{ ull row; ull column; float dist; PCell** vectorMap; PCell() : row(0), column(0), dist(0), vectorMap(nullptr) {}; PCell(ull r, ull c, float d) : row(r), column(c), dist(d), vectorMap(nullptr) {}; }; /* For each distance in a sparse matrix we have a row, column and distance. The PDistCell consists of the column and distance. We know the row by the distances row in the seqVec matrix. SeqVec is square and each row is sorted so the column values are ascending to save time in the search for the smallest distance. */ /***********************************************************************/ struct PDistCellMin{ ull row; ull col; PDistCellMin(ull r, ull c) : col(c), row(r) {} }; /***********************************************************************/ struct colDist { int col; int row; float dist; colDist(int r, int c, double d) : row(r), col(c), dist(d) {} }; /************************************************************/ struct seqPNode { int numIdentical; string name; string sequence; vector clusteredIndexes; //indexes of merge nodes. 
Can use this later to construct names int diffs; seqPNode() { diffs = 0; numIdentical = 0; name = ""; sequence = ""; } seqPNode(string na, string seq, int n, vector nm) : numIdentical(n), name(na), sequence(seq), clusteredIndexes(nm) { diffs = 0; } ~seqPNode() = default; }; /**********************************************************/ struct linePair { double start; double end; linePair(double i, double j) : start(i), end(j) {} linePair(){ start=0; end=0; } ~linePair(){} }; /***********************************************************************/ struct PDistCell{ ull index; float dist; PDistCell() : index(0), dist(0) {}; PDistCell(ull c, float d) : index(c), dist(d) {} }; /***********************************************************************/ struct consTax{ string name; string taxonomy; int abundance; consTax() : name(""), taxonomy("unknown"), abundance(0) {}; consTax(string n, string t, int a) : name(n), taxonomy(t), abundance(a) {} }; /***********************************************************************/ struct listCt{ string bin; int binSize; string label; listCt() : bin(""), binSize(0), label("") {}; listCt(string b, int a, string l) : bin(b), binSize(a), label(l) {} }; /***********************************************************************/ struct consTax2{ string otuName; string taxonomy; int abundance; consTax2() : otuName("OTUxxx"), taxonomy("unknown"), abundance(0) {}; consTax2(string n, string t, int a) : otuName(n), taxonomy(t), abundance(a) {} }; /***********************************************************************/ struct Taxon { string name; float confidence; Taxon(string n, float conf) : name(n), confidence(conf) {} ~Taxon(){} }; /************************************************************/ struct seqDist { int seq1; int seq2; double dist; seqDist() = default; seqDist(int s1, int s2, double d) : seq1(s1), seq2(s2), dist(d) {} ~seqDist() = default; }; /************************************************************/ struct oligosPair { string forward; string reverse; oligosPair() { forward = ""; reverse = ""; } oligosPair(string f, string r) : forward(f), reverse(r) {} ~oligosPair() = default; }; /************************************************************/ struct seqPriorityNode { int numIdentical; string seq; string name; seqPriorityNode() = default; seqPriorityNode(int n, string s, string nm) : numIdentical(n), seq(s), name(nm) {} ~seqPriorityNode() = default; }; /************************************************************/ struct compGroup { string group1; string group2; compGroup() = default; compGroup(string s, string nm) : group1(s), group2(nm) {} string getCombo() { return group1+"-"+group2; } ~compGroup() = default; }; /***************************************************************/ struct spearmanRank { string name; float score; spearmanRank(string n, float s) : name(n), score(s) {} }; //*********************************************************************** inline bool compareGroups(intPair left, intPair right){ return (left.group > right.group); } //*********************************************************************** inline bool compareIndexes(PDistCell left, PDistCell right){ return (left.index > right.index); } //******************************************************************************************************************** inline bool compareSpearman(spearmanRank left, spearmanRank right){ return (left.score < right.score); } //******************************************************************************************************************** 
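// A minimal sketch of the comparison helpers above used with std::sort; the
// sample names, scores and group indexes are made up.
#include <algorithm>
#include "mothur.h"

void comparatorExample() {
    vector<spearmanRank> ranks;
    ranks.push_back(spearmanRank("sampleA", 0.72f));
    ranks.push_back(spearmanRank("sampleB", 0.15f));
    ranks.push_back(spearmanRank("sampleC", 0.40f));

    sort(ranks.begin(), ranks.end(), compareSpearman);  // ascending by score: B, C, A

    vector<intPair> abunds;
    abunds.push_back(intPair(12, 0));
    abunds.push_back(intPair(3, 2));
    sort(abunds.begin(), abunds.end(), compareGroups);  // descending by group index
}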
inline double max(double left, double right){ if (left > right) { return left; } else { return right; } } //******************************************************************************************************************** inline double max(int left, double right){ double value = left; if (left > right) { return value; } else { return right; } } //******************************************************************************************************************** inline double max(double left, int right){ double value = right; if (left > value) { return left; } else { return value; } } //******************************************************************************************************************** //sorts highest to lowest inline bool compareSeqPriorityNodes(seqPriorityNode left, seqPriorityNode right){ if (left.numIdentical > right.numIdentical) { return true; }else if (left.numIdentical == right.numIdentical) { if (left.seq > right.seq) { return true; } else { return false; } } return false; } /********************************************************************************************************************/ //sorts lowest to highest inline bool compareSequenceDistance(seqDist left, seqDist right){ return (left.dist < right.dist); } /***********************************************************************/ // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2 // works for now, but there should be a way to do it without killing the whole program class BadConversion : public runtime_error { public: BadConversion(const string& s) : runtime_error(s){ } }; //********************************************************************************************************************** template void convert(const string& s, T& x, bool failIfLeftoverChars = true){ istringstream i(s); char c; if (!(i >> x) || (failIfLeftoverChars && i.get(c))) throw BadConversion(s); } //******************************************************************************* template bool convertTestFloat(const string& s, T& x, bool failIfLeftoverChars = true){ istringstream i(s); char c; if (!(i >> x) || (failIfLeftoverChars && i.get(c))) { return false; } return true; } //*********************************************************************** template bool convertTest(const string& s, T& x, bool failIfLeftoverChars = true){ istringstream i(s); char c; if (!(i >> x) || (failIfLeftoverChars && i.get(c))) { return false; } return true; } //********************************************************************** template string toString(const T&x){ stringstream output; output << x; return output.str(); } //********************************************************************* template string toString(const T&x, int i){ stringstream output; output.precision(i); output << fixed << x; return output.str(); } //************************************************************************* template string toHex(const T&x){ stringstream output; output << hex << x; return output.str(); } //************************************************************************* #endif mothur-1.48.0/source/mothurout.cpp000077500000000000000000000525121424121717000172030ustar00rootroot00000000000000/* * mothurOut.cpp * Mothur * * Created by westcott on 2/25/10. * Copyright 2010 Schloss Lab. All rights reserved. 
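// A minimal sketch of the conversion helpers defined above: convert() throws
// BadConversion when the whole string cannot be parsed, convertTest() reports
// failure through its return value instead, and toString(x, i) formats with a
// fixed precision. The values are made up.
#include "mothur.h"

void conversionExample() {
    int x = 0;
    convert("123", x);                    // x == 123

    double d = 0.0;
    if (!convertTest("12.5abc", d)) {     // leftover characters -> returns false
        // handle the bad token without an exception
    }

    string s1 = toString(42);             // "42"
    string s2 = toString(3.14159, 2);     // "3.14"

    try { convert("abc", x); }
    catch (BadConversion&) { /* whole-string parse failed */ }
}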
* */ #include "mothurout.h" #include "ordervector.hpp" #include "sharedordervector.h" #include "counttable.h" /******************************************************/ MothurOut* MothurOut::getInstance() { if( _uniqueInstance == 0) { _uniqueInstance = new MothurOut(); } return _uniqueInstance; } /*********************************************************************************************/ void MothurOut::appendLogBuffer(string partialLog) { try { buffer += partialLog; } catch(exception& e) { errorOut(e, "MothurOut", "appendLogBuffer"); exit(1); } } /*********************************************************************************************/ void MothurOut::setLogFileName(string filename, bool append) { try { silenceWarnings = false; Utils util; if ((filename == "silent")) { silenceLog = true; } else { logFileName = filename; if (outLog != nullptr) { closeLog(); delete outLog; outLog = nullptr; } outLog = new ofstream(); silenceLog = false; if (append) { util.openOutputFileAppend(filename, *outLog); *outLog << "\n\n************************************************************\n\n\n"; }else { bool opendLog = util.openOutputFile(filename, *outLog); if (!opendLog) { control_pressed = true; } } } } catch(exception& e) { errorOut(e, "MothurOut", "setFileName"); exit(1); } } /*********************************************************************************************/ void MothurOut::closeLog() { try { if (buffer != "") { string output = buffer; buffer = ""; mothurOut(output); } string outputLogName = "Logfile : " + logFileName + "\n\n"; if (!silenceLog) { mothurOut(outputLogName); } if (numErrors != 0) { if (!silenceLog) { *outLog << "\n\n************************************************************\n"; *outLog << "************************************************************\n"; *outLog << "************************************************************\n"; *outLog << "Detected " + toString(numErrors) + " [ERROR] messages, please review.\n"; *outLog << "************************************************************\n"; *outLog << "************************************************************\n"; *outLog << "************************************************************\n"; } logger() << "\n\n************************************************************\n"; logger() << "************************************************************\n"; logger() << "************************************************************\n"; logger() << "Detected " + toString(numErrors) + " [ERROR] messages, please review.\n"; logger() << "************************************************************\n"; logger() << "************************************************************\n"; logger() << "************************************************************\n"; } if (numWarnings != 0) { if (!silenceLog) { *outLog << "\n\n<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; *outLog << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; *outLog << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; *outLog << "Detected " + toString(numWarnings) + " [WARNING] messages, please review.\n"; *outLog << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; *outLog << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; *outLog << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; } logger() << "\n\n<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; logger() << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; logger() << 
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; logger() << "Detected " + toString(numWarnings) + " [WARNING] messages, please review.\n"; logger() << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; logger() << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; logger() << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<^>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"; } outLog->close(); } catch(exception& e) { errorOut(e, "MothurOut", "closeLog"); exit(1); } } /*********************************************************************************************/ MothurOut::~MothurOut() { try { _uniqueInstance = 0; } catch(exception& e) { errorOut(e, "MothurOut", "MothurOut"); exit(1); } } /*********************************************************************************************/ void MothurOut::mothurOut(string output) { try { if (outLog == nullptr) { appendLogBuffer(output); return; } if (buffer != "") { output = buffer + output; buffer = ""; } if (output.find("[ERROR]") != string::npos) { numErrors++; numCommandErrors++; if (numCommandErrors > maxCommandErrors) { logger() << "\n**** Exceeded maximum allowed command errors, quitting ****\n"; control_pressed = true; } //abort command } bool savedSilenceLog = silenceLog; bool containsWarning = false; if (output.find("[WARNING]") != string::npos) { numWarnings++; numCommandWarnings++; containsWarning = true; if (numCommandWarnings > maxCommandWarnings) { if (!silenceWarnings) { logger() << "\n**** Exceeded maximum allowed command warnings, silencing warnings ****\n"; } silenceWarnings = true; // write to cout, don't add to logfile } } if (!quietMode) { if (!silenceLog) { if (silenceWarnings && containsWarning) {} //do not print warning to logfile if warnings are silenced else { *outLog << output; } } logger() << output; }else { //check for this being an error if ((output.find("[ERROR]") != string::npos) || (output.find("mothur >") != string::npos)) { if (!silenceLog) { *outLog << output; } logger() << output; } } silenceLog = savedSilenceLog; } catch(exception& e) { errorOut(e, "MothurOut", "MothurOut"); exit(1); } } /*********************************************************************************************/ void MothurOut::mothurOutJustToScreen(string output) { try { if (buffer != "") { output = buffer + output; buffer = ""; } if (output.find("[ERROR]") != string::npos) { numErrors++; numCommandErrors++; if (numCommandErrors > maxCommandErrors) { logger() << "\n**** Exceeded maximum allowed command errors, quitting ****\n"; control_pressed = true; } //abort command } bool containsWarning = false; if (output.find("[WARNING]") != string::npos) { numWarnings++; numCommandWarnings++; containsWarning = true; if (numCommandWarnings > maxCommandWarnings) { if (!silenceWarnings) { logger() << "\n**** Exceeded maximum allowed command warnings, silencing warnings ****\n"; } silenceWarnings = true; // write to cout, don't add to logfile } } if (!quietMode) { logger() << output; }else { //check for this being an error if ((output.find("[ERROR]") != string::npos) || (output.find("mothur >") != string::npos)) { logger() << output; } } } catch(exception& e) { errorOut(e, "MothurOut", "MothurOut"); exit(1); } } /*********************************************************************************************/ void MothurOut::mothurOutEndLine() { try { if (outLog == nullptr) { appendLogBuffer("\n"); return; } if (!quietMode) { if (!silenceLog) { *outLog << buffer << endl; } logger() << buffer << endl; } buffer = ""; } catch(exception& e) { 
errorOut(e, "MothurOut", "MothurOutEndLine"); exit(1); } } /*********************************************************************************************/ void MothurOut::mothurOutJustToLog(string output) { try { if (outLog == nullptr) { appendLogBuffer(output); return; } if (buffer != "") { output = buffer + output; buffer = ""; } if (output.find("[ERROR]") != string::npos) { numErrors++; numCommandErrors++; if (numCommandErrors > maxCommandErrors) { logger() << "\n**** Exceeded maximum allowed command errors, quitting ****\n"; control_pressed = true; } //abort command } bool savedSilenceLog = silenceLog; bool containsWarning = false; if (output.find("[WARNING]") != string::npos) { numWarnings++; numCommandWarnings++; containsWarning = true; if (numCommandWarnings > maxCommandWarnings) { if (!silenceWarnings) { logger() << "\n**** Exceeded maximum allowed command warnings, silencing warnings ****\n"; } silenceWarnings = true; // write to cout, don't add to logfile } } if (!quietMode) { if (!silenceLog) { if (silenceWarnings && containsWarning) {} //do not print warning to logfile if warnings are silenced else { *outLog << output; } } }else { //check for this being an error if ((output.find("[ERROR]") != string::npos) || (output.find("mothur >") != string::npos)) { if (!silenceLog) { *outLog << output; } } } silenceLog = savedSilenceLog; } catch(exception& e) { errorOut(e, "MothurOut", "MothurOutJustToLog"); exit(1); } } /*********************************************************************************************/ void MothurOut::errorOut(exception& e, string object, string function) { numErrors++; string errorType = toString(e.what()); int pos = errorType.find("bad_alloc"); mothurOut("[ERROR]: " + errorType); double ramUsed, total; Utils util; ramUsed = util.getRAMUsed(); total = util.getTotalRAM(); mothurOut("RAM used: " + toString(ramUsed/(double)GIG) + "Gigabytes . Total Ram: " + toString(total/(double)GIG) + "Gigabytes.\n\n"); if (pos == string::npos) { //not bad_alloc mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry\n"); }else { //bad alloc if (object == "cluster"){ mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. 
\n\nIf you are unable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry."); }else if (object == "shhh.flows"){ mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are unable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. "); }else { mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. 
If you are unable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry."); } } } /*********************************************************************************************/ void MothurOut::setHomePath(string pathname) { try { if (pathname != "") { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); if (lastChar != PATH_SEPARATOR) { pathname += PATH_SEPARATOR; } } homePath = pathname; } catch(exception& e) { errorOut(e, "MothurOut", "setHomePath"); exit(1); } } /*********************************************************************************************/ void MothurOut::setPaths(vector pathVariables) { try { for (int i = 0; i < pathVariables.size(); i++) { string pathname = pathVariables[i]; if (pathname != "") { //add / to name if needed string lastChar = pathname.substr(pathname.length()-1); if (lastChar != PATH_SEPARATOR) { pathname += PATH_SEPARATOR; } } } paths = pathVariables; } catch(exception& e) { errorOut(e, "MothurOut", "setPaths"); exit(1); } } /*********************************************************************************************/ void MothurOut::initialize() { try { validAminoAcids.insert('A'); validAminoAcids.insert('R'); validAminoAcids.insert('N'); validAminoAcids.insert('D'); validAminoAcids.insert('B'); validAminoAcids.insert('C'); validAminoAcids.insert('Q'); validAminoAcids.insert('E'); validAminoAcids.insert('Z'); validAminoAcids.insert('G'); validAminoAcids.insert('H'); validAminoAcids.insert('I'); validAminoAcids.insert('L'); validAminoAcids.insert('K'); validAminoAcids.insert('M'); validAminoAcids.insert('F'); validAminoAcids.insert('P'); validAminoAcids.insert('S'); validAminoAcids.insert('T'); validAminoAcids.insert('W'); validAminoAcids.insert('Y'); validAminoAcids.insert('V'); validAminoAcids.insert('X'); validAminoAcids.insert('-'); validAminoAcids.insert('.'); validAminoAcids.insert('*'); validAminoAcids.insert('?'); codons.clear(); codons.resize(4); for (int i = 0; i < codons.size(); i++) { codons[i].resize(4); for (int j = 0; j < codons[i].size(); j++) { codons[i][j].resize(4); } } //AAX codons[0][0][0] = 'K'; //AAA | Lysine (K) -> 11. where 11 is the index into the aas enum. codons[0][0][1] = 'N'; //AAT | Asparagine (N) -> 2. codons[0][0][2] = 'K'; //AAG | Lysine (K) -> 11. codons[0][0][3] = 'N'; //AAC | Asparagine (N) -> 2. //ATX codons[0][1][0] = 'I'; //ATA | Isoleucine (I) -> 9. codons[0][1][1] = 'I'; //ATT | Isoleucine (I) -> 9. codons[0][1][2] = 'M'; //ATG | Methionine (M) -> 12. codons[0][1][3] = 'I'; //ATC | Isoleucine (I) -> 9. //AGX codons[0][2][0] = 'R'; //AGA | Arginine (R) -> 1. codons[0][2][1] = 'S'; //AGT | Serine (S) -> 15. codons[0][2][2] = 'R'; //AGG | Arginine (R) -> 1. codons[0][2][3] = 'S'; //AGC | Serine (S) -> 15. //ACX codons[0][3][0] = 'T'; //ACA | Threonine (T) -> 17. codons[0][3][1] = 'T'; //ACT | Threonine (T) -> 17. codons[0][3][2] = 'T'; //ACG | Threonine (T) -> 17. codons[0][3][3] = 'T'; //ACC | Threonine (T) -> 17. 
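        // Index convention for the codons table filled in above and below: every dimension encodes a
        // nucleotide as A=0, T=1, G=2, C=3, so codons[i][j][k] gives the amino acid for that triplet;
        // for example codons[0][1][2] == 'M' because ATG codes for Methionine.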
//TAX codons[1][0][0] = '*'; //TAA | Termination (X) -> 22 codons[1][0][1] = 'Y'; //TAT | Tyrosine (Y) -> 19 codons[1][0][2] = '*'; //TAG | Termination (X) -> 22 codons[1][0][3] = 'Y'; //TAC | Tyrosine (Y) -> 19 //TTX codons[1][1][0] = 'L'; //TTA | Leucine (L) -> 10 codons[1][1][1] = 'F'; //TTT | Phenylalanine (F) -> 13 codons[1][1][2] = 'L'; //TTG | Leucine (L) -> 10 codons[1][1][3] = 'F'; //TTC | Phenylalanine (F) -> 13 //TGX codons[1][2][0] = '*'; //TGA | Termination (X) -> 22 codons[1][2][1] = 'C'; //TGT | Cysteine (C) -> 4 codons[1][2][2] = 'W'; //TGG | Tryptophan (W) -> 18 codons[1][2][3] = 'C'; //TGC | Cysteine (C) -> 4 //TCX codons[1][3][0] = 'S'; //TCA | Serine (S) -> 15 codons[1][3][1] = 'S'; //TCT | Serine (S) -> 15 codons[1][3][2] = 'S'; //TCG | Serine (S) -> 15 codons[1][3][3] = 'S'; //TCC | Serine (S) -> 15 //GAX codons[2][0][0] = 'E'; //GAA | Glutamate (E) -> 6 codons[2][0][1] = 'D'; //GAT | Aspartate (D) -> 3 codons[2][0][2] = 'E'; //GAG | Glutamate (E) -> 6 codons[2][0][3] = 'D'; //GAC | Aspartate (D) -> 3 //GTX codons[2][1][0] = 'V'; //GTA | Valine (V) codons[2][1][1] = 'V'; //GTT | Valine (V) codons[2][1][2] = 'V'; //GTG | Valine (V) codons[2][1][3] = 'V'; //GTC | Valine (V) //GGX codons[2][2][0] = 'G'; //GGA | Glycine (G) codons[2][2][1] = 'G'; //GGT | Glycine (G) codons[2][2][2] = 'G'; //GGG | Glycine (G) codons[2][2][3] = 'G'; //GGC | Glycine (G) //GCX codons[2][3][0] = 'A'; //GCA | Alanine (A) codons[2][3][1] = 'A'; //GCT | Alanine (A) codons[2][3][2] = 'A'; //GCG | Alanine (A) codons[2][3][3] = 'A'; //GCC | Alanine (A) //CAX codons[3][0][0] = 'Q'; //CAA | Glutamine (Q) codons[3][0][1] = 'H'; //CAT | Histidine (H) codons[3][0][2] = 'Q'; //CAG | Glutamine (Q) codons[3][0][3] = 'H'; //CAC | Histidine (H) //CTX codons[3][1][0] = 'L'; //CTA | Leucine (L) codons[3][1][1] = 'L'; //CTT | Leucine (L) codons[3][1][2] = 'L'; //CTG | Leucine (L) codons[3][1][3] = 'L'; //CTC | Leucine (L) //CGX codons[3][2][0] = 'R'; //CGA | Arginine (R) codons[3][2][1] = 'R'; //CGT | Arginine (R) codons[3][2][2] = 'R'; //CGG | Arginine (R) codons[3][2][3] = 'R'; //CGC | Arginine (R) //CCX codons[3][3][0] = 'P'; //CCA | Proline (P) codons[3][3][1] = 'P'; //CCT | Proline (P) codons[3][3][2] = 'P'; //CCG | Proline (P) codons[3][3][3] = 'P'; //CCC | Proline (P) } catch(exception& e) { errorOut(e, "MothurOut", "initialize"); exit(1); } } /********************************************************************/ mothur-1.48.0/source/mothurout.h000077500000000000000000000107371424121717000166530ustar00rootroot00000000000000#ifndef MOTHUROUT_H #define MOTHUROUT_H /* * mothurOut.h * Mothur * * Created by westcott on 2/25/10. * Copyright 2010 Schloss Lab. All rights reserved. 
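 *
 * Typical use of the logging singleton, as implemented in mothurout.cpp above
 * (an illustrative sketch, not an excerpt from mothur's commands; the logfile
 * name used here is an assumption):
 *
 *   MothurOut* m = MothurOut::getInstance();
 *   m->setLogFileName("mothur.example.logfile", false); // open a fresh logfile
 *   m->mothurOut("Reading sequences...\n");             // written to the screen and the logfile
 *   m->mothurOutJustToLog("detail kept out of the screen output\n");
 *   m->mothurOutJustToScreen("50% complete\n");         // screen only
 *   m->closeLog();                                      // prints the error/warning summary and closes the file
 *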
* */ #include "mothur.h" /***********************************************/ struct logger { logger() = default; ~logger() = default; template< class T > logger& operator <<( const T& o ) { //lock_guard guard(token); cout << o; return *this; } logger& operator<<(ostream& (*m)(ostream&) ) { //lock_guard guard(token); cout << m; return *this; } private: //std::mutex token; }; /***********************************************/ class MothurOut { public: static MothurOut* getInstance(); //logger bool getDebug() { return debug; } void setDebug(bool t) { debug = t; } void setQuietMode(bool t) { quietMode = t; } int getNumErrors() { return numErrors; } void resetCommandErrors() { control_pressed = false; numCommandErrors = 0; numCommandWarnings = 0; silenceWarnings = false;} string getLogFileName() { return logFileName; } void setLogFileName(string f, bool append); void setHomePath(string); string getHomePath() { return homePath; } void setPaths(vector); //environment variable 'PATH' values vector getPaths() { return paths; } void mothurOut(string); //writes to cout and the logfile void mothurOutEndLine(); //writes to cout and the logfile void mothurOut(string, ofstream&); //writes to the ofstream, cout and the logfile void mothurOutJustToScreen(string); //writes to cout void mothurOutJustToLog(string); void errorOut(exception&, string, string); void closeLog(); //globals void setRandomSeed(unsigned s) { seed = s; } unsigned getRandomSeed() { return seed; } bool getControl_pressed() { return control_pressed; } void setControl_pressed(bool t) { control_pressed = t; } bool getChangedSeqNames() { return changedSeqNames; } void setChangedSeqNames(bool t) { changedSeqNames = t; } bool getChangedGroupNames() { return changedGroupNames; } void setChangedGroupNames(bool t) { changedGroupNames = t; } bool getExecuting() { return executing; } void setExecuting(bool t) { executing = t; } vector>> codons; set validAminoAcids; private: static MothurOut* _uniqueInstance; MothurOut( const MothurOut& ); // Disable copy constructor void operator=( const MothurOut& ); // Disable assignment operator MothurOut() { control_pressed = false; debug = false; quietMode = false; changedSeqNames = true; changedGroupNames = true; silenceLog = false; silenceWarnings = false; //per command numErrors = 0; numWarnings = 0; numCommandErrors = 0; numCommandWarnings = 0; maxCommandErrors = 10; maxCommandWarnings = 10; logFileName = ""; buffer = ""; homePath = ""; outLog = nullptr; seed = std::chrono::system_clock::now().time_since_epoch().count(); initialize(); //fills validAminoAcids and codons } ~MothurOut(); void appendLogBuffer(string); //used to store log before we establish the logfilename void initialize(); ofstream* outLog; unsigned seed; int numErrors, numWarnings, numCommandErrors, numCommandWarnings, maxCommandErrors, maxCommandWarnings; string logFileName, buffer, homePath; vector paths; bool changedSeqNames, changedGroupNames, silenceLog, silenceWarnings, control_pressed, executing, debug, quietMode; }; /***********************************************/ #endif mothur-1.48.0/source/myseqdist.cpp000077500000000000000000000267201424121717000171610ustar00rootroot00000000000000/* * pds.seqdist.cpp * * * Created by Pat Schloss on 8/12/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. 
* */ #include "myseqdist.h" #include "sequence.hpp" /**************************************************************************************************/ correctDist::correctDist(int p) : processors(p) { try { m = MothurOut::getInstance(); } catch(exception& e) { m->errorOut(e, "correctDist", "correctDist"); exit(1); } } /**************************************************************************************************/ correctDist::correctDist(string sequenceFileName, int p) : processors(p) { try { m = MothurOut::getInstance(); getSequences(sequenceFileName); } catch(exception& e) { m->errorOut(e, "correctDist", "correctDist"); exit(1); } } /**************************************************************************************************/ int correctDist::addSeq(string seqName, string seqSeq){ try { names.push_back(seqName); sequences.push_back(fixSequence(seqSeq)); return 0; } catch(exception& e) { m->errorOut(e, "correctDist", "addSeq"); exit(1); } } /**************************************************************************************************/ void correctDist::execute(string distanceFileName){ try { createProcess(distanceFileName); } catch(exception& e) { m->errorOut(e, "correctDist", "execute"); exit(1); } } /**************************************************************************************************/ int correctDist::getSequences(string sequenceFileName){ try { ifstream sequenceFile; Utils util; util.openInputFile(sequenceFileName, sequenceFile); string seqName, seqSeq; while(!sequenceFile.eof()){ if (m->getControl_pressed()) { break; } Sequence temp(sequenceFile); gobble(sequenceFile); if (temp.getName() != "") { names.push_back(temp.getName()); sequences.push_back(fixSequence(temp.getAligned())); } } sequenceFile.close(); return 0; } catch(exception& e) { m->errorOut(e, "correctDist", "getSequences"); exit(1); } } /**************************************************************************************************/ vector correctDist::fixSequence(string sequence){ try { int alignLength = sequence.length(); vector seqVector; for(int i=0;ierrorOut(e, "correctDist", "fixSequence"); exit(1); } } /**************************************************************************************************/ struct correctData { string outputFileName; long long startLine, endLine; vector > correctMatrix; vector > sequences; MothurOut* m; Utils util; correctData(string ofn, vector > seqs, long long sLine, long long eLine) { outputFileName = ofn; startLine = sLine; endLine = eLine; sequences = seqs; m = MothurOut::getInstance(); correctMatrix.resize(4); for(int i=0;i<4;i++){ correctMatrix[i].resize(4); } correctMatrix[0][0] = 0.000000; //AA correctMatrix[1][0] = 11.619259; //CA correctMatrix[2][0] = 11.694004; //TA correctMatrix[3][0] = 7.748623; //GA correctMatrix[1][1] = 0.000000; //CC correctMatrix[2][1] = 7.619657; //TC correctMatrix[3][1] = 12.852562; //GC correctMatrix[2][2] = 0.000000; //TT correctMatrix[3][2] = 10.964048; //TG correctMatrix[3][3] = 0.000000; //GG for(int i=0;i<4;i++){ for(int j=0;j >& alignMoves, int i, int j, vector& seqA, vector& seqB, MothurOut* m){ try { char nullReturn = -1; while(i>=1 && j>=1){ if (m->getControl_pressed()) { return nullReturn; } if(direction == 'd'){ if(seqA[i-1] == seqB[j-1]) { return seqA[i-1]; } else { return nullReturn; } } else if(direction == 'l') { j--; } else { i--; } direction = alignMoves[i][j]; } return nullReturn; } catch(exception& e) { m->errorOut(e, "correctDist", "getLastMatch"); exit(1); } } 
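/**************************************************************************************************/
// Illustrative sketch of how the public correctDist interface declared in myseqdist.h is driven
// (not called anywhere in mothur; the file names are assumptions, and the block is guarded so it
// is never compiled into the binary).
#ifdef CORRECTDIST_USAGE_EXAMPLE
static void correctDistUsageExample(){
    correctDist fromFasta("seqs.fasta", 2);          // read sequences from a fasta file, use 2 processors
    fromFasta.execute("seqs.corrected.dist");        // write the corrected pairwise distances

    correctDist manual(1);                           // or feed sequences one at a time
    manual.addSeq("seqA", "ACGTACGTACGT");
    manual.addSeq("seqB", "ACGTTCGTACGT");
    manual.execute("pair.corrected.dist");
}
#endif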
/**************************************************************************************************/ double getDist(vector& seqA, vector& seqB, vector >& correctMatrix, MothurOut* m){ try { int lengthA = seqA.size(); int lengthB = seqB.size(); vector > alignMatrix(lengthA+1); vector > alignMoves(lengthA+1); for(int i=0;i<=lengthA;i++){ alignMatrix[i].resize(lengthB+1, 0); alignMoves[i].resize(lengthB+1, 'x'); } for(int i=0;i<=lengthA;i++){ alignMatrix[i][0] = 15.0 * i; alignMoves[i][0] = 'u'; } for(int i=0;i<=lengthB;i++){ alignMatrix[0][i] = 15.0 * i; alignMoves[0][i] = 'l'; } for(int i=1;i<=lengthA;i++){ for(int j=1;j<=lengthB;j++){ if (m->getControl_pressed()) { return 0; } double nogap; nogap = alignMatrix[i-1][j-1] + correctMatrix[seqA[i-1]][seqB[j-1]]; double gap; double left; if(i == lengthA){ left = alignMatrix[i][j-1]; } //terminal gap else{ if(seqB[j-1] == getLastMatch('l', alignMoves, i, j, seqA, seqB, m)) { gap = 4.0; } else { gap = 15.0; } left = alignMatrix[i][j-1] + gap; } double up; if(j == lengthB){ up = alignMatrix[i-1][j]; } //terminal gap else{ if(seqA[i-1] == getLastMatch('u', alignMoves, i, j, seqA, seqB, m)) { gap = 4.0; } else { gap = 15.0; } up = alignMatrix[i-1][j] + gap; } if(nogap < left){ if(nogap < up){ alignMoves[i][j] = 'd'; alignMatrix[i][j] = nogap; } else{ alignMoves[i][j] = 'u'; alignMatrix[i][j] = up; } } else{ if(left < up){ alignMoves[i][j] = 'l'; alignMatrix[i][j] = left; } else{ alignMoves[i][j] = 'u'; alignMatrix[i][j] = up; } } } } int i = lengthA; int j = lengthB; int count = 0; while(i > 0 && j > 0){ if (m->getControl_pressed()) { return 0; } if(alignMoves[i][j] == 'd') { count++; i--; j--; } else if(alignMoves[i][j] == 'u') { if(j != lengthB){ count++; } i--; } else if(alignMoves[i][j] == 'l') { if(i != lengthA){ count++; } j--; } } return alignMatrix[lengthA][lengthB] / (double)count; } catch(exception& e) { m->errorOut(e, "correctDist", "getDist"); exit(1); } } /**************************************************************************************************/ int driverCorrect(correctData* params){ try { ofstream distFile; params->util.openOutputFile(params->outputFileName, distFile); distFile << setprecision(9); if(params->startLine == 0){ distFile << params->sequences.size() << endl; } int startTime = time(nullptr); params->m->mothurOut("\nCalculating distances for (" + toString(params->startLine+1) + " to " + toString(params->endLine+1) + ")... 
\n"); for(int i = params->startLine;i < params->endLine; i++){ if (params->m->getControl_pressed()) { distFile.close(); return 0; } distFile << i; for(int j=0;jsequences[i], params->sequences[j], params->correctMatrix, params->m); } distFile << endl; if(i % 100 == 0){ params->m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(nullptr) - startTime)+"\n"); } } distFile.close(); if((params->endLine-1) % 100 != 0){ params->m->mothurOutJustToScreen(toString(params->endLine-1) + "\t" + toString(time(nullptr) - startTime)+"\n"); } params->m->mothurOut("Done.\n"); return 0; } catch(exception& e) { params->m->errorOut(e, "correctDist", "driverCorrect"); exit(1); } } /**************************************************************************************************/ int correctDist::createProcess(string distanceFileName){ try { vector lines; for(int i=0;i workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { string extension = toString(i+1) + ".temp"; correctData* dataBundle = new correctData(distanceFileName+extension, sequences, lines[i+1].start, lines[i+1].end); data.push_back(dataBundle); std::thread* thisThread = new std::thread(driverCorrect, dataBundle); workerThreads.push_back(thisThread); } correctData* dataBundle = new correctData(distanceFileName, sequences, lines[0].start, lines[0].end); driverCorrect(dataBundle); delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); string extension = toString(i+1) + ".temp"; util.appendFiles((distanceFileName+extension), distanceFileName); util.mothurRemove(distanceFileName+extension); delete data[i]; delete workerThreads[i]; } return 0; } catch(exception& e) { m->errorOut(e, "correctDist", "createProcess"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/myseqdist.h000077500000000000000000000014501424121717000166170ustar00rootroot00000000000000#ifndef CORRECTDIST_H #define CORRECTDIST_H /* * pds.seqdist.h * * * Created by Pat Schloss on 8/12/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. * */ #include "mothurout.h" #include "utils.hpp" /**************************************************************************************************/ class correctDist { public: correctDist(string, int); correctDist(int); ~correctDist(){} int addSeq(string, string); void execute(string); private: MothurOut* m; Utils util; vector > sequences; vector names; int processors; int getSequences(string); vector fixSequence(string); int createProcess(string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/nast.cpp000077500000000000000000000362571424121717000161120ustar00rootroot00000000000000/* * nast.cpp * * * Created by Pat Schloss on 12/17/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This is my implementation of the NAST (nearest alignment space termination) algorithm as described in: * * DeSantis TZ, Hugenholtz P, Keller K, Brodie EL, Larsen N, Piceno YM, Phan R, & Anderson GL. 2006. NAST: a multiple * sequence alignment server for comparative analysis of 16S rRNA genes. Nucleic Acids Research. 34:W394-9. * * To construct an object one needs to provide a method of getting a pairwise alignment (alignment) and the template * and candidate sequence that are to be aligned to each other. 
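 *
 * A minimal usage sketch (illustrative only; the aligner settings, the sequence strings and the
 * Sequence(name, sequence) constructor shown here are assumptions rather than code taken from
 * this file):
 *
 *   Sequence* candidate = new Sequence("query_1", "TACGGAGGATCCGAGCGTTATCCGG");          // unaligned read
 *   Sequence* reference = new Sequence("ref_1", "--TA-CGG--AGG-ATC-CGAGCGTTATCCGG--");   // aligned template
 *   Alignment* aligner  = new NeedlemanOverlap(-2.0, 1.0, -1.0, 9000); // gap open, match, mismatch, longest template
 *   Nast nast(aligner, candidate, reference);       // re-gaps the candidate into the template's alignment space
 *   string nastAligned = candidate->getAligned();   // candidate now carries the NAST alignment
 *   float  similarity  = nast.getSimilarityScore();
 *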
* */ #include "sequence.hpp" #include "alignment.hpp" #include "nast.hpp" /**************************************************************************************************/ Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method), candidateSeq(cand), templateSeq(temp) { try { m = MothurOut::getInstance(); maxInsertLength = 0; pairwiseAlignSeqs(); // This is part A in Fig. 2 of DeSantis et al. regapSequences(); // This is parts B-F in Fig. 2 of DeSantis et al. } catch(exception& e) { m->errorOut(e, "Nast", "Nast"); exit(1); } } /**************************************************************************************************/ void Nast::pairwiseAlignSeqs(){ // Here we call one of the pairwise alignment methods to align our unaligned candidate // and template sequences try { alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned()); string candAln = alignment->getSeqAAln(); string tempAln = alignment->getSeqBAln(); if(candAln == ""){ candidateSeq->setPairwise(""); templateSeq->setPairwise(templateSeq->getUnaligned()); } else{ if(tempAln[0] == '-'){ int pairwiseAlignmentLength = tempAln.length(); // we need to make sure that the candidate sequence alignment for(int i=0;i=0; i--){// ends where the template sequence alignment ends, if it runs if(isalpha(tempAln[i])){ // long, we nuke the end of the candidate sequence candAln = candAln.substr(0,i+1); tempAln = tempAln.substr(0,i+1); break; } } } } candidateSeq->setPairwise(candAln); // set the pairwise sequences in the Sequence objects for templateSeq->setPairwise(tempAln); // the candidate and template sequences } catch(exception& e) { m->errorOut(e, "Nast", "pairwiseAlignSeqs"); exit(1); } } /**************************************************************************************************/ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAlign){ // here we do steps C-F of Fig. 2 from DeSantis et al. try { int longAlignmentLength = newTemplateAlign.length(); for(int i=0; i0;leftIndex--){ // then we've got problems... if(!isalpha(candAln[leftIndex])){ leftRoom = 1; //count how far it is to the nearest gap on the LEFT side of the anomaly while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom])) { leftRoom++; } break; } } for(rightIndex=i+1;rightIndex maxInsertLength){ maxInsertLength = insertLength; } if((leftRoom + rightRoom) >= insertLength){ // Parts D & E from Fig. 2 of DeSantis et al. 
if((i-leftIndex) <= (rightIndex-i)){ // the left gap is closer - > move stuff left there's if(leftRoom >= insertLength){ // enough room to the left to move string leftTemplateString = newTemplateAlign.substr(0,i); string rightTemplateString = newTemplateAlign.substr((i+insertLength)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); string leftCandidateString = candAln.substr(0,(leftIndex-insertLength+1)); string rightCandidateString = candAln.substr((leftIndex+1)); candAln = leftCandidateString + rightCandidateString; }else{ // not enough room to the left, have to steal some space to the right string leftTemplateString = newTemplateAlign.substr(0,i); string rightTemplateString = newTemplateAlign.substr((i+insertLength)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1)); string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); string rightCandidateString = candAln.substr((rightIndex+(insertLength-leftRoom))); candAln = leftCandidateString + insertString + rightCandidateString; } }else{ // the right gap is closer - > move stuff right there's if(rightRoom >= insertLength){ // enough room to the right to move string leftTemplateString = newTemplateAlign.substr(0,i); string rightTemplateString = newTemplateAlign.substr((i+insertLength)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); string leftCandidateString = candAln.substr(0,rightIndex); string rightCandidateString = candAln.substr((rightIndex+insertLength)); candAln = leftCandidateString + rightCandidateString; } else{ // not enough room to the right, have to steal some // space to the left lets move left and then right... string leftTemplateString = newTemplateAlign.substr(0,i); string rightTemplateString = newTemplateAlign.substr((i+insertLength)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); string leftCandidateString = candAln.substr(0,(leftIndex-(insertLength-rightRoom)+1)); string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); string rightCandidateString = candAln.substr((rightIndex+rightRoom)); candAln = leftCandidateString + insertString + rightCandidateString; } } if ((i - insertLength) < 0) { i = 0; } else { i -= insertLength; } } else{ // there could be a case where there isn't enough room in either direction to move stuff string leftTemplateString = newTemplateAlign.substr(0,i); string rightTemplateString = newTemplateAlign.substr((i+leftRoom+rightRoom)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1)); string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); string rightCandidateString = candAln.substr((rightIndex+rightRoom)); candAln = leftCandidateString + insertString + rightCandidateString; i -= (leftRoom + rightRoom); } //if i is negative, we want to remove the extra gaps to the right if (i < 0) { m->mothurOut("i is negative\n"); } } } } catch(exception& e) { m->errorOut(e, "Nast", "removeExtraGaps"); exit(1); } } /**************************************************************************************************/ void Nast::regapSequences(){ //This is essentially part B in Fig 2. of DeSantis et al. 
try { string candPair = candidateSeq->getPairwise(); string candAln = ""; string tempPair = templateSeq->getPairwise(); string tempAln = templateSeq->getAligned(); // we use the template aligned sequence as our guide int pairwiseLength = candPair.length(); int fullAlignLength = tempAln.length(); if(candPair == ""){ for(int i=0;isetAligned(candAln); return; } int fullAlignIndex = 0; int pairwiseAlignIndex = 0; string newTemplateAlign = ""; // this is going to be messy so we want a temporary template // alignment string while(tempAln[fullAlignIndex] == '.' || tempAln[fullAlignIndex] == '-'){ candAln += '.'; // add the initial '-' and '.' to the candidate and template newTemplateAlign += tempAln[fullAlignIndex];// pairwise sequences fullAlignIndex++; } string lastLoop = ""; while(pairwiseAlignIndexseems to be the opposite of the alpha scenario candAln += candPair[pairwiseAlignIndex]; newTemplateAlign += tempAln[fullAlignIndex];// pairwiseAlignIndex++; fullAlignIndex++; } else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex]) && !isalpha(candPair[pairwiseAlignIndex])){ // template pairwise has a character, but its full aligned sequence and candidate sequence have gaps // this would happen like we need to add a gap. basically the opposite of the alpha situation newTemplateAlign += tempAln[fullAlignIndex];// candAln += "-"; fullAlignIndex++; } else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex]) && !isalpha(candPair[pairwiseAlignIndex])){ // template and candidate pairwise are gaps and the template aligned is not a gap this should not be possible // would skip the gaps and not progress through full alignment sequence // not tested yet m->mothurOut("We're into D " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); m->mothurOutEndLine(); pairwiseAlignIndex++; } else{ // everything has a gap - not possible // not tested yet m->mothurOut("We're into F " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); m->mothurOutEndLine(); pairwiseAlignIndex++; fullAlignIndex++; } } for(int i=fullAlignIndex;i=0;i--){ // ditto. if(candAln[i] == 'Z' || !isalnum(candAln[i])) { candAln[i] = '.'; } else{ end = i; break; } } for(int i=start;i<=end;i++){ // go through the candidate alignment sequence and make sure that candAln[i] = toupper(candAln[i]); // everything is upper case } if(candAln.length() != tempAln.length()){ // if the regapped candidate sequence is longer than the official removeExtraGaps(candAln, tempAln, newTemplateAlign);// template alignment then we need to do steps C-F in Fig. } // 2 of Desantis et al. 
candidateSeq->setAligned(candAln); } catch(exception& e) { m->errorOut(e, "Nast", "regapSequences"); exit(1); } } /**************************************************************************************************/ float Nast::getSimilarityScore(){ try { string cand = candidateSeq->getAligned(); string temp = templateSeq->getAligned(); int alignmentLength = temp.length(); int mismatch = 0; int denominator = 0; for(int i=0;ierrorOut(e, "Nast", "getSimilarityScore"); exit(1); } } /**************************************************************************************************/ int Nast::getMaxInsertLength(){ return maxInsertLength; } /**************************************************************************************************/ mothur-1.48.0/source/nast.hpp000077500000000000000000000024761424121717000161130ustar00rootroot00000000000000#ifndef NAST_HPP #define NAST_HPP /* * nast.hpp * * * Created by Pat Schloss on 12/17/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This is my implementation of the NAST (nearest alignment space termination) algorithm as described in: * * DeSantis TZ, Hugenholtz P, Keller K, Brodie EL, Larsen N, Piceno YM, Phan R, & Anderson GL. 2006. NAST: a multiple * sequence alignment server for comparative analysis of 16S rRNA genes. Nucleic Acids Research. 34:W394-9. * * To construct an object one needs to provide a method of getting a pairwise alignment (alignment) and the template * and candidate sequence that are to be aligned to each other. * */ #include "mothur.h" #include "mothurout.h" class Alignment; class Sequence; /**************************************************************************************************/ class Nast { public: Nast(Alignment*, Sequence*, Sequence*); ~Nast(){}; float getSimilarityScore(); int getMaxInsertLength(); private: void pairwiseAlignSeqs(); void regapSequences(); void removeExtraGaps(string&, string, string); Alignment* alignment; Sequence* candidateSeq; Sequence* templateSeq; int maxInsertLength; MothurOut* m; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/needlemanoverlap.cpp000077500000000000000000000254501424121717000204570ustar00rootroot00000000000000/* * needleman.cpp * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This class is an Alignment child class that implements the Gotoh pairwise alignment algorithm as described in: * * Gotoh O. 1982. An improved algorithm for matching biological sequences. J. Mol. Biol. 162:705-8. * Myers, EW & Miller, W. 1988. Optimal alignments in linear space. Comput Appl Biosci. 4:11-7. * * This method is nice because it allows for an affine gap penalty to be assessed, which is analogous to what is used * in blast and is an alternative to Needleman-Wunsch, which only charges the same penalty for each gap position. 
* Because this method typically has problems at the ends when two sequences do not full overlap, we employ a separate * method to fix the ends (see Overlap class documentation) * */ #include "alignmentcell.hpp" #include "alignment.hpp" #include "overlap.hpp" #include "needlemanoverlap.hpp" /**************************************************************************************************/ NeedlemanOverlap::NeedlemanOverlap(float gO, float f, float mm, int r) :// note that we don't have a gap extend gap(gO), match(f), mismatch(mm), Alignment(r) { // the gap openning penalty is assessed for try { // every gapped position for(int i=1;ierrorOut(e, "NeedlemanOverlap", "NeedlemanOverlap"); exit(1); } } /**************************************************************************************************/ NeedlemanOverlap::~NeedlemanOverlap(){ /* do nothing */ } /**************************************************************************************************/ void NeedlemanOverlap::align(string A, string B, bool createBaseMap){ try { seqA = ' ' + A; lA = seqA.length(); // algorithm requires a dummy space at the beginning of each string seqB = ' ' + B; lB = seqB.length(); // algorithm requires a dummy space at the beginning of each string if (lA > nRows) { m->mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + ".\n"); } for(int i=1;i= up){ if(diagonal >= left){ alignment[i][j].cValue = diagonal; alignment[i][j].prevCell = 'd'; } else{ alignment[i][j].cValue = left; alignment[i][j].prevCell = 'l'; } } else{ if(up >= left){ alignment[i][j].cValue = up; alignment[i][j].prevCell = 'u'; } else{ alignment[i][j].cValue = left; alignment[i][j].prevCell = 'l'; } } } } Overlap over; over.setOverlap(alignment, lA, lB, 0); // Fix gaps at the beginning and end of the sequences traceBack(createBaseMap); // Traceback the alignment to populate seqAaln and seqBaln } catch(exception& e) { m->errorOut(e, "NeedlemanOverlap", "align"); exit(1); } } /**************************************************************************************************/ //A is dna, B is protein void NeedlemanOverlap::align(Sequence A, Protein B){ try { string seq = A.getUnaligned(); vector seqA; seqA.push_back(" "); int extentionSize = 3 - (seq.length() % 3); for (int i = 0; i < extentionSize; i++) { seq += "."; } //add gaps for(int j = 0; j seqB = B.getAligned(); AminoAcid dummy('.'); seqB.insert(seqB.begin(), dummy); lB = seqB.size(); if (lA > nRows) { m->mothurOut("One of your unaligned sequence is longer than your protein sequence. Your longest protein sequence is " + toString(nRows) + ". 
Your candidate is " + toString(lA) + ".\n"); } for(int i=1;i= up){ if(diagonal >= left){ alignment[i][j].cValue = diagonal; alignment[i][j].prevCell = 'd'; } else{ alignment[i][j].cValue = left; alignment[i][j].prevCell = 'l'; } } else{ if(up >= left){ alignment[i][j].cValue = up; alignment[i][j].prevCell = 'u'; } else{ alignment[i][j].cValue = left; alignment[i][j].prevCell = 'l'; } } } } Overlap over; over.setOverlap(alignment, lA, lB, 0); // Fix gaps at the beginning and end of the sequences proteinTraceBack(seqA, seqB); } catch(exception& e) { m->errorOut(e, "NeedlemanOverlap", "align"); exit(1); } } /**************************************************************************************************/ void NeedlemanOverlap::alignPrimer(string A, string B){ try { seqA = ' ' + A; lA = seqA.length(); // algorithm requires a dummy space at the beginning of each string seqB = ' ' + B; lB = seqB.length(); // algorithm requires a dummy space at the beginning of each string if (lA > nRows) { m->mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + ".\n"); } for(int i=1;i= up){ if(diagonal >= left){ alignment[i][j].cValue = diagonal; alignment[i][j].prevCell = 'd'; } else{ alignment[i][j].cValue = left; alignment[i][j].prevCell = 'l'; } } else{ if(up >= left){ alignment[i][j].cValue = up; alignment[i][j].prevCell = 'u'; } else{ alignment[i][j].cValue = left; alignment[i][j].prevCell = 'l'; } } } } Overlap over; over.setOverlap(alignment, lA, lB, 0); // Fix gaps at the beginning and end of the sequences traceBack(false); // Traceback the alignment to populate seqAaln and seqBaln } catch(exception& e) { m->errorOut(e, "NeedlemanOverlap", "alignPrimer"); exit(1); } } //********************************************************************/ bool NeedlemanOverlap::isEquivalent(char oligo, char seq){ try { bool same = true; oligo = toupper(oligo); seq = toupper(seq); if(oligo != seq){ if(oligo == 'A' && (seq != 'A' && seq != 'M' && seq != 'R' && seq != 'W' && seq != 'D' && seq != 'H' && seq != 'V')) { same = false; } else if(oligo == 'C' && (seq != 'C' && seq != 'Y' && seq != 'M' && seq != 'S' && seq != 'B' && seq != 'H' && seq != 'V')) { same = false; } else if(oligo == 'G' && (seq != 'G' && seq != 'R' && seq != 'K' && seq != 'S' && seq != 'B' && seq != 'D' && seq != 'V')) { same = false; } else if(oligo == 'T' && (seq != 'T' && seq != 'Y' && seq != 'K' && seq != 'W' && seq != 'B' && seq != 'D' && seq != 'H')) { same = false; } else if((oligo == '.' 
|| oligo == '-')) { same = false; } else if((oligo == 'N' || oligo == 'I') && (seq == 'N')) { same = false; } else if(oligo == 'R' && (seq != 'A' && seq != 'G')) { same = false; } else if(oligo == 'Y' && (seq != 'C' && seq != 'T')) { same = false; } else if(oligo == 'M' && (seq != 'C' && seq != 'A')) { same = false; } else if(oligo == 'K' && (seq != 'T' && seq != 'G')) { same = false; } else if(oligo == 'W' && (seq != 'T' && seq != 'A')) { same = false; } else if(oligo == 'S' && (seq != 'C' && seq != 'G')) { same = false; } else if(oligo == 'B' && (seq != 'C' && seq != 'T' && seq != 'G')) { same = false; } else if(oligo == 'D' && (seq != 'A' && seq != 'T' && seq != 'G')) { same = false; } else if(oligo == 'H' && (seq != 'A' && seq != 'T' && seq != 'C')) { same = false; } else if(oligo == 'V' && (seq != 'A' && seq != 'C' && seq != 'G')) { same = false; } } return same; } catch(exception& e) { m->errorOut(e, "TrimOligos", "countDiffs"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/needlemanoverlap.hpp000077500000000000000000000026361424121717000204650ustar00rootroot00000000000000#ifndef NEEDLEMAN_H #define NEEDLEMAN_H /* * needleman.h * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This class is an Alignment child class that implements the Needleman-Wunsch pairwise alignment algorithm as * described in: * * Needleman SB & Wunsch CD. 1970. A general method applicable to the search for similarities in the amino acid * sequence of two proteins. J Mol Biol. 48:443-53. * Korf I, Yandell M, & Bedell J. 2003. BLAST. O'Reilly & Associates. Sebastopol, CA. * * This method is simple as it assesses a consistent penalty for each gap position. Because this method typically has * problems at the ends when two sequences do not full overlap, we employ a separate method to fix the ends (see * Overlap class documentation) * */ #include "mothur.h" #include "alignment.hpp" /**************************************************************************************************/ class NeedlemanOverlap : public Alignment { public: NeedlemanOverlap(float, float, float, int); ~NeedlemanOverlap(); void align(string, string, bool createBaseMap=false); void align(Sequence, Protein); void alignPrimer(string, string); private: float gap; float match; float mismatch; bool isEquivalent(char, char); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/noalign.cpp000077500000000000000000000013451424121717000165620ustar00rootroot00000000000000/* * noalign.cpp * * * Created by Pat Schloss on 2/19/09. * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "alignment.hpp" #include "noalign.hpp" /**************************************************************************************************/ NoAlign::NoAlign(){ /* do nothing */ } /**************************************************************************************************/ NoAlign::~NoAlign(){ /* do nothing */ } /**************************************************************************************************/ void NoAlign::align(string A, string B, bool createBaseMap){ } /**************************************************************************************************/ mothur-1.48.0/source/noalign.hpp000077500000000000000000000010451424121717000165640ustar00rootroot00000000000000#ifndef NOALIGN_HPP #define NOALIGN_HPP /* * noalign.hpp * * * Created by Pat Schloss on 2/19/09. * Copyright 2009Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothur.h" /**************************************************************************************************/ class NoAlign : public Alignment { public: NoAlign(); ~NoAlign(); void align(string, string, bool createBaseMap); private: }; /**************************************************************************************************/ #endif mothur-1.48.0/source/observable.h000077500000000000000000000006201424121717000167170ustar00rootroot00000000000000#ifndef OBSERVABLE_H #define OBSERVABLE_H #include "collectdisplay.h" /***********************************************************************/ class Observable { public: virtual void registerDisplay(Display*) = 0; virtual void registerDisplays(vector) = 0; virtual ~Observable() = default; }; /***********************************************************************/ #endif mothur-1.48.0/source/opticluster.cpp000077500000000000000000000306741424121717000175170ustar00rootroot00000000000000// // opticluster.cpp // Mothur // // Created by Sarah Westcott on 4/20/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #include "opticluster.h" OptiCluster::OptiCluster(OptiData* mt, ClusterMetric* met, long long ns) : Cluster() { matrix = mt; metric = met; truePositives = 0; trueNegatives = 0; falseNegatives = 0; falsePositives = 0; numSingletons = ns; } /***********************************************************************/ //randomly assign sequences to OTUs int OptiCluster::initialize(double& value, bool randomize, string initialize) { try { numSeqs = matrix->getNumSeqs(); truePositives = 0; falsePositives = 0; falseNegatives = 0; trueNegatives = 0; bins.resize(numSeqs); //place seqs in own bin vector temp; bins.push_back(temp); seqBin[numSeqs] = -1; insertLocation = numSeqs; Utils util; if (initialize == "singleton") { //put everyone in own bin for (int i = 0; i < numSeqs; i++) { bins[i].push_back(i); } //maps randomized sequences to bins for (int i = 0; i < numSeqs; i++) { seqBin[i] = bins[i][0]; randomizeSeqs.push_back(i); } if (randomize) { util.mothurRandomShuffle(randomizeSeqs); } //for each sequence (singletons removed on read) for (map::iterator it = seqBin.begin(); it != seqBin.end(); it++) { if (it->second == -1) { } else { long long numCloseSeqs = (matrix->getNumClose(it->first)); //does not include self falseNegatives += numCloseSeqs; } } falseNegatives /= 2; //square matrix trueNegatives = numSeqs * (numSeqs-1)/2 - (falsePositives + falseNegatives + truePositives); //since everyone is a singleton no one clusters together. 
True negative = num far apart }else { //put everyone in first bin for (int i = 0; i < numSeqs; i++) { bins[0].push_back(i); seqBin[i] = 0; randomizeSeqs.push_back(i); } if (randomize) { util.mothurRandomShuffle(randomizeSeqs); } //for each sequence (singletons removed on read) for (map::iterator it = seqBin.begin(); it != seqBin.end(); it++) { if (it->second == -1) { } else { long long numCloseSeqs = (matrix->getNumClose(it->first)); //does not include self truePositives += numCloseSeqs; } } truePositives /= 2; //square matrix falsePositives = numSeqs * (numSeqs-1)/2 - (trueNegatives + falseNegatives + truePositives); } value = metric->getValue(truePositives, trueNegatives, falsePositives, falseNegatives); return value; } catch(exception& e) { m->errorOut(e, "OptiCluster", "initialize"); exit(1); } } /***********************************************************************/ /* for each sequence with mutual information (close) * remove from current OTU and calculate MCC when sequence forms its own OTU or joins one of the other OTUs where there is a sequence within the `threshold` (no need to calculate MCC if the paired sequence is already in same OTU and no need to try every OTU - just those where there's a close sequence) * keep or move the sequence to the OTU where the `metric` is the largest - flip a coin on ties */ bool OptiCluster::update(double& listMetric) { try { //for each sequence (singletons removed on read) for (int i = 0; i < randomizeSeqs.size(); i++) { if (m->getControl_pressed()) { break; } map::iterator it = seqBin.find(randomizeSeqs[i]); long long seqNumber = it->first; long long binNumber = it->second; if (binNumber == -1) { } else { double tn, tp, fp, fn; double bestMetric = -1; double bestBin, bestTp, bestTn, bestFn, bestFp; tn = trueNegatives; tp = truePositives; fp = falsePositives; fn = falseNegatives; //close / far count in current bin vector results = getCloseFarCounts(seqNumber, binNumber); double cCount = results[0]; double fCount = results[1]; //metric in current bin bestMetric = metric->getValue(tp, tn, fp, fn); bestBin = binNumber; bestTp = tp; bestTn = tn; bestFp = fp; bestFn = fn; //if not already singleton, then calc value if singleton was created if (!((bins[binNumber].size()) == 1)) { //make a singleton //move out of old bin fn+=cCount; tn+=fCount; fp-=fCount; tp-=cCount; double singleMetric = metric->getValue(tp, tn, fp, fn); if (singleMetric > bestMetric) { bestBin = -1; bestTp = tp; bestTn = tn; bestFp = fp; bestFn = fn; bestMetric = singleMetric; } } set binsToTry; set closeSeqs = matrix->getCloseSeqs(seqNumber); for (set::iterator itClose = closeSeqs.begin(); itClose != closeSeqs.end(); itClose++) { binsToTry.insert(seqBin[*itClose]); } //merge into each "close" otu for (set::iterator it = binsToTry.begin(); it != binsToTry.end(); it++) { tn = trueNegatives; tp = truePositives; fp = falsePositives; fn = falseNegatives; fn+=cCount; tn+=fCount; fp-=fCount; tp-=cCount; //move out of old bin results = getCloseFarCounts(seqNumber, *it); fn-=results[0]; tn-=results[1]; tp+=results[0]; fp+=results[1]; //move into new bin double newMetric = metric->getValue(tp, tn, fp, fn); //score when sequence is moved //new best if (newMetric > bestMetric) { bestMetric = newMetric; bestBin = (*it); bestTp = tp; bestTn = tn; bestFp = fp; bestFn = fn; } } bool usedInsert = false; if (bestBin == -1) { bestBin = insertLocation; usedInsert = true; } if (bestBin != binNumber) { truePositives = bestTp; trueNegatives = bestTn; falsePositives = bestFp; falseNegatives = bestFn; 
//move seq from i to j bins[bestBin].push_back(seqNumber); //add seq to bestbin bins[binNumber].erase(remove(bins[binNumber].begin(), bins[binNumber].end(), seqNumber), bins[binNumber].end()); //remove from old bin i } if (usedInsert) { insertLocation = findInsert(); } //update seqBins seqBin[seqNumber] = bestBin; //set new OTU location } } listMetric = metric->getValue(truePositives, trueNegatives, falsePositives, falseNegatives); if (m->getDebug()) { ListVector* list = getList(); list->print(cout); delete list; } return 0; } catch(exception& e) { m->errorOut(e, "OptiCluster", "update"); exit(1); } } /***********************************************************************/ vector OptiCluster::getCloseFarCounts(long long seq, long long newBin) { try { vector results; results.push_back(0); results.push_back(0); if (newBin == -1) { } //making a singleton bin. Close but we are forcing apart. else { //merging a bin for (int i = 0; i < bins[newBin].size(); i++) { if (seq == bins[newBin][i]) {} //ignore self else if (!matrix->isClose(seq, bins[newBin][i])) { results[1]++; } //this sequence is "far away" from sequence i - above the cutoff else { results[0]++; } //this sequence is "close" to sequence i - distance between them is less than cutoff } } return results; } catch(exception& e) { m->errorOut(e, "OptiCluster", "getCloseFarCounts"); exit(1); } } /***********************************************************************/ vector OptiCluster::getStats( double& tp, double& tn, double& fp, double& fn) { try { double singletn = matrix->getNumSingletons() + numSingletons; double tempnumSeqs = numSeqs + singletn; tp = truePositives; fp = falsePositives; fn = falseNegatives; tn = tempnumSeqs * (tempnumSeqs-1)/2 - (falsePositives + falseNegatives + truePositives); //adds singletons to tn vector results; Sensitivity sens; double sensitivity = sens.getValue(tp, tn, fp, fn); results.push_back(sensitivity); Specificity spec; double specificity = spec.getValue(tp, tn, fp, fn); results.push_back(specificity); PPV ppv; double positivePredictiveValue = ppv.getValue(tp, tn, fp, fn); results.push_back(positivePredictiveValue); NPV npv; double negativePredictiveValue = npv.getValue(tp, tn, fp, fn); results.push_back(negativePredictiveValue); FDR fdr; double falseDiscoveryRate = fdr.getValue(tp, tn, fp, fn); results.push_back(falseDiscoveryRate); Accuracy acc; double accuracy = acc.getValue(tp, tn, fp, fn); results.push_back(accuracy); MCC mcc; double matthewsCorrCoef = mcc.getValue(tp, tn, fp, fn); results.push_back(matthewsCorrCoef); F1Score f1; double f1Score = f1.getValue(tp, tn, fp, fn); results.push_back(f1Score); return results; } catch(exception& e) { m->errorOut(e, "OptiCluster", "getStats"); exit(1); } } /***********************************************************************/ ListVector* OptiCluster::getList() { try { ListVector* list = new ListVector(); ListVector* singleton = matrix->getListSingle(); if (singleton != nullptr) { //add in any sequences above cutoff in read. Removing these saves clustering time. 
for (int i = 0; i < singleton->getNumBins(); i++) { if (singleton->get(i) != "") { list->push_back(singleton->get(i)); } } delete singleton; } for (int i = 0; i < bins.size(); i++) { if (bins[i].size() != 0) { string otu = matrix->getName(bins[i][0]); for (int j = 1; j < bins[i].size(); j++) { otu += "," + matrix->getName(bins[i][j]); } list->push_back(otu); } } return list; } catch(exception& e) { m->errorOut(e, "OptiCluster", "getList"); exit(1); } } /***********************************************************************/ long long OptiCluster::getNumBins() { try { long long singletn = matrix->getNumSingletons(); for (int i = 0; i < bins.size(); i++) { if (bins[i].size() != 0) { singletn++; } } return singletn; } catch(exception& e) { m->errorOut(e, "OptiCluster", "getNumBins"); exit(1); } } /***********************************************************************/ long long OptiCluster::findInsert() { try { //initially there are bins for each sequence (excluding singletons removed on read) for (long long i = 0; i < bins.size(); i++) { if (m->getControl_pressed()) { break; } if (bins[i].size() == 0) { return i; } //this bin is empty } return -1; } catch(exception& e) { m->errorOut(e, "OptiCluster", "findInsert"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/opticluster.h000077500000000000000000000042741424121717000171610ustar00rootroot00000000000000// // opticluster.h // Mothur // // Created by Sarah Westcott on 4/20/16. // Copyright (c) 2016 Schloss Lab. All rights reserved. // #ifndef __Mothur__opticluster__ #define __Mothur__opticluster__ #include "cluster.hpp" #include "optimatrix.h" #include "calculator.h" #include "mcc.hpp" #include "sensitivity.hpp" #include "specificity.hpp" #include "fdr.hpp" #include "npv.hpp" #include "ppv.hpp" #include "f1score.hpp" #include "tp.hpp" #include "fp.hpp" #include "fpfn.hpp" #include "tptn.hpp" #include "tn.hpp" #include "fn.hpp" #include "accuracy.hpp" /***********************************************************************/ /* #ifdef UNIT_TEST friend class TestOptiCluster; OptiCluster() : Cluster() { m = MothurOut::getInstance(); truePositives = 0; trueNegatives = 0; falseNegatives = 0; falsePositives = 0; } //for testing class void setVariables(OptiData* mt, ClusterMetric* met) { matrix = mt; metric = met; } #endif */ class OptiCluster : public Cluster { public: OptiCluster(OptiData* mt, ClusterMetric* met, long long ns); ~OptiCluster() = default; bool updateDistance(PDistCell& colCell, PDistCell& rowCell) { return false; } string getTag() { string tag = "opti_" + metric->getName(); return tag; } long long getNumBins(); int initialize(double&, bool, string); //randomize and place in "best" OTUs bool update(double&); //returns whether list changed and MCC vector getStats( double&, double&, double&, double&); ListVector* getList(); protected: OptiData* matrix; ClusterMetric* metric; vector randomizeSeqs; vector< vector > bins; //bin[0] -> seqs in bin[0] map binLabels; //for fitting - maps binNumber to existing reference label map seqBin; //sequence# -> bin# long long numSeqs, insertLocation, numSingletons; double truePositives, trueNegatives, falsePositives, falseNegatives; long long findInsert(); vector getCloseFarCounts(long long seq, long long newBin); vector getFitStats( long long&, long long&, long long&, long long&); }; #endif /* defined(__Mothur__opticluster__) */ 
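/*
 * Worked sketch of the per-sequence decision OptiCluster::update() makes above (self-contained and
 * illustrative only: it does not use mothur's classes, the confusion counts and the close/far pair
 * counts are invented, and the query is assumed to currently sit in its own singleton OTU so nothing
 * has to be subtracted for leaving an old OTU). For each OTU holding a sequence "close" to the query,
 * the counts are adjusted as if the query joined it (tp += close, fp += far, fn -= close, tn -= far),
 * the metric -- MCC in this sketch -- is recomputed, and the best-scoring placement wins, exactly as
 * the bestMetric/bestBin bookkeeping does in update().
 */
#include <cmath>
#include <cstdio>

static double sketchMCC(double tp, double tn, double fp, double fn) {
    double denom = std::sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
    if (denom == 0) { return 0.0; }                         // guard against an empty confusion class
    return ((tp * tn) - (fp * fn)) / denom;
}

static void sketchUpdateStep() {
    double tp = 120, tn = 4800, fp = 30, fn = 50;           // current whole-clustering counts (invented)

    double closeA = 4, farA = 1;                            // pairs the query would form inside OTU A
    double closeB = 2, farB = 0;                            // pairs the query would form inside OTU B

    double staySingleton = sketchMCC(tp, tn, fp, fn);
    double joinA = sketchMCC(tp + closeA, tn - farA, fp + farA, fn - closeA);
    double joinB = sketchMCC(tp + closeB, tn - farB, fp + farB, fn - closeB);

    std::printf("singleton: %.4f  join A: %.4f  join B: %.4f\n", staySingleton, joinA, joinB);
}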
mothur-1.48.0/source/optifitcluster.cpp000066400000000000000000000772271424121717000202240ustar00rootroot00000000000000// // optifitcluster.cpp // Mothur // // Created by Sarah Westcott on 5/10/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #include "optifitcluster.hpp" /***********************************************************************/ OptiFitCluster::OptiFitCluster(OptiData* mt, ClusterMetric* met, long long ns) : Cluster(), matrix(mt), metric(met), numComboSingletons(ns) { m = MothurOut::getInstance(); maxRefBinNumber = 0; closed = false; numFitSeqs = 0; fittruePositives = 0; fitfalsePositives = 0; fitfalseNegatives = 0; fittrueNegatives = 0; numFitSingletons = 0; numComboSeqs = 0; numComboSingletons = 0; combotruePositives = 0; combofalsePositives = 0; combofalseNegatives = 0; combotrueNegatives = 0; } /***********************************************************************/ int OptiFitCluster::initialize(double& value, bool randomize, vector > existingBins, vector bls, string meth, bool denov) { try { double reftruePositives, reftrueNegatives, reffalsePositives, reffalseNegatives, numRefSeqs; numRefSeqs = 0; reftruePositives = 0; reffalsePositives = 0; reffalseNegatives = 0; reftrueNegatives = 0; if (meth == "closed") { closed = true; } denovo = denov; vector< vector< long long> > translatedBins; randomizeSeqs = matrix->getTranslatedBins(existingBins, translatedBins); //otus in existingBins, otus with matrix names int binNumber = 0; int placeHolderIndex = -1; for (long long i = 0; i < translatedBins.size(); i++) { binLabels[binNumber] = bls[i]; bins.push_back(translatedBins[i]); numRefSeqs += translatedBins[i].size(); for (int j = 0; j < translatedBins[i].size(); j++) { for (int k = 0; k < j; k++) { if (translatedBins[i][j] < 0) { //no dists in matrix translatedBins[i][j] = placeHolderIndex; placeHolderIndex--; reffalsePositives++; }else { //j has distances in the matrix, but is it close to k? if (matrix->isClose(translatedBins[i][j], translatedBins[i][k])) { reftruePositives++; }else { reffalsePositives++; } } } seqBin[translatedBins[i][j]] = binNumber; } binNumber++; } maxRefBinNumber = binNumber; reffalseNegatives = matrix->getNumRefDists() - reftruePositives; //number of distance in matrix for reference seqs - reftruePositives reftrueNegatives = numRefSeqs * (numRefSeqs-1)/2 - (reffalsePositives + reffalseNegatives + reftruePositives); //add fit seqs as singletons int numRefBins = translatedBins.size(); numFitSingletons = 0; //put every fit seq in own bin for (long long i = 0; i < randomizeSeqs.size(); i++) { vector thisBin; thisBin.push_back(randomizeSeqs[i]); bins.push_back(thisBin); seqBin[randomizeSeqs[i]] = numRefBins+i; long long numCloseSeqs = (matrix->getNumFitClose(randomizeSeqs[i])); //does not include self fitfalseNegatives += numCloseSeqs; if (numCloseSeqs == 0) { numFitSingletons++; } //you are a singletons counted by the matrix as a fitSingleton, but you are not removed because you have ref dists we want to use in the fitting. Don't want to count you twice in stats output. } numFitSeqs = randomizeSeqs.size(); fitfalseNegatives /= 2; //square matrix fittrueNegatives = numFitSeqs * (numFitSeqs-1)/2 - (fitfalsePositives + fitfalseNegatives + fittruePositives); //since everyone is a singleton no one clusters together. 
True negative = num far apart numComboSeqs = numRefSeqs + randomizeSeqs.size(); combofalseNegatives = matrix->getNumDists() - reftruePositives; //number of distance in matrix for reference seqs - reftruePositives combotrueNegatives = numComboSeqs * (numComboSeqs-1)/2 - (reffalsePositives + reffalseNegatives + reftruePositives); combotruePositives = reftruePositives; combofalsePositives = reffalsePositives; double comboValue = metric->getValue(combotruePositives, combotrueNegatives, combofalsePositives, combofalseNegatives); //add insert location seqBin[bins.size()] = -1; insertLocation = bins.size(); vector temp; bins.push_back(temp); if (randomize) { util.mothurRandomShuffle(randomizeSeqs); } value = comboValue; return value; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "initialize"); exit(1); } } /***********************************************************************/ /* for each sequence with mutual information (close) * remove from current OTU and calculate MCC when sequence forms its own OTU or joins one of the other OTUs where there is a sequence within the `threshold` (no need to calculate MCC if the paired sequence is already in same OTU and no need to try every OTU - just those where there's a close sequence) * keep or move the sequence to the OTU where the `metric` is the largest - flip a coin on ties */ bool OptiFitCluster::update(double& listMetric) { try { //for each sequence (singletons removed on read) for (int i = 0; i < randomizeSeqs.size(); i++) { if (m->getControl_pressed()) { break; } map::iterator it = seqBin.find(randomizeSeqs[i]); int seqNumber = it->first; int binNumber = it->second; if (binNumber == -1) { } else { vector bestBin; bestBin.resize(2, binNumber); vector tn; tn.push_back(fittrueNegatives); tn.push_back(combotrueNegatives); vector tp; tp.push_back(fittruePositives); tp.push_back(combotruePositives); vector fp; fp.push_back(fitfalsePositives); fp.push_back(combofalsePositives); vector fn; fn.push_back(fitfalseNegatives); fn.push_back(combofalseNegatives); vector bestMetric; bestMetric.resize(2, -1); //bestMetric[0] = fitSeqs alone, bestMetric[1] = combo or ref and fit vector bestTp; bestTp.resize(2, 0); vector bestTn; bestTn.resize(2, 0); vector bestFp; bestFp.resize(2, 0); vector bestFn; bestFn.resize(2, 0); //close / far count in current bin vector results = getCloseFarCounts(seqNumber, binNumber); double combocCount = results[0]; double combofCount = results[1]; //close / far count in current bin for fit seqs vector fitresults = getCloseFarFitCounts(seqNumber, binNumber); double fitcCount = fitresults[0]; double fitfCount = fitresults[1]; //fit metrics in current bin bestMetric[0] = metric->getValue(tp[0], tn[0], fp[0], fn[0]); bestTp[0] = tp[0]; bestTn[0] = tn[0]; bestFp[0] = fp[0]; bestFn[0] = fn[0]; //combo metric in current bin bestMetric[1] = metric->getValue(tp[1], tn[1], fp[1], fn[1]); bestTp[1] = tp[1]; bestTn[1] = tn[1]; bestFp[1] = fp[1]; bestFn[1] = fn[1]; //if not already singleton, then calc value if singleton was created if (!((bins[binNumber].size()) == 1)) { //make a singleton fn[0]+=fitcCount; tn[0]+=fitfCount; fp[0]-=fitfCount; tp[0]-=fitcCount; fn[1]+=combocCount; tn[1]+=combofCount; fp[1]-=combofCount; tp[1]-=combocCount; double singleFitMetric = metric->getValue(tp[0], tn[0], fp[0], fn[0]); double singleComboMetric = metric->getValue(tp[1], tn[1], fp[1], fn[1]); if ((singleFitMetric > bestMetric[0]) || (singleComboMetric > bestMetric[1])) { bestBin[1] = -1; bestTp[1] = tp[1]; bestTn[1] = tn[1]; bestFp[1] = fp[1]; 
bestFn[1] = fn[1]; bestMetric[1] = singleComboMetric; bestBin[0] = -1; bestTp[0] = tp[0]; bestTn[0] = tn[0]; bestFp[0] = fp[0]; bestFn[0] = fn[0]; bestMetric[0] = singleFitMetric; } } set binsToTry; set closeSeqs = matrix->getCloseRefSeqs(seqNumber); for (set::iterator itClose = closeSeqs.begin(); itClose != closeSeqs.end(); itClose++) { binsToTry.insert(seqBin[*itClose]); } //merge into each "close" otu vector > ties; vector > ties0; for (set::iterator it = binsToTry.begin(); it != binsToTry.end(); it++) { //reset tn, tp,fp,fn values to original bin tn[0] = fittrueNegatives; tp[0] = fittruePositives; fp[0] = fitfalsePositives; fn[0] = fitfalseNegatives; tn[1] = combotrueNegatives; tp[1] = combotruePositives; fp[1] = combofalsePositives; fn[1] = combofalseNegatives; //move out of old bin fn[0]+=fitcCount; tn[0]+=fitfCount; fp[0]-=fitfCount; tp[0]-=fitcCount; fn[1]+=combocCount; tn[1]+=combofCount; fp[1]-=combofCount; tp[1]-=combocCount; results = getCloseFarCounts(seqNumber, *it); //results[0] = close count, results[1] = far count fn[1]-=results[0]; tn[1]-=results[1]; tp[1]+=results[0]; fp[1]+=results[1]; //move into new bin results = getCloseFarFitCounts(seqNumber, *it); fn[0]-=results[0]; tn[0]-=results[1]; tp[0]+=results[0]; fp[0]+=results[1]; //move into new bin - only consider fit seqs double newComboMetric = metric->getValue(tp[1], tn[1], fp[1], fn[1]); //score when sequence is moved double newFitMetric = metric->getValue(tp[0], tn[0], fp[0], fn[0]); //score when sequence is moved //new best if (newComboMetric > bestMetric[1]) { ties.clear(); ties0.clear(); bestMetric[1] = newComboMetric; bestBin[1] = (*it); bestTp[1] = tp[1]; bestTn[1] = tn[1]; bestFp[1] = fp[1]; bestFn[1] = fn[1]; bestMetric[0] = newFitMetric; bestBin[0] = (*it); bestTp[0] = tp[0]; bestTn[0] = tn[0]; bestFp[0] = fp[0]; bestFn[0] = fn[0]; vector tie; tie.push_back(bestMetric[1]); tie.push_back(bestBin[1]); tie.push_back(bestTp[1]); tie.push_back(bestTn[1]); tie.push_back(bestFp[1]); tie.push_back(bestFn[1]); ties.push_back(tie); vector tie0; tie0.push_back(bestMetric[0]); tie0.push_back(bestBin[0]); tie0.push_back(bestTp[0]); tie0.push_back(bestTn[0]); tie0.push_back(bestFp[0]); tie0.push_back(bestFn[0]); ties0.push_back(tie0); }else if (newComboMetric == bestMetric[1]) { bestMetric[1] = newComboMetric; bestBin[1] = (*it); bestTp[1] = tp[1]; bestTn[1] = tn[1]; bestFp[1] = fp[1]; bestFn[1] = fn[1]; bestMetric[0] = newFitMetric; bestBin[0] = (*it); bestTp[0] = tp[0]; bestTn[0] = tn[0]; bestFp[0] = fp[0]; bestFn[0] = fn[0]; vector tie; tie.push_back(bestMetric[1]); tie.push_back(bestBin[1]); tie.push_back(bestTp[1]); tie.push_back(bestTn[1]); tie.push_back(bestFp[1]); tie.push_back(bestFn[1]); ties.push_back(tie); vector tie0; tie0.push_back(bestMetric[0]); tie0.push_back(bestBin[0]); tie0.push_back(bestTp[0]); tie0.push_back(bestTn[0]); tie0.push_back(bestFp[0]); tie0.push_back(bestFn[0]); ties0.push_back(tie0); } } if (ties.size() > 1) { int randomTie = util.getRandomIndex((int)ties.size()-1); bestMetric[1] = ties[randomTie][0]; bestBin[1] = ties[randomTie][1]; bestTp[1] = ties[randomTie][2]; bestTn[1] = ties[randomTie][3]; bestFp[1] = ties[randomTie][4]; bestFn[1] = ties[randomTie][5]; bestMetric[0] = ties0[randomTie][0]; bestBin[0] = ties0[randomTie][1]; bestTp[0] = ties0[randomTie][2]; bestTn[0] = ties0[randomTie][3]; bestFp[0] = ties0[randomTie][4]; bestFn[0] = ties0[randomTie][5]; } //how to choose the best bin if they differ???? 
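// Note on the decision just below: the combined (reference + fit) metric drives the move --
// newBin is taken from bestBin[1]. A value of -1 means the sequence scores best as its own
// OTU, so it is placed in the spare empty bin tracked by insertLocation, and a replacement
// empty bin is located afterwards with findInsert().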
long long newBin = bestBin[1]; bool usedInsert = false; if (newBin == -1) { newBin = insertLocation; usedInsert = true; } if (newBin != binNumber) { combotruePositives = bestTp[1]; combotrueNegatives = bestTn[1]; combofalsePositives = bestFp[1]; combofalseNegatives = bestFn[1]; fittruePositives = bestTp[0]; fittrueNegatives = bestTn[0]; fitfalsePositives = bestFp[0]; fitfalseNegatives = bestFn[0]; //move seq from i to j bins[newBin].push_back(seqNumber); //add seq to bestbin bins[binNumber].erase(remove(bins[binNumber].begin(), bins[binNumber].end(), seqNumber), bins[binNumber].end()); //remove from old bin i } if (usedInsert) { insertLocation = findInsert(); } //update seqBins seqBin[seqNumber] = newBin; //set new OTU location } } listMetric = metric->getValue(combotruePositives, combotrueNegatives, combofalsePositives, combofalseNegatives); if (m->getDebug()) { ListVector* list = getList(); list->print(cout); delete list; } return 0; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "update"); exit(1); } } /***********************************************************************/ vector OptiFitCluster::getCloseFarCounts(long long seq, long long newBin) { try { vector results; results.push_back(0); results.push_back(0); //results[0] = close count, results[1] = far count if (newBin == -1) { } //making a singleton bin. Close but we are forcing apart. else { //merging a bin for (long long i = 0; i < bins[newBin].size(); i++) { if (seq == bins[newBin][i]) {} //ignore self else if (!matrix->isClose(seq, bins[newBin][i])) { results[1]++; } //this sequence is "far away" from sequence i - above the cutoff else { results[0]++; } //this sequence is "close" to sequence i - distance between them is less than cutoff } } return results; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "getCloseFarCounts"); exit(1); } } /***********************************************************************/ vector OptiFitCluster::getCloseFarFitCounts(long long seq, long long newBin) { try { vector results; results.push_back(0); results.push_back(0); //results[0] = close count, results[1] = far count if (newBin == -1) { } //making a singleton bin. Close but we are forcing apart. 
else { //merging a bin for (long long i = 0; i < bins[newBin].size(); i++) { if (seq == bins[newBin][i]) {} //ignore self else { bool isFit = true; bool closeFit = matrix->isCloseFit(seq, bins[newBin][i], isFit); if (closeFit) { //you are close if you are fit and close results[0]++; }else if (isFit) { results[1]++; } //this sequence is "far away" and fit - above the cutoff } } } return results; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "getCloseFarCounts"); exit(1); } } /***********************************************************************/ vector OptiFitCluster::getStats(double& tp, double& tn, double& fp, double& fn) { try { double singletn = 0; if (!closed) { singletn = matrix->getNumSingletons(); } double tempnumSeqs = numComboSeqs + singletn; tp = combotruePositives; fp = combofalsePositives; fn = combofalseNegatives; tn = tempnumSeqs * (tempnumSeqs-1)/2 - (combofalsePositives + combofalseNegatives + combotruePositives); //adds singletons to tn vector results; Sensitivity sens; double sensitivity = sens.getValue(tp, tn, fp, fn); results.push_back(sensitivity); Specificity spec; double specificity = spec.getValue(tp, tn, fp, fn); results.push_back(specificity); PPV ppv; double positivePredictiveValue = ppv.getValue(tp, tn, fp, fn); results.push_back(positivePredictiveValue); NPV npv; double negativePredictiveValue = npv.getValue(tp, tn, fp, fn); results.push_back(negativePredictiveValue); FDR fdr; double falseDiscoveryRate = fdr.getValue(tp, tn, fp, fn); results.push_back(falseDiscoveryRate); Accuracy acc; double accuracy = acc.getValue(tp, tn, fp, fn); results.push_back(accuracy); MCC mcc; double matthewsCorrCoef = mcc.getValue(tp, tn, fp, fn); results.push_back(matthewsCorrCoef); F1Score f1; double f1Score = f1.getValue(tp, tn, fp, fn); results.push_back(f1Score); return results; } catch(exception& e) { m->errorOut(e, "OptiCluster", "getStats"); exit(1); } } /***********************************************************************/ vector OptiFitCluster::getFitStats(double& tp, double& tn, double& fp, double& fn) { try { double singletn = 0; if (!closed) { singletn = matrix->getNumFitTrueSingletons(); } double tempnumSeqs = numFitSeqs + singletn; //numFitSingletons are reads that are selected as the fit seqs, that have dists to reference but not dists to other fit seqs. 
They are included tp = fittruePositives; fp = fitfalsePositives; fn = fitfalseNegatives; tn = tempnumSeqs * (tempnumSeqs-1)/2 - (fitfalsePositives + fitfalseNegatives + fittruePositives); //adds singletons to tn vector results; Sensitivity sens; double sensitivity = sens.getValue(tp, tn, fp, fn); results.push_back(sensitivity); Specificity spec; double specificity = spec.getValue(tp, tn, fp, fn); results.push_back(specificity); PPV ppv; double positivePredictiveValue = ppv.getValue(tp, tn, fp, fn); results.push_back(positivePredictiveValue); NPV npv; double negativePredictiveValue = npv.getValue(tp, tn, fp, fn); results.push_back(negativePredictiveValue); FDR fdr; double falseDiscoveryRate = fdr.getValue(tp, tn, fp, fn); results.push_back(falseDiscoveryRate); Accuracy acc; double accuracy = acc.getValue(tp, tn, fp, fn); results.push_back(accuracy); MCC mcc; double matthewsCorrCoef = mcc.getValue(tp, tn, fp, fn); results.push_back(matthewsCorrCoef); F1Score f1; double f1Score = f1.getValue(tp, tn, fp, fn); results.push_back(f1Score); return results; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "getFitStats"); exit(1); } } /***********************************************************************/ ListVector* OptiFitCluster::getList() { try { ListVector* list = new ListVector(); ListVector* singleton = matrix->getListSingle(); if (singleton != nullptr) { //add in any sequences above cutoff in read. Removing these saves clustering time. for (int i = 0; i < singleton->getNumBins(); i++) { if (singleton->get(i) != "") { list->push_back(singleton->get(i)); } } delete singleton; } for (int i = 0; i < bins.size(); i++) { vector thisBin; for (int j = 0; j < bins[i].size(); j++) { if (bins[i][j] >= 0) { thisBin.push_back(bins[i][j]); } } if (thisBin.size() != 0) { string otu = matrix->getName(thisBin[0]); for (int j = 1; j < thisBin.size(); j++) { otu += "," + matrix->getName(thisBin[j]); } list->push_back(otu); } } return list; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "getList"); exit(1); } } /***********************************************************************/ ListVector* OptiFitCluster::getFittedList(string label, bool includerefs) { try { ListVector* list = new ListVector(); map newBins; set unFitted; long long numListSeqs = 0; for (long long i = 0; i < randomizeSeqs.size(); i++) { //build otus if (m->getControl_pressed()) { break; } map::iterator it = seqBin.find(randomizeSeqs[i]); long long seqNumber = it->first; long long binNumber = it->second; map::iterator itBinLabels = binLabels.find(binNumber); //do we have a label for this bin. If the seq maps to existing bin then we should, otherwise we couldn't "fit" this sequence if (itBinLabels != binLabels.end()) { numListSeqs++; map::iterator itBin = newBins.find(binNumber); // have we seen this otu yet? if (itBin == newBins.end()) { //create bin newBins[binNumber] = matrix->getName(seqNumber); }else { //append bin newBins[binNumber] += "," + matrix->getName(seqNumber); } }else { unFitted.insert(seqNumber); } } if (denovo || includerefs) { //add in refs vector refs = matrix->getRefSeqs(); for (long long i = 0; i < refs.size(); i++) { if (m->getControl_pressed()) { break; } map::iterator it = seqBin.find(refs[i]); long long seqNumber = it->first; long long binNumber = it->second; map::iterator itBin = newBins.find(binNumber); // have we seen this otu yet? 
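// Note: only sequences whose bin carries an existing reference label (binLabels) count as
// "fitted" here; everything else is collected in unFitted. Further down, the unfitted reads
// are either clustered de novo with OptiCluster (open reference, !closed) or reported via the
// *.optifit_scrap.accnos file (closed reference).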
if (itBin == newBins.end()) { //create bin newBins[binNumber] = matrix->getName(seqNumber); }else { //append bin newBins[binNumber] += "," + matrix->getName(seqNumber); } } } //numFitSeqs does not include any kind of singleton long long numUnFitted = (numFitSeqs + numFitSingletons - numListSeqs); //getNumFitTrueSingletons are fit reads that have no dists in the matrix. This can be confusing, think of it like this: there are true singletons, meaning we don't care if you are a ref or fit and you have no dists below the cutoff. This means you will be in your own OTU no matter what we do. There are fitSingletons, meaning you are a fit sequence and have no dists below the cutoff that coorespond to other fit seqs ( NOTE: you may or may not have dists to ref seqs or you could be a true singleton or a just a singleton because of the references chosen). long long numSingletonBins = 0; if ((label != "") && (numUnFitted != 0)) { m->mothurOut("\nFitted " + toString(numListSeqs) + " sequences to " + toString(newBins.size()) + " existing OTUs.\n"); if (!closed) { //cluster the unfitted seqs separately m->mothurOut(toString(numUnFitted) + " sequences were unable to be fitted existing OTUs, excluding singletons.\n"); m->mothurOut("\n**************** Clustering the unfitted sequences ****************\n"); OptiData* unFittedMatrix = matrix->extractMatrixSubset(unFitted); ListVector* unfittedList = clusterUnfitted(unFittedMatrix, label); //unfittedList includes unfitted singletons if (unfittedList != nullptr) { m->mothurOut("The unfitted sequences clustered into " + toString(unfittedList->getNumBins()) + " new OTUs.\n"); //+unFittedMatrix->getNumSingletons()+ matrix->getNumFitSingletons() for (int i = 0; i < unfittedList->getNumBins(); i++) { string bin = unfittedList->get(i); if (bin != "") { list->push_back(unfittedList->get(i)); } } delete unfittedList; } delete unFittedMatrix; m->mothurOut("\n*******************************************************************\n\n"); //add in fit singletons ListVector* singleton = matrix->getFitListSingle(); if (singleton != nullptr) { //add in any sequences above cutoff in read. Removing these saves clustering time. for (int i = 0; i < singleton->getNumBins(); i++) { if (m->getControl_pressed()) { break; } if (singleton->get(i) != "") { list->push_back(singleton->get(i)); } } numSingletonBins += singleton->getNumBins(); delete singleton; } }else { m->mothurOut("\nSequences that were unable to be fitted existing OTUs will be listed in the *.optifit_scrap.accnos file.\n"); unfittedNames = matrix->getNames(unFitted); //add in fit singletons ListVector* singleton = matrix->getFitListSingle(); if (singleton != nullptr) { //add in any sequences above cutoff in read. Removing these saves clustering time. for (int i = 0; i < singleton->getNumBins(); i++) { if (m->getControl_pressed()) { break; } if (singleton->get(i) != "") { unfittedNames.insert(singleton->get(i)); } } delete singleton; } } }else { if (label != "") { m->mothurOut("\nFitted all " + toString(list->getNumSeqs()) + " sequences to existing OTUs. 
\n"); } } vector newLabels = list->getLabels(); for (map::iterator itBin = newBins.begin(); itBin != newBins.end(); itBin++) { list->push_back(itBin->second); newLabels.push_back(binLabels[itBin->first]); } list->setLabels(newLabels); return list; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "getFittedList"); exit(1); } } /***********************************************************************/ ListVector* OptiFitCluster::clusterUnfitted(OptiData* unfittedMatrix, string label) { try { ListVector* list = nullptr; OptiCluster cluster(unfittedMatrix, metric, 0); int iters = 0; double listVectorMetric = 0; //worst state double delta = 1; cluster.initialize(listVectorMetric, true, "singleton"); long long numBins = cluster.getNumBins(); m->mothurOut("\n\niter\ttime\tlabel\tnum_otus\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"); double tp, tn, fp, fn; vector results = cluster.getStats(tp, tn, fp, fn); m->mothurOut("0\t0\t" + label + "\t" + toString(numBins) + "\t"+ label + "\t" + toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); } m->mothurOutEndLine(); while ((delta > 0.0001) && (iters < 100)) { long start = time(nullptr); if (m->getControl_pressed()) { break; } double oldMetric = listVectorMetric; cluster.update(listVectorMetric); delta = abs(oldMetric - listVectorMetric); iters++; results = cluster.getStats(tp, tn, fp, fn); numBins = cluster.getNumBins(); m->mothurOut(toString(iters) + "\t" + toString(time(nullptr) - start) + "\t" + label + "\t" + toString(numBins) + "\t" + label + "\t"+ toString(tp) + "\t" + toString(tn) + "\t" + toString(fp) + "\t" + toString(fn) + "\t"); for (int i = 0; i < results.size(); i++) { m->mothurOut(toString(results[i]) + "\t"); } m->mothurOutEndLine(); } m->mothurOutEndLine(); m->mothurOutEndLine(); if (m->getControl_pressed()) { return list; } list = cluster.getList(); list->setLabel(label); return list; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "clusterUnfitted"); exit(1); } } /***********************************************************************/ long long OptiFitCluster::getNumBins() { try { long long singletn = 0; singletn = matrix->getNumSingletons(); for (int i = 0; i < bins.size(); i++) { if (bins[i].size() != 0) { singletn++; } } return singletn; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "getNumBins"); exit(1); } } /***********************************************************************/ long long OptiFitCluster::getNumFitBins() { try { ListVector* list = getFittedList("", false); int numBins = 0; if (list != nullptr) { numBins = list->getNumBins(); delete list; } return numBins; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "getNumFitBins"); exit(1); } } /***********************************************************************/ int OptiFitCluster::findInsert() { try { //initially there are bins for each sequence (excluding singletons removed on read) for (int i = 0; i < bins.size(); i++) { if (m->getControl_pressed()) { break; } if (bins[i].size() == 0) { return i; } //this bin is empty } return -1; } catch(exception& e) { m->errorOut(e, "OptiFitCluster", "findInsert"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/optifitcluster.hpp000066400000000000000000000050121424121717000202100ustar00rootroot00000000000000// // optifitcluster.hpp // Mothur // // Created by Sarah 
Westcott on 5/10/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef optifitcluster_hpp #define optifitcluster_hpp #include "cluster.hpp" #include "optimatrix.h" #include "calculator.h" #include "mcc.hpp" #include "sensitivity.hpp" #include "specificity.hpp" #include "fdr.hpp" #include "npv.hpp" #include "ppv.hpp" #include "f1score.hpp" #include "tp.hpp" #include "fp.hpp" #include "fpfn.hpp" #include "tptn.hpp" #include "tn.hpp" #include "fn.hpp" #include "accuracy.hpp" #include "opticluster.h" /***********************************************************************/ class OptiFitCluster : public Cluster { public: OptiFitCluster(OptiData* mt, ClusterMetric* met, long long ns); ~OptiFitCluster() = default; int initialize(double& value, bool randomize, vector > existingBins, vector, string, bool); bool update(double&); //returns whether list changed and MCC bool updateDistance(PDistCell& colCell, PDistCell& rowCell) { return false; } //inheritance compliant string getTag() { string tag = "optifit_" + metric->getName(); return tag; } long long getNumBins(); long long getNumFitBins(); vector getStats( double&, double&, double&, double&); //combo stats vector getFitStats( double&, double&, double&, double&); //fitted seqs stats ListVector* getList(); ListVector* getFittedList(string, bool); set getUnfittedNames() { return unfittedNames; } protected: MothurOut* m; Utils util; ClusterMetric* metric; OptiData* matrix; map seqBin; //sequence# -> bin# vector randomizeSeqs; vector< vector > bins; //bin[0] -> seqs in bin[0] map binLabels; //for fitting - maps binNumber to existing reference label long long maxRefBinNumber; bool closed, denovo; set unfittedNames; double fittruePositives, fittrueNegatives, fitfalsePositives, fitfalseNegatives, combotruePositives, combotrueNegatives, combofalsePositives, combofalseNegatives; long long numFitSeqs, insertLocation, numFitSingletons; long long numComboSeqs, numComboSingletons; int findInsert(); vector getCloseFarCounts(long long seq, long long newBin); vector getCloseFarFitCounts(long long seq, long long newBin); ListVector* clusterUnfitted(OptiData*, string); }; #endif /* optifitcluster_hpp */ mothur-1.48.0/source/optionparser.cpp000077500000000000000000000301371424121717000176610ustar00rootroot00000000000000/* * optionparser.cpp * Mothur * * Created by Sarah Westcott on 6/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "optionparser.h" /***********************************************************************/ OptionParser::OptionParser(string option, vector parametersAllowedByThisCommand) { try { m = MothurOut::getInstance(); current = CurrentFile::getInstance(); ValidParameters validParameter; fillFileTypes(fileTypes); if (option != "") { string key, value; //reads in parameters and values while((option.find_first_of(',') != -1)) { //while there are parameters util.splitAtComma(value, option); util.splitAtEquals(key, value); if ((key == "candidate") || (key == "query")) { key = "fasta"; } if (key == "template") { key = "reference"; } key = util.splitWhiteSpace(key).front(); //if value is wrapped in '' preserve spaces if ((value[0] == '\'') && (value[(value.length()-1)] == '\'')) { value = value.substr(1); value = value.substr(0, (value.length()-1)); } else { trimWhiteSpace(value); } if (!validParameter.isValidParameter(key, parametersAllowedByThisCommand, value)) {} //ignore invalid parameters else { parameters[key] = value; } } //in case there is no comma and to get last parameter after comma util.splitAtEquals(key, option); if ((key == "candidate") || (key == "query")) { key = "fasta"; } if (key == "template") { key = "reference"; } key = util.splitWhiteSpace(key).front(); //if value is wrapped in '' preserve spaces if ((option[0] == '\'') && (option[(option.length()-1)] == '\'')) { option = option.substr(1); option = option.substr(0, (option.length()-1)); } else { trimWhiteSpace(option); } if (!validParameter.isValidParameter(key, parametersAllowedByThisCommand, option)) {} //ignore invalid parameters else { parameters[key] = option; } } } catch(exception& e) { m->errorOut(e, "OptionParser", "OptionParser"); exit(1); } } /***********************************************************************/ OptionParser::OptionParser(string option, map& copy) { try { m = MothurOut::getInstance(); current = CurrentFile::getInstance(); fillFileTypes(fileTypes); if (option != "") { string key, value; //reads in parameters and values while((option.find_first_of(',') != -1)) { //while there are parameters util.splitAtComma(value, option); util.splitAtEquals(key, value); if ((key == "candidate") || (key == "query")) { key = "fasta"; } if (key == "template") { key = "reference"; } key = util.splitWhiteSpace(key).front(); //if value is wrapped in '' preserve spaces if ((value[0] == '\'') && (value[(value.length()-1)] == '\'')) { value = value.substr(1); value = value.substr(0, (value.length()-1)); } else { value = util.splitWhiteSpace(value).front(); } parameters[key] = value; } //in case there is no comma and to get last parameter after comma util.splitAtEquals(key, option); if ((key == "candidate") || (key == "query")) { key = "fasta"; } if (key == "template") { key = "reference"; } key = util.splitWhiteSpace(key).front(); //if value is wrapped in '' preserve spaces if ((option[0] == '\'') && (option[(option.length()-1)] == '\'')) { option = option.substr(1); option = option.substr(0, (option.length()-1)); } else { option = util.splitWhiteSpace(option).front(); } parameters[key] = option; } copy = parameters; } catch(exception& e) { m->errorOut(e, "OptionParser", "OptionParser"); exit(1); } } /***********************************************************************/ map OptionParser::getParameters() { try { //loop through parameters and look for "current" so you can return the appropriate file //doing it here to avoid code duplication in each of the commands map::iterator it; for (it = parameters.begin(); it 
!= parameters.end();) { if (it->second == "current") { //look for file types if (it->first == "fasta") { it->second = current->getFastaFile(); }else if (it->first == "qfile") { it->second = current->getQualFile(); }else if (it->first == "phylip") { it->second = current->getPhylipFile(); }else if (it->first == "column") { it->second = current->getColumnFile(); }else if (it->first == "list") { it->second = current->getListFile(); }else if (it->first == "rabund") { it->second = current->getRabundFile(); }else if (it->first == "clr") { it->second = current->getCLRFile(); }else if (it->first == "sabund") { it->second = current->getSabundFile(); }else if (it->first == "name") { it->second = current->getNameFile(); }else if (it->first == "group") { it->second = current->getGroupFile(); }else if (it->first == "order") { it->second = current->getOrderFile(); }else if (it->first == "ordergroup") { it->second = current->getOrderGroupFile(); }else if (it->first == "tree") { it->second = current->getTreeFile(); }else if (it->first == "shared") { it->second = current->getSharedFile(); }else if (it->first == "relabund") { it->second = current->getRelAbundFile(); }else if (it->first == "design") { it->second = current->getDesignFile(); }else if (it->first == "sff") { it->second = current->getSFFFile(); }else if (it->first == "flow") { it->second = current->getFlowFile(); }else if (it->first == "oligos") { it->second = current->getOligosFile(); }else if (it->first == "accnos") { it->second = current->getAccnosFile(); }else if (it->first == "taxonomy") { it->second = current->getTaxonomyFile(); }else if (it->first == "constaxonomy") { it->second = current->getConsTaxonomyFile(); }else if (it->first == "contigsreport") { it->second = current->getContigsReportFile(); }else if (it->first == "biom") { it->second = current->getBiomFile(); }else if (it->first == "count") { it->second = current->getCountFile(); }else if (it->first == "summary") { it->second = current->getSummaryFile(); }else if (it->first == "file") { it->second = current->getFileFile(); }else if (it->first == "sample") { it->second = current->getSampleFile(); }else { m->mothurOut("[ERROR]: mothur does not save a current file for " + it->first); m->mothurOutEndLine(); } if (it->second == "") { //no file was saved for that type, warn and remove from parameters m->mothurOut("[WARNING]: no file was saved for " + it->first + " parameter.\n"); parameters.erase(it++); }else { m->mothurOut("Using " + it->second + " as input file for the " + it->first + " parameter.\n"); it++; } }else{ it++; } } vector inputDirs = current->getInputDir(); if (inputDirs.size() != 0) { for (it = parameters.begin(); it != parameters.end(); it++) { if (fileTypes.count(it->first) != 0) { string path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. 
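// Example of the input-directory handling below (hedged; assumes the user has registered an
// input directory beforehand, e.g. with mothur's set.dir(input=...) command): a bare file name
// such as fasta=final.fasta carries no path, so checkSpecificLocations() searches the
// registered input directories for it; an explicit path such as fasta=/data/final.fasta is
// left untouched because util.hasPath() returns a non-empty prefix.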
if (path == "") { string inputLocation = it->second; util.checkSpecificLocations(inputLocation, inputDirs, ""); it->second = inputLocation; } } } } return parameters; } catch(exception& e) { m->errorOut(e, "OptionParser", "getParameters"); exit(1); } } /***********************************************************************/ void OptionParser::fillFileTypes(set& fileTypes) { try { fileTypes.insert("fasta"); fileTypes.insert("qfile"); fileTypes.insert("phylip"); fileTypes.insert("column"); fileTypes.insert("list"); fileTypes.insert("rabund"); fileTypes.insert("clr"); fileTypes.insert("sabund"); fileTypes.insert("name"); fileTypes.insert("group"); fileTypes.insert("order"); fileTypes.insert("ordergroup"); fileTypes.insert("tree"); fileTypes.insert("shared"); fileTypes.insert("relabund"); fileTypes.insert("design"); fileTypes.insert("sff"); fileTypes.insert("sfftxt"); fileTypes.insert("flow"); fileTypes.insert("oligos"); fileTypes.insert("accnos"); fileTypes.insert("taxonomy"); fileTypes.insert("constaxonomy"); fileTypes.insert("contigsreport"); fileTypes.insert("biom"); fileTypes.insert("count"); fileTypes.insert("summary"); fileTypes.insert("file"); fileTypes.insert("sample"); fileTypes.insert("list"); fileTypes.insert("rabund"); fileTypes.insert("clr"); fileTypes.insert("sabund"); fileTypes.insert("reference"); fileTypes.insert("conservation"); fileTypes.insert("quantile"); fileTypes.insert("reffasta"); fileTypes.insert("refcolumn"); fileTypes.insert("reflist"); fileTypes.insert("refname"); fileTypes.insert("refcount"); fileTypes.insert("reftaxonomy"); fileTypes.insert("axes"); fileTypes.insert("metadata"); fileTypes.insert("refname"); fileTypes.insert("repfasta"); fileTypes.insert("oldfasta"); fileTypes.insert("hard"); fileTypes.insert("alignreport"); fileTypes.insert("report"); fileTypes.insert("corraxes"); fileTypes.insert("otucorr"); fileTypes.insert("accnos"); fileTypes.insert("phylip1"); fileTypes.insert("phylip2"); fileTypes.insert("picrust"); fileTypes.insert("ffastq"); fileTypes.insert("rfastq"); fileTypes.insert("ffasta"); fileTypes.insert("rfasta"); fileTypes.insert("fqfile"); fileTypes.insert("rqfile"); fileTypes.insert("findex"); fileTypes.insert("rindex"); fileTypes.insert("error"); fileTypes.insert("xml"); fileTypes.insert("ecoli"); fileTypes.insert("map"); fileTypes.insert("lookup"); fileTypes.insert("project"); fileTypes.insert("mimark"); fileTypes.insert("vsearch"); fileTypes.insert("blast"); fileTypes.insert("uchime"); fileTypes.insert("prefetch"); fileTypes.insert("fasterq-dump"); fileTypes.insert("input"); //uchime, vsearch, prefetch and fasterq-dump are not included } catch(exception& e) { m->errorOut(e, "OptionParser", "fillFileTypes"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/optionparser.h000077500000000000000000000015431424121717000173250ustar00rootroot00000000000000#ifndef OPTIONPARSER_H #define OPTIONPARSER_H /* * optionparser.h * Mothur * * Created by Sarah Westcott on 6/8/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "mothur.h" #include "mothurout.h" #include "command.hpp" #include "utils.hpp" #include "currentfile.h" /***********************************************************************/ class OptionParser { public: OptionParser(string, vector); OptionParser(string, map&); ~OptionParser() = default; map getParameters(); //adds inputdir to parameters if indicated private: map parameters; MothurOut* m; CurrentFile* current; Utils util; set fileTypes; void fillFileTypes(set&); }; /***********************************************************************/ #endif mothur-1.48.0/source/overlap.cpp000077500000000000000000000054331424121717000166050ustar00rootroot00000000000000/* * overlap.cpp * * * Created by Pat Schloss on 12/15/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * * This class cleans up the alignment at the 3' end of the alignments. Because the Gotoh and Needleman-Wunsch * algorithms start the traceback from the lower-right corner of the dynamic programming matrix, there may be a lot of * scattered bases in the alignment near the 3' end of the alignment. Here we basically look for the largest score * in the last column and row to determine whether there should be exta gaps in sequence A or sequence B. The gap * issues at the 5' end of the alignment seem to take care of themselves in the traceback. * */ #include "alignmentcell.hpp" #include "overlap.hpp" /**************************************************************************************************/ int Overlap::maxRow(vector >& alignment, const int band){ float max = -100; int end = lA - 1; int index = end; for(int i=band;i= max){ // score. index = i; max = alignment[i][end].cValue; } } return index; } /**************************************************************************************************/ int Overlap::maxColumn(vector >& alignment, const int band){ float max = -100; int end = lB - 1; int index = end; for(int i=band;i= max){ // alignment score. index = i; max = alignment[end][i].cValue; } } return index; } /**************************************************************************************************/ void Overlap::setOverlap(vector >& alignment, const int nA, const int nB, const int band=0){ lA = nA; lB = nB; int rowIndex = maxRow(alignment, band); // get the index for the row with the highest right hand side score int colIndex = maxColumn(alignment, band); // get the index for the column with the highest bottom row score int row = lB-1; int column = lA-1; if(colIndex == column && rowIndex == row){} // if the max values are the lower right corner, then we're good else if(alignment[row][colIndex].cValue < alignment[rowIndex][column].cValue){ for(int i=rowIndex+1;i >&, const int, const int, const int); private: int maxRow(vector >&, const int); int maxColumn(vector >&, const int); int lA, lB; }; /**************************************************************************************************/ #endif mothur-1.48.0/source/raredisplay.cpp000077500000000000000000000055741424121717000174620ustar00rootroot00000000000000/* * raredisplay.cpp * Dotur * * Created by Sarah Westcott on 11/18/08. * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "raredisplay.h" /***********************************************************************/ void RareDisplay::init(string label){ try { lock_guard guard(mutex); this->label = label; } catch(exception& e) { m->errorOut(e, "RareDisplay", "init"); exit(1); } } /***********************************************************************/ void RareDisplay::update(SAbundVector& rank){ try { lock_guard guard(mutex); int newNSeqs = rank.getNumSeqs(); vector data = estimate->getValues(&rank); map >::iterator it = results.find(newNSeqs); if (it == results.end()) { //first iter for this count vector temp; temp.push_back(data[0]); results[newNSeqs] = temp; }else { it->second.push_back(data[0]); } } catch(exception& e) { m->errorOut(e, "RareDisplay", "update"); exit(1); } } /***********************************************************************/ void RareDisplay::update(vector shared, int numSeqs) { try { lock_guard guard(mutex); vector data = estimate->getValues(shared); map >::iterator it = results.find(numSeqs); if (it == results.end()) { //first iter for this count vector temp; temp.push_back(data[0]); results[numSeqs] = temp; }else { it->second.push_back(data[0]); } } catch(exception& e) { m->errorOut(e, "RareDisplay", "update"); exit(1); } } /***********************************************************************/ void RareDisplay::reset(){ try { lock_guard guard(mutex); nIters++; } catch(exception& e) { m->errorOut(e, "RareDisplay", "reset"); exit(1); } } /***********************************************************************/ //assumes only one thread will run close void RareDisplay::close(){ try { output->setLabelName(label); for (map >::iterator it = results.begin(); it != results.end(); it++) { vector data(3,0); sort((it->second).begin(), (it->second).end()); vector thisResults = it->second; double meanResults = util.getAverage(thisResults); data[0] = meanResults; data[1] = (it->second)[(int)(0.025*(nIters-1))]; data[2] = (it->second)[(int)(0.975*(nIters-1))]; output->updateOutput(it->first, data); } nIters = 1; results.clear(); output->resetFile(); } catch(exception& e) { m->errorOut(e, "RareDisplay", "close"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/raredisplay.h000077500000000000000000000020061424121717000171120ustar00rootroot00000000000000#ifndef RAREDISPLAY_H #define RAREDISPLAY_H #include "sabundvector.hpp" #include "calculator.h" #include "fileoutput.h" #include "display.h" /***********************************************************************/ //Each display is responsible for one calculator. The FileOutput class handles creating the outputfile for the calc. //This class uses mutex and lock_guard to prevent thread errors. class RareDisplay : public Display { public: RareDisplay(Calculator* calc, FileOutput* file) : estimate(calc), output(file), nIters(1) {}; ~RareDisplay() { delete estimate; delete output; } void init(string); void reset(); void update(SAbundVector&); void update(vector shared, int numSeqs); void close(); bool isCalcMultiple() { return estimate->getMultiple(); } private: Calculator* estimate; FileOutput* output; string label; map > results; //maps seqCount to results for that number of sequences int nIters; Utils util; std::mutex mutex; }; #endif mothur-1.48.0/source/rarefact.cpp000077500000000000000000000234111424121717000167200ustar00rootroot00000000000000/* * rarefact.cpp * Dotur * * Created by Sarah Westcott on 11/18/08. * Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "rarefact.h" /**************************************************************************************************/ struct singleRarefactData { long long nIters, numSeqs; MothurOut* m; Utils util; OrderVector order; set ends; vector displays; string label; int increment; singleRarefactData(){} singleRarefactData(long long st, Utils u, OrderVector o, set ed, vector& dis, string l, long long ns, int inc) { m = MothurOut::getInstance(); nIters = st; util = u; order = o; ends = ed; displays = dis; label = l; numSeqs = ns; increment = inc; } }; /***********************************************************************/ int singleDriver(singleRarefactData* params){ try { RarefactionCurveData rcd; rcd.registerDisplays(params->displays); for(int iter=0;iternIters;iter++){ for(int i=0;idisplays.size();i++){ params->displays[i]->init(params->label); } RAbundVector lookup(params->order.getNumBins()); SAbundVector rank(params->order.getMaxRank()+1); params->util.mothurRandomShuffle(params->order); for(int i=0;inumSeqs;i++){ if (params->m->getControl_pressed()) { return 0; } int binNumber = params->order.get(i); int abundance = lookup.get(binNumber); rank.set(abundance, rank.get(abundance)-1); abundance++; lookup.set(binNumber, abundance); rank.set(abundance, rank.get(abundance)+1); if((i == 0) || ((i+1) % params->increment == 0) || (params->ends.count(i+1) != 0)){ rcd.updateRankData(rank); } } if((params->numSeqs % params->increment != 0) || (params->ends.count(params->numSeqs) != 0)){ rcd.updateRankData(rank); } for(int i=0;idisplays.size();i++){ params->displays[i]->reset(); } } return 0; } catch(exception& e) { params->m->errorOut(e, "Rarefact", "singleDriver"); exit(1); } } /***********************************************************************/ int Rarefact::getCurve(float percentFreq = 0.01, int nIters = 1000){ try { //convert freq percentage to number int increment = 1; if (percentFreq < 1.0) { increment = numSeqs * percentFreq; } else { increment = percentFreq; } vector lines; if (processors > (nIters)) { processors = nIters; } //figure out how many sequences you have to process int numItersPerProcessor = nIters / processors; for (int i = 0; i < processors; i++) { if(i == (processors - 1)){ numItersPerProcessor = (nIters) - i * numItersPerProcessor; } lines.push_back(numItersPerProcessor); } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { //make copy of order so we don't get access violations OrderVector newOrder(order); singleRarefactData* dataBundle = new singleRarefactData(lines[i+1], util, newOrder, ends, displays, label, numSeqs, increment); data.push_back(dataBundle); workerThreads.push_back(new std::thread(singleDriver, dataBundle)); } //make copy of lookup so we don't get access violations OrderVector newOrder(order); singleRarefactData* dataBundle = new singleRarefactData(lines[0], util, newOrder, ends, displays, label, numSeqs, increment); singleDriver(dataBundle); for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); delete data[i]; delete workerThreads[i]; } delete dataBundle; for(int i=0;iclose(); } return 0; } catch(exception& e) { m->errorOut(e, "Rarefact", "getCurve"); exit(1); } } /**************************************************************************************************/ struct sharedRarefactData { long long nIters; MothurOut* m; Utils util; vector lookup; vector displays; string label; bool jumble; sharedRarefactData(){} sharedRarefactData(long long st, Utils 
u, vector& o, vector& dis, string l, bool ns) { m = MothurOut::getInstance(); nIters = st; util = u; lookup = o; displays = dis; label = l; jumble = ns; } ~sharedRarefactData(){ for(int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } }; /**************************************************************************************/ void mergeVectors(SharedRAbundVector* shared1, SharedRAbundVector* shared2, MothurOut* m) { try{ for (int k = 0; k < shared1->getNumBins(); k++) { //merge new species into shared1 shared1->set(k, (shared1->get(k) + shared2->get(k))); //set to 'combo' since this vector now contains multiple groups } } catch(exception& e) { m->errorOut(e, "Rarefact", "mergeVectors"); exit(1); } } /***********************************************************************/ int sharedDriver(sharedRarefactData* params){ try { SharedRarefactionCurveData rcd; rcd.registerDisplays(params->displays); //register the displays rcd.registerDisplays(params->displays); for(int iter=0;iternIters;iter++){ for(int i=0;idisplays.size();i++){ params->displays[i]->init(params->label); } //randomize the groups if (params->jumble) { params->util.mothurRandomShuffle(params->lookup); } //make merge the size of lookup[0] SharedRAbundVector* merge = new SharedRAbundVector(params->lookup[0]->getNumBins()); //make copy of lookup zero for(int i = 0; ilookup[0]->getNumBins(); i++) { merge->set(i, params->lookup[0]->get(i)); } vector subset; //send each group one at a time for (int k = 1; k < params->lookup.size(); k++) { if (params->m->getControl_pressed()) { delete merge; return 0; } subset.clear(); //clears out old pair of sharedrabunds //add in new pair of sharedrabunds subset.push_back(merge); subset.push_back(params->lookup[k]); rcd.updateSharedData(subset, k+1); //, params->numGroupComb mergeVectors(merge, params->lookup[k], params->m); } //resets output files for(int i=0;idisplays.size();i++){ params->displays[i]->reset(); } delete merge; } return 0; } catch(exception& e) { params->m->errorOut(e, "Rarefact", "sharedDriver"); exit(1); } } /***********************************************************************/ int Rarefact::getSharedCurve(float percentFreq = 0.01, int nIters = 1000){ try { //if jumble is false all iters will be the same if (!jumble) { nIters = 1; } vector lines; if (processors > (nIters)) { processors = nIters; } //figure out how many sequences you have to process int numItersPerProcessor = nIters / processors; for (int i = 0; i < processors; i++) { if(i == (processors - 1)){ numItersPerProcessor = (nIters) - i * numItersPerProcessor; } lines.push_back(numItersPerProcessor); } //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { //make copy of lookup so we don't get access violations vector copyLookup = lookup->getSharedRAbundVectors(); label = copyLookup[0]->getLabel(); sharedRarefactData* dataBundle = new sharedRarefactData(lines[i+1], util, copyLookup, displays, label, jumble); data.push_back(dataBundle); workerThreads.push_back(new std::thread(sharedDriver, dataBundle)); } //make copy of lookup so we don't get access violations vector copyLookup = lookup->getSharedRAbundVectors(); label = copyLookup[0]->getLabel(); sharedRarefactData* dataBundle = new sharedRarefactData(lines[0], util, copyLookup, displays, label, jumble); sharedDriver(dataBundle); for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); delete data[i]; delete workerThreads[i]; } delete dataBundle; for(int i=0;iclose(); } return 0; 
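// Worked example of the iteration split used above (the same pattern appears in getCurve and
// getSharedCurve): with nIters = 1000 and processors = 3, numItersPerProcessor = 1000/3 = 333,
// so lines holds 333, 333 and 1000 - 2*333 = 334 iterations; the main thread runs lines[0]
// and the two worker threads run the remaining chunks on their own copies of the data.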
} catch(exception& e) { m->errorOut(e, "Rarefact", "getSharedCurve"); exit(1); } } /**************************************************************************************/ mothur-1.48.0/source/rarefact.h000077500000000000000000000020161424121717000163630ustar00rootroot00000000000000#ifndef RAREFACT_H #define RAREFACT_H #include "rarefactioncurvedata.h" #include "raredisplay.h" #include "ordervector.hpp" #include "sharedrabundvectors.hpp" #include "mothur.h" class Rarefact { public: Rarefact(OrderVector& o, vector disp, set en, int proc) : numSeqs(o.getNumSeqs()), order(o), displays(disp), label(o.getLabel()), ends(en) { m = MothurOut::getInstance(); jumble = false; processors = proc; } Rarefact(SharedRAbundVectors* shared, vector disp, bool j, int proc) : lookup(shared), displays(disp), jumble(j) { m = MothurOut::getInstance(); processors = proc; } ~Rarefact(){}; int getCurve(float, int); int getSharedCurve(float, int); private: OrderVector order; vector displays; int numSeqs, numGroupComb, processors; string label; set ends; void mergeVectors(SharedRAbundVector*, SharedRAbundVector*); SharedRAbundVectors* lookup; MothurOut* m; bool jumble; Utils util; int driver(vector&, int, int); }; #endif mothur-1.48.0/source/rarefactioncurvedata.h000077500000000000000000000031001424121717000207630ustar00rootroot00000000000000#ifndef RAREFACTIONCURVEDATA_H #define RAREFACTIONCURVEDATA_H #include "mothur.h" #include "sabundvector.hpp" #include "display.h" #include "observable.h" /***********************************************************************/ //Has a display for each calculator class RarefactionCurveData : public Observable { public: RarefactionCurveData() : rank(0) {}; void registerDisplay(Display* o) { displays.insert(o); } void registerDisplays(vector o) { for(int i=0;i::iterator pos=displays.begin();pos!=displays.end();pos++){ (*pos)->update(rank); } } private: set displays; SAbundVector rank; }; /***********************************************************************/ class SharedRarefactionCurveData : public Observable { public: SharedRarefactionCurveData() = default;; //: shared1(0), shared2(0) void registerDisplay(Display* o) { displays.insert(o); } void registerDisplays(vector o) { for(int i=0;i r, int numSeqs) { shared = r; NumSeqs = numSeqs; for(set::iterator pos=displays.begin();pos!=displays.end();pos++){ (*pos)->update(shared, NumSeqs); } } private: set displays; vector shared; int NumSeqs, NumGroupComb; }; /***********************************************************************/ #endif mothur-1.48.0/source/read/000077500000000000000000000000001424121717000153345ustar00rootroot00000000000000mothur-1.48.0/source/read/readblast.cpp000077500000000000000000000265001424121717000200070ustar00rootroot00000000000000/* * readblast.cpp * Mothur * * Created by westcott on 12/10/09. * Copyright 2009 Schloss Lab. All rights reserved. 
* */ #include "readblast.h" //******************************************************************************************************************** //sorts lowest to highest inline bool compareOverlap(seqDist left, seqDist right){ return (left.dist < right.dist); } /*********************************************************************************************/ ReadBlast::ReadBlast(string file, float c, float p, int l, bool ms) : blastfile(file), cutoff(c), penalty(p), length(l), minWanted(ms) { try { m = MothurOut::getInstance(); matrix = nullptr; } catch(exception& e) { m->errorOut(e, "ReadBlast", "ReadBlast"); exit(1); } } /*********************************************************************************************/ //assumptions about the blast file: //1. if duplicate lines occur the first line is always best and is chosen //2. blast scores are grouped together, ie. a a .... score, a b .... score, a c ....score... int ReadBlast::read(NameAssignment* nameMap) { try { //if the user has not given a names file read names from blastfile if (nameMap->size() == 0) { readNames(nameMap); } int nseqs = nameMap->size(); if (m->getControl_pressed()) { return 0; } ifstream fileHandle; util.openInputFile(blastfile, fileHandle); string firstName, secondName, eScore, currentRow; string repeatName = ""; int count = 1; float distance, thisoverlap, refScore; refScore = 0.0; float percentId; float numBases, mismatch, gap, startQuery, endQuery, startRef, endRef, score, lengthThisSeq; ofstream outDist; ofstream outOverlap; //create objects needed for read matrix = new SparseDistanceMatrix(); matrix->resize(nseqs); if (m->getControl_pressed()) { fileHandle.close(); delete matrix; return 0; } //this is used to quickly find if we already have a distance for this combo vector< map > dists; dists.resize(nseqs); //dists[0][1] = distance from seq0 to seq1 map thisRowsBlastScores; if (!fileHandle.eof()) { //read in line from file fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; gobble(fileHandle); currentRow = firstName; lengthThisSeq = numBases; repeatName = firstName + secondName; if (firstName == secondName) { refScore = score; } else{ //convert name to number map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } thisRowsBlastScores[itB->second] = score; //calc overlap score thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty); //if there is a valid overlap, add it if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) { seqDist overlapValue(itA->second, itB->second, thisoverlap); overlap.push_back(overlapValue); } } }else { m->mothurOut("Error in your blast file, cannot read.\n"); exit(1); } //read file while(!fileHandle.eof()){ if (m->getControl_pressed()) { fileHandle.close(); delete matrix; return 0; } //read in line from file fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; gobble(fileHandle); string temp = firstName + secondName; //to check if this file has repeat lines, ie. 
is this a blast instead of a blscreen file //if this is a new pairing if (temp != repeatName) { repeatName = temp; if (currentRow == firstName) { if (firstName == secondName) { refScore = score; count++; }else{ //convert name to number map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } //save score thisRowsBlastScores[itB->second] = score; //calc overlap score thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty); //if there is a valid overlap, add it if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) { seqDist overlapValue(itA->second, itB->second, thisoverlap); overlap.push_back(overlapValue); } } //end else }else { //end row //convert blast scores to distance and add cell to sparse matrix if we can map::iterator it; map::iterator itDist; for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) { distance = 1.0 - (it->second / refScore); //do we already have the distance calculated for b->a map::iterator itA = nameMap->find(currentRow); itDist = dists[it->first].find(itA->second); //if we have it then compare if (itDist != dists[it->first].end()) { //if you want the minimum blast score ratio, then pick max distance if(minWanted) { distance = max(itDist->second, distance); } else{ distance = min(itDist->second, distance); } //is this distance below cutoff if (distance <= cutoff) { if (itA->second < it->first) { PDistCell value(it->first, distance); matrix->addCell(itA->second, value); }else { PDistCell value(itA->second, distance); matrix->addCell(it->first, value); } } //not going to need this again dists[it->first].erase(itDist); }else { //save this value until we get the other ratio dists[itA->second][it->first] = distance; } } //clear out last rows info thisRowsBlastScores.clear(); currentRow = firstName; lengthThisSeq = numBases; //add this row to thisRowsBlastScores if (firstName == secondName) { refScore = score; } else{ //add this row to thisRowsBlastScores //convert name to number map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } thisRowsBlastScores[itB->second] = score; //calc overlap score thisoverlap = 1.0 - (percentId * (lengthThisSeq - startQuery) / endRef / 100.0 - penalty); //if there is a valid overlap, add it if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap <= cutoff)) { seqDist overlapValue(itA->second, itB->second, thisoverlap); overlap.push_back(overlapValue); } } }//end if current row }//end if repeat }//end while //get last rows info stored //convert blast scores to distance and add cell to sparse matrix if we can map::iterator it; map::iterator itDist; for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) { distance = 1.0 - (it->second / refScore); //do we already have the distance calculated for b->a map::iterator itA = nameMap->find(currentRow); itDist = 
dists[it->first].find(itA->second); //if we have it then compare if (itDist != dists[it->first].end()) { //if you want the minimum blast score ratio, then pick max distance if(minWanted) { distance = max(itDist->second, distance); } else{ distance = min(itDist->second, distance); } //is this distance below cutoff if (distance <= cutoff) { if (itA->second < it->first) { PDistCell value(it->first, distance); matrix->addCell(itA->second, value); }else { PDistCell value(itA->second, distance); matrix->addCell(it->first, value); } } //not going to need this again dists[it->first].erase(itDist); }else { //save this value until we get the other ratio dists[itA->second][it->first] = distance; } } //clear out info thisRowsBlastScores.clear(); dists.clear(); if (m->getControl_pressed()) { fileHandle.close(); delete matrix; return 0; } sort(overlap.begin(), overlap.end(), compareOverlap); if (m->getControl_pressed()) { fileHandle.close(); delete matrix; return 0; } fileHandle.close(); return 0; } catch(exception& e) { m->errorOut(e, "ReadBlast", "read"); exit(1); } } /*********************************************************************************************/ int ReadBlast::readNames(NameAssignment* nameMap) { try { m->mothurOut("Reading names... "); cout.flush(); string name, hold, prevName; int num = 1; ifstream in; util.openInputFile(blastfile, in); //ofstream outName; //util.openOutputFile((blastfile + ".tempOutNames"), outName); //read first line in >> prevName; for (int i = 0; i < 11; i++) { in >> hold; } gobble(in); //save name in nameMap nameMap->push_back(prevName); while (!in.eof()) { if (m->getControl_pressed()) { in.close(); return 0; } //read line in >> name; for (int i = 0; i < 11; i++) { in >> hold; } gobble(in); //is this a new name? if (name != prevName) { prevName = name; if (nameMap->get(name) != -1) { m->mothurOut("[ERROR]: trying to extract names from blast file, and I found dups. Are your sequence names unique? quitting.\n"); m->setControl_pressed(true); } else { nameMap->push_back(name); } //outName << name << '\t' << name << endl; num++; } } in.close(); //write out names file //string outNames = util.getRootName(blastfile) + "names"; //ofstream out; //util.openOutputFile(outNames, out); //nameMap->print(out); //out.close(); if (m->getControl_pressed()) { return 0; } m->mothurOut(toString(num) + " names read.\n"); return 0; } catch(exception& e) { m->errorOut(e, "ReadBlast", "readNames"); exit(1); } } /*********************************************************************************************/ mothur-1.48.0/source/read/readblast.h000077500000000000000000000030731424121717000174540ustar00rootroot00000000000000#ifndef READBLAST_H #define READBLAST_H /* * readblast.h * Mothur * * Created by westcott on 12/10/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "sparsedistancematrix.h" #include "nameassignment.hpp" /****************************************************************************************/ //Note: this class creates a sparsematrix and list if the read is executed, but does not delete them on deconstruction. //the user of this object is responsible for deleting the matrix and list if they call the read or there will be a memory leak //it is done this way so the read can be deleted and the information still used.
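// A minimal usage sketch for the ReadBlast class declared below, kept as comments so the header still compiles.
// The file name, cutoff, penalty, overlap length and minWanted values are illustrative assumptions, not values
// taken from mothur; only the constructor signature, read(), getDistMatrix() and getOverlapMatrix() come from
// this header, and the empty-NameAssignment behavior is described in readblast.cpp.
//
//   NameAssignment* nameMap = new NameAssignment();           // left empty so read() extracts names from the blast file
//   ReadBlast reader("example.blast", 0.10, 0.75, 5, true);   // blastfile, cutoff, penalty, overlap length, min bsr (illustrative values)
//   reader.read(nameMap);
//   SparseDistanceMatrix* dists = reader.getDistMatrix();     // caller owns this matrix (see note above)
//   vector<seqDist> overlaps = reader.getOverlapMatrix();
//   /* ... use dists and overlaps ... */
//   delete dists;                                             // avoids the leak described in the note above
//   delete nameMap;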
class ReadBlast { public: ReadBlast(string, float, float, int, bool); //blastfile, cutoff, penalty, length of overlap, min or max bsr ~ReadBlast() = default; int read(NameAssignment*); SparseDistanceMatrix* getDistMatrix() { return matrix; } vector getOverlapMatrix() { return overlap; } string getOverlapFile() { return overlapFile; } string getDistFile() { return distFile; } private: string blastfile, overlapFile, distFile; int length; //number of amino acids overlapped float penalty, cutoff; //penalty is used to adjust error rate bool minWanted; //if true choose min bsr, if false choose max bsr SparseDistanceMatrix* matrix; vector overlap; MothurOut* m; Utils util; int readNames(NameAssignment*); }; /*******************************************************************************************/ #endif mothur-1.48.0/source/read/readcluster.cpp000077500000000000000000000244161424121717000203670ustar00rootroot00000000000000/* * readcluster.cpp * Mothur * * Created by westcott on 10/28/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "readcluster.h" /***********************************************************************/ ReadCluster::ReadCluster(string distfile, float c, string o, bool s){ m = MothurOut::getInstance(); distFile = distfile; cutoff = c; outputDir = o; sortWanted = s; list = nullptr; } /***********************************************************************/ int ReadCluster::read(NameAssignment*& nameMap){ try { if (format == "phylip") { convertPhylip2Column(nameMap); } else { list = new ListVector(nameMap->getListVector()); } if (m->getControl_pressed()) { return 0; } if (sortWanted) { OutPutFile = util.sortFile(distFile, outputDir); } else { OutPutFile = distFile; } //for use by clusters splitMatrix to convert a phylip matrix to column return 0; } catch(exception& e) { m->errorOut(e, "ReadCluster", "read"); exit(1); } } /***********************************************************************/ int ReadCluster::read(CountTable*& ct){ try { if (format == "phylip") { convertPhylip2Column(ct); } else { list = new ListVector(ct->getListVector()); } if (m->getControl_pressed()) { return 0; } if (sortWanted) { OutPutFile = util.sortFile(distFile, outputDir); } else { OutPutFile = distFile; } //for use by clusters splitMatrix to convert a phylip matrix to column return 0; } catch(exception& e) { m->errorOut(e, "ReadCluster", "read"); exit(1); } } /***********************************************************************/ int ReadCluster::convertPhylip2Column(NameAssignment*& nameMap){ try { //convert phylip file to column file map rowToName; map::iterator it; string tempFile = distFile + ".column.temp"; ifstream in; util.openInputFile(distFile, in); gobble(in); ofstream out; util.openOutputFile(tempFile, out); float distance; int square, nseqs; square = 0; string name; vector matrixNames; string numTest; in >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } rowToName[0] = name; matrixNames.push_back(name); if(nameMap == nullptr){ list = new ListVector(nseqs); list->set(0, name); } else{ list = new ListVector(nameMap->getListVector()); if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct\n"); } } char d; while((d=in.get()) != EOF){ if(isalnum(d)){ square = 1; in.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = 0; break; } } if(square == 0){ 
for(int i=1;i> name; rowToName[i] = name; matrixNames.push_back(name); //there's A LOT of repeated code throughout this method... if(nameMap == nullptr){ list->set(i, name); for(int j=0;jgetControl_pressed()) { in.close(); out.close(); util.mothurRemove(tempFile); return 0; } in >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } if(distance <= cutoff){ out << i << '\t' << j << '\t' << distance << endl; } } } else{ if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct\n"); } for(int j=0;jgetControl_pressed()) { in.close(); out.close(); util.mothurRemove(tempFile); return 0; } in >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } if(distance <= cutoff){ out << i << '\t' << j << '\t' << distance << endl; } } } } } else{ for(int i=1;i> name; rowToName[i] = name; matrixNames.push_back(name); if(nameMap == nullptr){ list->set(i, name); for(int j=0;jgetControl_pressed()) { in.close(); out.close(); util.mothurRemove(tempFile); return 0; } in >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } if(distance <= cutoff && j < i){ out << i << '\t' << j << '\t' << distance << endl; } } } else{ if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct\n"); } for(int j=0;jgetControl_pressed()) { in.close(); out.close(); util.mothurRemove(tempFile); return 0; } in >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } if(distance <= cutoff && j < i){ out << i << '\t' << j << '\t' << distance << endl; } } } } } list->setLabel("0"); in.close(); out.close(); if(nameMap == nullptr){ nameMap = new NameAssignment(); for(int i=0;ipush_back(matrixNames[i]); } } ifstream in2; ofstream out2; string outputFile = util.getRootName(distFile) + "column.dist"; util.openInputFile(tempFile, in2); util.openOutputFile(outputFile, out2); int first, second; float dist; while (in2) { if (m->getControl_pressed()) { in2.close(); out2.close(); util.mothurRemove(tempFile); util.mothurRemove(outputFile); return 0; } in2 >> first >> second >> dist; out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl; gobble(in2); } in2.close(); out2.close(); util.mothurRemove(tempFile); distFile = outputFile; if (m->getControl_pressed()) { util.mothurRemove(outputFile); } return 0; } catch(exception& e) { m->errorOut(e, "ReadCluster", "convertPhylip2Column"); exit(1); } } /***********************************************************************/ int ReadCluster::convertPhylip2Column(CountTable*& ct){ try { //convert phylip file to column file map rowToName; map::iterator it; string tempFile = distFile + ".column.temp"; ifstream in; util.openInputFile(distFile, in); gobble(in); ofstream out; util.openOutputFile(tempFile, out); float distance; int square, nseqs; string name; vector matrixNames; string numTest; in >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } rowToName[0] = name; matrixNames.push_back(name); if(ct == nullptr){ list = new ListVector(nseqs); list->set(0, name); } else{ list = new ListVector(ct->getListVector()); } char d; while((d=in.get()) != EOF){ if(isalnum(d)){ square = 1; in.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = 0; break; } } if(square == 0){ for(int i=1;i> name; rowToName[i] = name; matrixNames.push_back(name); //there's A LOT of repeated 
code throughout this method... if(ct == nullptr){ list->set(i, name); for(int j=0;jgetControl_pressed()) { in.close(); out.close(); util.mothurRemove(tempFile); return 0; } in >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } if(distance <= cutoff){ out << i << '\t' << j << '\t' << distance << endl; } } } else{ for(int j=0;jgetControl_pressed()) { in.close(); out.close(); util.mothurRemove(tempFile); return 0; } in >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } if(distance <= cutoff){ out << i << '\t' << j << '\t' << distance << endl; } } } } } else{ for(int i=1;i> name; rowToName[i] = name; matrixNames.push_back(name); if(ct == nullptr){ list->set(i, name); for(int j=0;jgetControl_pressed()) { in.close(); out.close(); util.mothurRemove(tempFile); return 0; } in >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } if(distance <= cutoff && j < i){ out << i << '\t' << j << '\t' << distance << endl; } } } else{ for(int j=0;jgetControl_pressed()) { in.close(); out.close(); util.mothurRemove(tempFile); return 0; } in >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } if(distance <= cutoff && j < i){ out << i << '\t' << j << '\t' << distance << endl; } } } } } list->setLabel("0"); in.close(); out.close(); if(ct == nullptr){ ct = new CountTable(); for(int i=0;ipush_back(matrixNames[i]); } } ifstream in2; ofstream out2; string outputFile = util.getRootName(distFile) + "column.dist"; util.openInputFile(tempFile, in2); util.openOutputFile(outputFile, out2); int first, second; float dist; while (in2) { if (m->getControl_pressed()) { in2.close(); out2.close(); util.mothurRemove(tempFile); util.mothurRemove(outputFile); return 0; } in2 >> first >> second >> dist; out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl; gobble(in2); } in2.close(); out2.close(); util.mothurRemove(tempFile); distFile = outputFile; if (m->getControl_pressed()) { util.mothurRemove(outputFile); } return 0; } catch(exception& e) { m->errorOut(e, "ReadCluster", "convertPhylip2Column"); exit(1); } } /***********************************************************************/ ReadCluster::~ReadCluster(){} /***********************************************************************/ mothur-1.48.0/source/read/readcluster.h000077500000000000000000000016721424121717000200330ustar00rootroot00000000000000#ifndef READCLUSTER_H #define READCLUSTER_H /* * readcluster.h * Mothur * * Created by westcott on 10/28/09. * Copyright 2009 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "nameassignment.hpp" #include "listvector.hpp" #include "counttable.h" #include "utils.hpp" /******************************************************/ class ReadCluster { public: ReadCluster(string, float, string, bool); ~ReadCluster(); int read(NameAssignment*&); int read(CountTable*&); string getOutputFile() { return OutPutFile; } void setFormat(string f) { format = f; } ListVector* getListVector() { return list; } private: string distFile, outputDir; string OutPutFile, format; ListVector* list; float cutoff; MothurOut* m; bool sortWanted; Utils util; int convertPhylip2Column(NameAssignment*&); int convertPhylip2Column(CountTable*&); }; /******************************************************/ #endif mothur-1.48.0/source/read/readcolumn.cpp000077500000000000000000000205561424121717000202040ustar00rootroot00000000000000/* * readcolumn.cpp * Mothur * * Created by Sarah Westcott on 4/21/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "readcolumn.h" /***********************************************************************/ ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){ successOpen = util.openInputFile(distFile, fileHandle); sim = false; } /***********************************************************************/ ReadColumnMatrix::ReadColumnMatrix(string df, bool s) : distFile(df){ successOpen = util.openInputFile(distFile, fileHandle); sim = s; } /***********************************************************************/ int ReadColumnMatrix::read(NameAssignment* nameMap){ try { string firstName, secondName; float distance; int nseqs = nameMap->size(); DMatrix->resize(nseqs); list = new ListVector(nameMap->getListVector()); int lt = 1; int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then its a square matrix //need to see if this is a square or a triangular matrix... while(fileHandle && lt == 1){ //let's assume it's a triangular matrix... fileHandle >> firstName; gobble(fileHandle); fileHandle >> secondName; gobble(fileHandle); fileHandle >> distance; // get the row and column names and distance if (m->getDebug()) { cout << firstName << '\t' << secondName << '\t' << distance << endl; } if (m->getControl_pressed()) { fileHandle.close(); return 0; } map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff && itA != itB){ if(itA->second > itB->second){ PDistCell value(itA->second, distance); if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... refRow = itA->second; refCol = itB->second; DMatrix->addCell(itB->second, value); } else if(refRow == itA->second && refCol == itB->second){ lt = 0; } else{ DMatrix->addCell(itB->second, value); } } else if(itA->second < itB->second){ PDistCell value(itB->second, distance); if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... 
refRow = itA->second; refCol = itB->second; DMatrix->addCell(itA->second, value); } else if(refRow == itB->second && refCol == itA->second){ lt = 0; } else{ DMatrix->addCell(itA->second, value); } } } gobble(fileHandle); } if(lt == 0){ // oops, it was square fileHandle.close(); //let's start over DMatrix->clear(); //let's start over util.openInputFile(distFile, fileHandle); //let's start over while(fileHandle){ fileHandle >> firstName; gobble(fileHandle); fileHandle >> secondName; gobble(fileHandle); fileHandle >> distance; // get the row and column names and distance if (m->getControl_pressed()) { fileHandle.close(); return 0; } map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff && itA->second > itB->second){ PDistCell value(itA->second, distance); DMatrix->addCell(itB->second, value); } gobble(fileHandle); } } if (m->getControl_pressed()) { fileHandle.close(); return 0; } fileHandle.close(); list->setLabel("0"); return 1; } catch(exception& e) { m->errorOut(e, "ReadColumnMatrix", "read"); exit(1); } } /***********************************************************************/ int ReadColumnMatrix::read(CountTable* countTable){ try { string firstName, secondName; float distance; int nseqs = countTable->size(); DMatrix->resize(nseqs); list = new ListVector(countTable->getListVector()); int lt = 1; int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then its a square matrix //need to see if this is a square or a triangular matrix... while(fileHandle && lt == 1){ //let's assume it's a triangular matrix... fileHandle >> firstName; gobble(fileHandle); fileHandle >> secondName; gobble(fileHandle); fileHandle >> distance; // get the row and column names and distance if (m->getControl_pressed()) { fileHandle.close(); return 0; } int itA = countTable->get(firstName); int itB = countTable->get(secondName); if (m->getControl_pressed()) { exit(1); } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff && itA != itB){ if(itA > itB){ PDistCell value(itA, distance); if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... refRow = itA; refCol = itB; DMatrix->addCell(itB, value); } else if(refRow == itA && refCol == itB){ lt = 0; } else{ DMatrix->addCell(itB, value); } } else if(itA < itB){ PDistCell value(itB, distance); if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... 
refRow = itA; refCol = itB; DMatrix->addCell(itA, value); } else if(refRow == itB && refCol == itA){ lt = 0; } else{ DMatrix->addCell(itA, value); } } } gobble(fileHandle); } if(lt == 0){ // oops, it was square fileHandle.close(); //let's start over DMatrix->clear(); //let's start over util.openInputFile(distFile, fileHandle); //let's start over while(fileHandle){ fileHandle >> firstName; gobble(fileHandle); fileHandle >> secondName; gobble(fileHandle); fileHandle >> distance; // get the row and column names and distance if (m->getControl_pressed()) { fileHandle.close(); return 0; } int itA = countTable->get(firstName); int itB = countTable->get(secondName); if (m->getControl_pressed()) { exit(1); } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff && itA > itB){ PDistCell value(itA, distance); DMatrix->addCell(itB, value); } gobble(fileHandle); } } if (m->getControl_pressed()) { fileHandle.close(); return 0; } fileHandle.close(); list->setLabel("0"); return 1; } catch(exception& e) { m->errorOut(e, "ReadColumnMatrix", "read"); exit(1); } } /***********************************************************************/ ReadColumnMatrix::~ReadColumnMatrix(){} /***********************************************************************/ mothur-1.48.0/source/read/readcolumn.h000077500000000000000000000011161424121717000176400ustar00rootroot00000000000000#ifndef READCOLUMN_H #define READCOLUMN_H /* * readcolumn.h * Mothur * * Created by Sarah Westcott on 4/21/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "readmatrix.hpp" /******************************************************/ class ReadColumnMatrix : public ReadMatrix { public: ReadColumnMatrix(string); ReadColumnMatrix(string, bool); ~ReadColumnMatrix(); int read(NameAssignment*); int read(CountTable*); private: ifstream fileHandle; string distFile; }; /******************************************************/ #endif mothur-1.48.0/source/read/readmatrix.hpp000077500000000000000000000015761424121717000202210ustar00rootroot00000000000000#ifndef READMATRIX_HPP #define READMATRIX_HPP /* * readmatrix.hpp * * * Created by Pat Schloss on 8/13/08. * Copyright 2008 Patrick D. Schloss. All rights reserved. * */ #include "mothur.h" #include "listvector.hpp" #include "nameassignment.hpp" #include "counttable.h" #include "sparsedistancematrix.h" #include "utils.hpp" class ReadMatrix { public: ReadMatrix(){ DMatrix = new SparseDistanceMatrix(); m = MothurOut::getInstance(); } virtual ~ReadMatrix() = default; virtual int read(NameAssignment*){ return 1; } virtual int read(CountTable*){ return 1; } void setCutoff(float c) { cutoff = c; } SparseDistanceMatrix* getDMatrix() { return DMatrix; } ListVector* getListVector() { return list; } int successOpen; protected: SparseDistanceMatrix* DMatrix; ListVector* list; float cutoff; MothurOut* m; bool sim; Utils util; }; #endif mothur-1.48.0/source/read/readphylip.cpp000077500000000000000000000327271424121717000202170ustar00rootroot00000000000000/* * readphylip.cpp * Mothur * * Created by Sarah Westcott on 4/21/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "readphylip.h" /***********************************************************************/ ReadPhylipMatrix::ReadPhylipMatrix(string distFile){ successOpen = util.openInputFile(distFile, fileHandle); sim=false; } /***********************************************************************/ ReadPhylipMatrix::ReadPhylipMatrix(string distFile, bool s){ successOpen = util.openInputFile(distFile, fileHandle); sim=s; } /***********************************************************************/ int ReadPhylipMatrix::read(NameAssignment* nameMap){ try { float distance; int square, nseqs; square = 0; string name; vector matrixNames; string numTest; fileHandle >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } matrixNames.push_back(name); if(nameMap == nullptr){ list = new ListVector(nseqs); list->set(0, name); } else{ list = new ListVector(nameMap->getListVector()); if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct\n"); } } char d; while((d=fileHandle.get()) != EOF){ if(isalnum(d)){ square = 1; fileHandle.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = 0; break; } } DMatrix->resize(nseqs); if(square == 0){ int index = 0; for(int i=1;igetControl_pressed()) { fileHandle.close(); return 0; } fileHandle >> name; matrixNames.push_back(name); //there's A LOT of repeated code throughout this method... if(nameMap == nullptr){ list->set(i, name); for(int j=0;jgetControl_pressed()) { fileHandle.close(); return 0; } fileHandle >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff){ PDistCell value(i, distance); DMatrix->addCell(j, value); } index++; } } else{ if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct\n"); } for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return 0; } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff){ PDistCell value(nameMap->get(matrixNames[i]), distance); DMatrix->addCell(nameMap->get(matrixNames[j]), value); } index++; } } } } else{ int index = nseqs; for(int i=1;i> name; matrixNames.push_back(name); if(nameMap == nullptr){ list->set(i, name); for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return 0; } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff && j < i){ PDistCell value(i, distance); DMatrix->addCell(j, value); } index++; } } else{ if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct\n"); } for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return 0; } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. 
if(distance <= cutoff && j < i){ PDistCell value(nameMap->get(matrixNames[i]), distance); DMatrix->addCell(nameMap->get(matrixNames[j]), value); } index++; } } } } if (m->getControl_pressed()) { fileHandle.close(); return 0; } list->setLabel("0"); fileHandle.close(); return 1; } catch(exception& e) { m->errorOut(e, "ReadPhylipMatrix", "read"); exit(1); } } /***********************************************************************/ int ReadPhylipMatrix::read(CountTable* countTable){ try { float distance; int square, nseqs; square = 0; string name; vector matrixNames; string numTest; fileHandle >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting.\n"); exit(1); } else { convert(numTest, nseqs); } matrixNames.push_back(name); if(countTable == nullptr){ list = new ListVector(nseqs); list->set(0, name); } else{ list = new ListVector(countTable->getListVector()); } if (m->getControl_pressed()) { return 0; } char d; while((d=fileHandle.get()) != EOF){ if(isalnum(d)){ square = 1; fileHandle.putback(d); for(int i=0;i> distance; } break; } if(d == '\n'){ square = 0; break; } } DMatrix->resize(nseqs); if(square == 0){ int index = 0; for(int i=1;igetControl_pressed()) { fileHandle.close(); return 0; } fileHandle >> name; matrixNames.push_back(name); //there's A LOT of repeated code throughout this method... if(countTable == nullptr){ list->set(i, name); for(int j=0;jgetControl_pressed()) { fileHandle.close(); return 0; } fileHandle >> distance; if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff){ PDistCell value(i, distance); DMatrix->addCell(j, value); } index++; } } else{ for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return 0; } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff){ int iIndex = countTable->get(matrixNames[i]); int jIndex = countTable->get(matrixNames[j]); if (m->getControl_pressed()) { fileHandle.close(); return 0; } if (iIndex < jIndex) { PDistCell value(jIndex, distance); DMatrix->addCell(iIndex, value); }else { PDistCell value(iIndex, distance); DMatrix->addCell(jIndex, value); } } index++; } } } } else{ int index = nseqs; for(int i=1;i> name; matrixNames.push_back(name); if(countTable == nullptr){ list->set(i, name); for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return 0; } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(distance <= cutoff && j < i){ PDistCell value(i, distance); DMatrix->addCell(j, value); } index++; } } else{ for(int j=0;j> distance; if (m->getControl_pressed()) { fileHandle.close(); return 0; } if (util.isEqual(distance, -1)) { distance = 1000000; } else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. 
if(distance <= cutoff && j < i){ int iIndex = countTable->get(matrixNames[i]); int jIndex = countTable->get(matrixNames[j]); if (m->getControl_pressed()) { fileHandle.close(); return 0; } if (iIndex < jIndex) { PDistCell value(jIndex, distance); DMatrix->addCell(iIndex, value); }else { PDistCell value(iIndex, distance); DMatrix->addCell(jIndex, value); } } index++; } } } } if (m->getControl_pressed()) { fileHandle.close(); return 0; } list->setLabel("0"); fileHandle.close(); return 1; } catch(exception& e) { m->errorOut(e, "ReadPhylipMatrix", "read"); exit(1); } } /***********************************************************************/ ReadPhylipMatrix::~ReadPhylipMatrix(){} /***********************************************************************/ mothur-1.48.0/source/read/readphylip.h000077500000000000000000000011141424121717000176460ustar00rootroot00000000000000#ifndef READPHYLIP_H #define READPHYLIP_H /* * readphylip.h * Mothur * * Created by Sarah Westcott on 4/21/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "readmatrix.hpp" /******************************************************/ class ReadPhylipMatrix : public ReadMatrix { public: ReadPhylipMatrix(string); ReadPhylipMatrix(string, bool); ~ReadPhylipMatrix(); int read(NameAssignment*); int read(CountTable*); private: ifstream fileHandle; string distFile; }; /******************************************************/ #endif mothur-1.48.0/source/read/readphylipvector.cpp000077500000000000000000000075441424121717000214410ustar00rootroot00000000000000/* * readphylipvector.cpp * mothur * * Created by westcott on 1/11/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "readphylipvector.h" #include "utils.hpp" /***********************************************************************/ ReadPhylipVector::ReadPhylipVector(string d) { try { m = MothurOut::getInstance(); distFile = d; } catch(exception& e) { m->errorOut(e, "ReadPhylipVector", "ReadPhylipVector"); exit(1); } } /***********************************************************************/ vector ReadPhylipVector::read(vector< vector >& matrix) { try { vector names; ifstream in; Utils util; util.openInputFile(distFile, in); //check whether matrix is square char d; int square = 1; int numSeqs; string name; string numTest; in >> numTest >> name; if (!util.isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ". 
I suspect you entered a column formatted file as a phylip file, quitting.\n"); exit(1); } else { convert(numTest, numSeqs); } while((d=in.get()) != EOF){ //is d a number meaning its square if(isalnum(d)){ square = 1; break; } //is d a line return meaning its lower triangle if(d == '\n'){ square = 2; break; } } in.close(); //reopen and read now that you know whether you are square ifstream f; util.openInputFile(distFile, f); int rank; f >> rank; names.resize(rank); matrix.resize(rank); if(square == 1){ for(int i=0;i> names[i]; for(int j=0;jgetControl_pressed()) { return names; } f >> matrix[i][j]; if (util.isEqual(matrix[i][j], -0)) matrix[i][j] = 0.0000; } } } else if(square == 2){ for(int i=0;i> names[0]; for(int i=1;i> names[i]; matrix[i][i]=0.0000; for(int j=0;jgetControl_pressed()) { return names; } f >> matrix[i][j]; if (util.isEqual(matrix[i][j], -0)) matrix[i][j] = 0.0000; matrix[j][i]=matrix[i][j]; } } } f.close(); return names; } catch(exception& e) { m->errorOut(e, "ReadPhylipVector", "read"); exit(1); } } /***********************************************************************/ vector ReadPhylipVector::read(vector& matrix) { try { vector names; ifstream in; Utils util; util.openInputFile(distFile, in); //check whether matrix is square char d; int square = 1; int numSeqs; string name; in >> numSeqs >> name; while((d=in.get()) != EOF){ //is d a number meaning its square if(isalnum(d)){ square = 1; break; } //is d a line return meaning its lower triangle if(d == '\n'){ square = 2; break; } } in.close(); //reopen and read now that you know whether you are square ifstream f; util.openInputFile(distFile, f); int rank; float temp; f >> rank; names.resize(rank); if(square == 1){ for(int i=0;i> names[i]; for(int j=0;jgetControl_pressed()) { return names; } f >> temp; if (j < i) { //only save lt seqDist dist(i, j, temp); matrix.push_back(dist); } } } } else if(square == 2){ f >> names[0]; for(int i=1;i> names[i]; for(int j=0;jgetControl_pressed()) { return names; } f >> temp; seqDist dist(i, j, temp); matrix.push_back(dist); } } } f.close(); return names; } catch(exception& e) { m->errorOut(e, "ReadPhylipVector", "read"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/read/readphylipvector.h000077500000000000000000000014511424121717000210750ustar00rootroot00000000000000#ifndef READPHYLIPVECTOR_H #define READPHYLIPVECTOR_H /* * readphylipvector.h * mothur * * Created by westcott on 1/11/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "mothurout.h" /******************************************************/ class ReadPhylipVector { public: ReadPhylipVector(string); //phylipfile - lt or square ~ReadPhylipVector() = default; vector read(vector< vector >&); //pass in matrix to fill with values, returns vector of strings containing names in phylipfile vector read(vector&); //pass in matrix to fill with values, returns vector of strings containing names in phylipfile private: string distFile; MothurOut* m; }; /******************************************************/ #endif mothur-1.48.0/source/read/readtree.cpp000077500000000000000000000261541424121717000176460ustar00rootroot00000000000000/* * readtree.cpp * Mothur * * Created by Sarah Westcott on 1/22/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. 
* */ #include "readtree.h" /* Special characters to trees: , ) ( ; [ ] : */ /***********************************************************************/ ReadTree::ReadTree() { m = MothurOut::getInstance(); } /***********************************************************************/ int ReadTree::AssembleTrees() { try { //assemble users trees for (int i = 0; i < Trees.size(); i++) { if (m->getControl_pressed()) { return 0; } Trees[i]->assembleTree(); } return 0; } catch(exception& e) { m->errorOut(e, "ReadTree", "AssembleTrees"); exit(1); } } /***********************************************************************/ int ReadTree::readSpecialChar(istream& f, char c, string name) { try { Utils util; gobble(f); char d = f.get(); if(d == EOF){ m->mothurOut("Error: Input file ends prematurely, expecting a " + name + "\n"); exit(1); } if(d != c){ m->mothurOut("Error: Expected " + name + " in input file. Found " + toString(d) + ".\n"); exit(1); } if(d == ')' && f.peek() == '\n'){ gobble(f); } return d; } catch(exception& e) { m->errorOut(e, "ReadTree", "readSpecialChar"); exit(1); } } /**************************************************************************************************/ int ReadTree::readNodeChar(istream& f) { try { Utils util; gobble(f); char d = f.get(); if(d == EOF){ m->mothurOut("Error: Input file ends prematurely, expecting a left parenthesis\n"); exit(1); } return d; } catch(exception& e) { m->errorOut(e, "ReadTree", "readNodeChar"); exit(1); } } /**************************************************************************************************/ float ReadTree::readBranchLength(istream& f) { try { float b; if(!(f >> b)){ m->mothurOut("Error: Missing branch length in input tree.\n"); exit(1); } gobble(f); return b; } catch(exception& e) { m->errorOut(e, "ReadTree", "readBranchLength"); exit(1); } } /***********************************************************************/ /***********************************************************************/ //Child Classes Below /***********************************************************************/ /***********************************************************************/ //This class reads a file in Newick form and stores it in a tree. int ReadNewickTree::read(CountTable* ct) { try { holder = ""; int error = 0; int comment = 0; char c; //if you are not a nexus file if ((c = filehandle.peek()) != '#') { while((c = filehandle.peek()) != EOF) { if (m->getControl_pressed()) { filehandle.close(); return 0; } while ((c = filehandle.peek()) != EOF) { if (m->getControl_pressed()) { filehandle.close(); return 0; } // get past comments if(c == '[') { comment = 1; } if(c == ']') { comment = 0; } if((c == '(') && (comment != 1)){ break; } filehandle.get(); } //make new tree T = new Tree(ct, Treenames); numNodes = T->getNumNodes(); numLeaves = T->getNumLeaves(); error = readTreeString(ct); //save trees for later commands Trees.push_back(T); gobble(filehandle); } //if you are a nexus file }else if ((c = filehandle.peek()) == '#') { //get right number of seqs from nexus file. 
Tree* temp = new Tree(ct, Treenames); delete temp; nexusTranslation(ct); //reads file through the translation and updates treemap while((c = filehandle.peek()) != EOF) { if (m->getControl_pressed()) { filehandle.close(); return 0; } // get past comments while ((c = filehandle.peek()) != EOF) { if (m->getControl_pressed()) { filehandle.close(); return 0; } if(holder == "[" || holder == "[!") { comment = 1; } if(holder == "]") { comment = 0; } if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;} filehandle >> holder; } //pass over the "tree rep.6878900 = " while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;} if (c == EOF ) { break; } filehandle.putback(c); //put back first ( of tree. //make new tree T = new Tree(ct, Treenames); numNodes = T->getNumNodes(); numLeaves = T->getNumLeaves(); //read tree info error = readTreeString(ct); //save trees for later commands Trees.push_back(T); } } if (error != 0) { readOk = error; } filehandle.close(); return readOk; } catch(exception& e) { m->errorOut(e, "ReadNewickTree", "read"); exit(1); } } /**************************************************************************************************/ //This function read the file through the translation of the sequences names and updates treemap. string ReadNewickTree::nexusTranslation(CountTable* ct) { try { holder = ""; int numSeqs = Treenames.size(); //must save this some when we clear old names we can still know how many sequences there were int comment = 0; // get past comments while(holder != "translate" && holder != "Translate"){ if(holder == "[" || holder == "[!") { comment = 1; } if(holder == "]") { comment = 0; } filehandle >> holder; if(holder == "tree" && comment != 1){return holder;} } string number, name; for(int i=0;i> number; filehandle >> name; name.erase(name.end()-1); //erase the comma ct->renameSeq(name, toString(number)); } return name; } catch(exception& e) { m->errorOut(e, "ReadNewickTree", "nexusTranslation"); exit(1); } } /**************************************************************************************************/ int ReadNewickTree::readTreeString(CountTable* ct) { try { int n = 0; int lc, rc; int rooted = 0; int ch = filehandle.peek(); if(ch == '('){ n = numLeaves; //number of leaves / sequences, we want node 1 to start where the leaves left off lc = readNewickInt(filehandle, n, T, ct); if (lc == -1) { m->mothurOut("error with lc\n"); m->setControl_pressed(true); return -1; } //reports an error in reading if(filehandle.peek()==',') { readSpecialChar(filehandle,',',"comma"); } else if((ch=filehandle.peek())==';' || ch=='[') { rooted = 1; } // ';' means end of tree. if(rooted != 1){ rc = readNewickInt(filehandle, n, T, ct); if (rc == -1) { m->mothurOut("error with rc\n"); m->setControl_pressed(true); return -1; } //reports an error in reading if(filehandle.peek() == ')'){ readSpecialChar(filehandle,')',"right parenthesis"); } } } //note: treeclimber had the code below added - not sure why? 
else{ filehandle.putback(ch); char name[MAX_LINE]; filehandle.get(name, MAX_LINE,'\n'); SKIPLINE(filehandle, ch); n = T->getIndex(name); if(n!=0){ m->mothurOut("Internal error: The only taxon is not taxon 0.\n"); readOk = -1; return -1; } lc = rc = -1; } while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;} if(rooted != 1){ T->tree[n].setChildren(lc,rc); T->tree[n].setBranchLength(0); T->tree[n].setParent(-1); if(lc!=-1){ T->tree[lc].setParent(n); } if(rc!=-1){ T->tree[rc].setParent(n); } } return 0; } catch(exception& e) { m->errorOut(e, "ReadNewickTree", "readTreeString"); exit(1); } } /**************************************************************************************************/ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, CountTable* ct) { try { if (m->getControl_pressed()) { return -1; } int c = readNodeChar(f); if(c == '('){ //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating //read all children vector childrenNodes; while(f.peek() != ')'){ int child = readNewickInt(f, n, T, ct); if (child == -1) { return -1; } //reports an error in reading childrenNodes.push_back(child); //after a child you either have , or ), check for both if(f.peek()==')'){ break; } else if (f.peek()==',') { readSpecialChar(f,',',"comma"); } else {;} } if (childrenNodes.size() < 2) { m->mothurOut("Error in tree, please correct.\n"); return -1; } //then force into 2 node structure for (int i = 1; i < childrenNodes.size(); i++) { int lc, rc; if (i == 1) { lc = childrenNodes[i-1]; rc = childrenNodes[i]; } else { lc = n-1; rc = childrenNodes[i]; } T->tree[n].setChildren(lc,rc); T->tree[lc].setParent(n); T->tree[rc].setParent(n); n++; } //to account for extra ++ in looping n--; if(f.peek()==')'){ readSpecialChar(f,')',"right parenthesis"); //to pass over labels in trees c=filehandle.get(); while((c!=',') && (c != -1) && (c!= ':') && (c!=';')&& (c!=')')){ c=filehandle.get(); } filehandle.putback(c); } if(f.peek() == ':'){ readSpecialChar(f,':',"colon"); if(n >= numNodes){ m->mothurOut("Error: Too many nodes in input tree\n"); readOk = -1; return -1; } T->tree[n].setBranchLength(readBranchLength(f)); }else{ T->tree[n].setBranchLength(0.0); } return n++; }else{ f.putback(c); string name = ""; char d=f.get(); while(d != ':' && d != ',' && d!=')' && d!='\n'){ name += d; d=f.get(); } int blen = 0; if(d == ':') { blen = 1; } f.putback(d); //set group info vector group = ct->getGroups(name); //find index in tree of name int n1 = T->getIndex(name); //adds sequence names that are not in group file to the "xxx" group if(group.size() == 0) { m->mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. 
\n"); //readOk = -1; return n1; vector currentGroups = ct->getNamesOfGroups(); Utils util; if (!util.inUsersGroups("xxx", currentGroups)) { ct->addGroup("xxx"); } currentGroups = ct->getNamesOfGroups(); vector thisCounts; thisCounts.resize(currentGroups.size(), 0); for (int h = 0; h < currentGroups.size(); h++) { if (currentGroups[h] == "xxx") { thisCounts[h] = 1; break; } } ct->push_back(name, thisCounts); group.push_back("xxx"); } T->tree[n1].setGroup(group); T->tree[n1].setChildren(-1,-1); if(blen == 1) { f.get(); T->tree[n1].setBranchLength(readBranchLength(f)); } else { T->tree[n1].setBranchLength(0.0); } while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') ) {;} f.putback(c); return n1; } } catch(exception& e) { m->errorOut(e, "ReadNewickTree", "readNewickInt"); exit(1); } } /**************************************************************************************************/ mothur-1.48.0/source/read/readtree.h000077500000000000000000000032301424121717000173010ustar00rootroot00000000000000#ifndef READTREE_H #define READTREE_H /* * readtree.h * Mothur * * Created by Sarah Westcott on 1/22/09. * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "mothur.h" #include "tree.h" #include "counttable.h" #include "utils.hpp" #define MAX_LINE 513 #define SKIPLINE(f,c) {while((c=f.get())!=EOF && ((c) != '\n')){}} class Tree; /****************************************************************************/ class ReadTree { public: ReadTree(); virtual ~ReadTree() = default;; virtual int read(CountTable*) = 0; int readSpecialChar(istream&, char, string); int readNodeChar(istream& f); float readBranchLength(istream& f); vector getTrees() { return Trees; } int AssembleTrees(); protected: vector Trees; CountTable* ct; int numNodes, numLeaves; MothurOut* m; Utils util; }; /****************************************************************************/ class ReadNewickTree : public ReadTree { public: ReadNewickTree(string file, vector T) : treeFile(file), Treenames(T) { Utils util; util.openInputFile(file, filehandle); readOk = 0; if (Treenames.size() == 0) { Treenames = util.parseTreeFile(treeFile); } } ~ReadNewickTree() = default;; int read(CountTable*); private: Tree* T; int readNewickInt(istream&, int&, Tree*, CountTable*); int readTreeString(CountTable*); string nexusTranslation(CountTable*); ifstream filehandle; string treeFile; string holder; int readOk; // readOk = 0 means success, readOk = 1 means errors. vector Treenames; }; /****************************************************************************/ #endif mothur-1.48.0/source/read/splitmatrix.cpp000077500000000000000000000356451424121717000204400ustar00rootroot00000000000000/* * splitmatrix.cpp * Mothur * * Created by westcott on 5/19/10. * Copyright 2010 Schloss Lab. All rights reserved. 
*/ #include "splitmatrix.h" #include "phylotree.h" #include "distancecommand.h" #include "pairwiseseqscommand.h" #include "seqsummarycommand.h" #include "getseqscommand.h" #include "removeseqscommand.h" /***********************************************************************/ SplitMatrix::SplitMatrix(string ffile, string name, string count, string tax, float c, float cu, int p, bool cl, string output, bool v){ m = MothurOut::getInstance(); fastafile = ffile; namefile = name; countfile = count; taxFile = tax; cutoff = c; //tax level cutoff distCutoff = cu; //for fasta method if you are creating distance matrix you need a cutoff for that processors = p; classic = cl; outputDir = output; usingVsearchToCLuster = v; splitClassify(); } /***********************************************************************/ void SplitMatrix::splitClassify(){ try { cutoff = int(cutoff); map temp; util.readTax(taxFile, temp, true); PhyloTree phylo; for (map::iterator itTemp = temp.begin(); itTemp != temp.end();) { if (m->getControl_pressed()) { return; } phylo.addSeqToTree(itTemp->first, itTemp->second); temp.erase(itTemp++); } phylo.assignHeirarchyIDs(0); //make sure the cutoff is not greater than maxlevel if (cutoff > phylo.getMaxLevel()) { m->mothurOut("splitcutoff is greater than the longest taxonomy, using " + toString(phylo.getMaxLevel())); m->mothurOutEndLine(); cutoff = phylo.getMaxLevel(); } vector > seqGroups; //seqFroups[0] -> vector of string containing names of seqs assigned to group 0 vector taxGroupNames; //for each node in tree for (int i = 0; i < phylo.getNumNodes(); i++) { if (m->getControl_pressed()) { return; } //is this node within the cutoff TaxNode taxon = phylo.get(i); if (taxon.level == cutoff) {//if yes, then create group containing this nodes sequences if (taxon.accessions.size() > 1) { //if this taxon just has one seq its a singleton vector thisGroupsSeqs; for (int j = 0; j < taxon.accessions.size(); j++) { thisGroupsSeqs.push_back(taxon.accessions[j]); } seqGroups.push_back(thisGroupsSeqs); taxGroupNames.push_back(taxon.name); } } } if (usingVsearchToCLuster) { createFastaFilesFromTax(seqGroups, taxGroupNames); } else { createDistanceFilesFromTax(seqGroups, taxGroupNames); } } catch(exception& e) { m->errorOut(e, "SplitMatrix", "splitClassify"); exit(1); } } /***********************************************************************/ int SplitMatrix::createDistanceFilesFromTax(vector >& seqGroups, vector groupNames){ try { int numGroups = seqGroups.size(); string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(fastafile); } string nonSingletonsFile = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)) + "nonsingleton.accnos"; ofstream outNonSingleton; util.openOutputFile(nonSingletonsFile, outNonSingleton); ifstream inFASTA; util.openInputFile(fastafile, inFASTA); SequenceDB fullDB(inFASTA); inFASTA.close(); if (m->getDebug()) { for (int i = 0; i < numGroups; i++) { m->mothurOut("[DEBUG]: Number of unique sequences for group " + groupNames[i] + " (" + toString(i+1) + " of " + toString(numGroups) + "): " + toString(seqGroups[i].size()) + "\n\n"); } } //process each group for (int i = 0; i < numGroups; i++) { if (m->getControl_pressed()) { outNonSingleton.close(); util.mothurRemove(nonSingletonsFile); for (int i = 0; i < dists.size(); i++) { util.mothurRemove((dists[i].begin()->first)); util.mothurRemove((dists[i].begin()->second)); } dists.clear(); return 0; } unordered_set thisGroupsNames = util.mothurConvert(seqGroups[i]); 
m->mothurOut("/******************************************/\n"); m->mothurOut("Selecting sequences for group " + groupNames[i] + " (" + toString(i+1) + " of " + toString(numGroups) + ")\nNumber of unique sequences: " + toString(seqGroups[i].size()) + "\n\n"); string outName = ""; if (namefile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(namefile); } outName = thisOutputDir + util.getRootName(util.getSimpleName(namefile)) + toString(i) + ".name.temp"; } if (countfile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(countfile); } outName = thisOutputDir + util.getRootName(util.getSimpleName(countfile)) + toString(i) + ".count.temp"; } pair dupsFile(namefile, outName); string dupsFormat = "name"; if (countfile != "") { dupsFile.first = countfile; dupsFormat = "count"; } Command* getCommand = new GetSeqsCommand(thisGroupsNames, nullStringPair, nullStringPair, dupsFile, dupsFormat); delete getCommand; StorageDatabase* thisDB; m->mothurOut("\nCalculating distances for group " + groupNames[i] + " (" + toString(i+1) + " of " + toString(numGroups) + "):\n"); thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(fastafile); } string outputFileRoot = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)) + toString(i) + "."; string outputformat = "column"; if (classic) { outputformat = "lt"; } Command* command; vector< vector< int > > kmerDB; vector< int > lengths; if (fullDB.sameLength()) { thisDB = new SequenceDB(fullDB, thisGroupsNames); command = new DistanceCommand(thisDB, outputFileRoot, distCutoff, outputformat, processors); } else { thisDB = new SequenceDB(fullDB, thisGroupsNames, 7, kmerDB, lengths); command = new PairwiseSeqsCommand(thisDB, kmerDB, lengths, outputFileRoot, distCutoff, outputformat, processors); } map > filenames = command->getOutputFiles(); string thisDistanceFile = ""; if (classic) { thisDistanceFile = filenames["phylip"][0]; } else { thisDistanceFile = filenames["column"][0]; } delete command; m->mothurOut("/******************************************/\n"); if (!util.isBlank(thisDistanceFile)) { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(fastafile); } string outDist = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)) + toString(i) + ".disttemp"; util.renameFile(thisDistanceFile, outDist); map thisFilePair; thisFilePair[outDist] = outName; dists.push_back(thisFilePair); for (int j = 0; j < thisDB->getNumSeqs(); j++) { outNonSingleton << thisDB->getSeq(j).getName() << endl; } }else { util.mothurRemove(thisDistanceFile); util.mothurRemove(outName); } delete thisDB; } outNonSingleton.close(); if (!util.isBlank(nonSingletonsFile)) { //there are non singletons, so remove them to find the singletons //get singletons if (namefile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(namefile); } singleton = thisOutputDir + util.getRootName(util.getSimpleName(namefile)) + "singletons.temp"; } if (countfile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(countfile); } singleton = thisOutputDir + util.getRootName(util.getSimpleName(countfile)) + "singletons.temp"; } pair dupsFile(namefile, singleton); string dupsFormat = "name"; if (countfile != "") { dupsFile.first = countfile; dupsFormat = "count"; } m->mothurOut("/******************************************/\n"); m->mothurOut("Finding singletons (ignore 'Removing group' messages):\n\nRunning 
command: remove.seqs()\n"); Command* removeCommand = new RemoveSeqsCommand(nonSingletonsFile, dupsFile, dupsFormat); delete removeCommand; m->mothurOut("/******************************************/\n"); }else { //every seqs is a singleton if (namefile != "") { singleton = namefile; } else if (countfile != "") { singleton = countfile; } } if (util.isBlank(singleton)) { util.mothurRemove(singleton); singleton = "none"; } util.mothurRemove(nonSingletonsFile); if (m->getControl_pressed()) { for (int i = 0; i < dists.size(); i++) { util.mothurRemove((dists[i].begin()->first)); util.mothurRemove((dists[i].begin()->second)); } dists.clear(); } return 0; } catch(exception& e) { m->errorOut(e, "SplitMatrix", "createDistanceFilesFromTax"); exit(1); } } /***********************************************************************/ int SplitMatrix::createFastaFilesFromTax(vector >& seqGroups, vector groupNames){ try { int numGroups = seqGroups.size(); string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(fastafile); } singleton = "none"; ifstream inFASTA; util.openInputFile(fastafile, inFASTA); SequenceDB fullDB(inFASTA); inFASTA.close(); if (!fullDB.sameLength()) { m->mothurOut("[ERROR]: Cannot cluster using vsearch with unaligned sequences, please correct.\n\n"); m->setControl_pressed(true); } if (m->getDebug()) { for (int i = 0; i < numGroups; i++) { m->mothurOut("[DEBUG]: Number of unique sequences for group " + groupNames[i] + " (" + toString(i+1) + " of " + toString(numGroups) + "): " + toString(seqGroups[i].size()) + "\n\n"); } } //process each group for (int i = 0; i < numGroups; i++) { if (m->getControl_pressed()) { for (int i = 0; i < dists.size(); i++) { util.mothurRemove((dists[i].begin()->first)); util.mothurRemove((dists[i].begin()->second)); } dists.clear(); return 0; } unordered_set thisGroupsNames = util.mothurConvert(seqGroups[i]); m->mothurOut("/******************************************/\n"); m->mothurOut("Selecting sequences for group " + groupNames[i] + " (" + toString(i+1) + " of " + toString(numGroups) + ")\nNumber of unique sequences: " + toString(seqGroups[i].size()) + "\n\n"); string outName = ""; if (namefile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(namefile); } outName = thisOutputDir + util.getRootName(util.getSimpleName(namefile)) + toString(i) + ".name.temp"; } if (countfile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(countfile); } outName = thisOutputDir + util.getRootName(util.getSimpleName(countfile)) + toString(i) + ".count.temp"; } pair dupsFile(namefile, outName); string dupsFormat = "name"; if (countfile != "") { dupsFile.first = countfile; dupsFormat = "count"; } Command* getCommand = new GetSeqsCommand(thisGroupsNames, nullStringPair, nullStringPair, dupsFile, dupsFormat); delete getCommand; StorageDatabase* thisDB; thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += util.hasPath(fastafile); } string outFasta = thisOutputDir + util.getRootName(util.getSimpleName(fastafile)) + toString(i) + ".fastatemp"; thisDB = new SequenceDB(fullDB, thisGroupsNames); thisDB->print(outFasta); delete thisDB; map thisFilePair; thisFilePair[outFasta] = outName; dists.push_back(thisFilePair); } if (m->getControl_pressed()) { for (int i = 0; i < dists.size(); i++) { util.mothurRemove((dists[i].begin()->first)); util.mothurRemove((dists[i].begin()->second)); } dists.clear(); } return 0; } catch(exception& e) { m->errorOut(e, "SplitMatrix", 
"createDistanceFilesFromTax"); exit(1); } } /********************************************************************************************************************/ //sorts biggest to smallest inline bool compareFileSizes(map left, map right){ FILE * pFile; long leftsize = 0; //get num bytes in file string filename = left.begin()->first; pFile = fopen (filename.c_str(),"rb"); string error = "Error opening " + filename; if (pFile==nullptr) perror (error.c_str()); else{ fseek (pFile, 0, SEEK_END); leftsize=ftell (pFile); fclose (pFile); } FILE * pFile2; long rightsize = 0; //get num bytes in file filename = right.begin()->first; pFile2 = fopen (filename.c_str(),"rb"); error = "Error opening " + filename; if (pFile2==nullptr) perror (error.c_str()); else{ fseek (pFile2, 0, SEEK_END); rightsize=ftell (pFile2); fclose (pFile2); } return (leftsize > rightsize); } /***********************************************************************/ //returns map of distance files -> namefile sorted by distance file size vector< map< string, string> > SplitMatrix::getDistanceFiles(){ try { sort(dists.begin(), dists.end(), compareFileSizes); return dists; } catch(exception& e) { m->errorOut(e, "SplitMatrix", "getDistanceFiles"); exit(1); } } //********************************************************************************************************************/ mothur-1.48.0/source/read/splitmatrix.h000077500000000000000000000026541424121717000200770ustar00rootroot00000000000000#ifndef SPLITMATRIX_H #define SPLITMATRIX_H /* * splitmatrix.h * Mothur * * Created by westcott on 5/19/10. * Copyright 2010 Schloss Lab. All rights reserved. * */ #include "mothur.h" #include "mothurout.h" #include "utils.hpp" #include "counttable.h" /******************************************************/ class SplitMatrix { public: SplitMatrix(string, string, string, string, float, float, int, bool, string, bool); //fastafile, namefile, countfile, taxFile, taxcutoff, cutoff, processors, classic, outputDir, usingVsearchToCLuster ~SplitMatrix() = default; vector< map > getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size string getSingletonNames() { return singleton; } //returns namesfile or countfile containing singletons //long long getNumSingleton() { return numSingleton; } //returns namesfile containing singletons private: MothurOut* m; Utils util; string distFile, namefile, singleton, taxFile, fastafile, outputDir, countfile; vector< map< string, string> > dists; float cutoff, distCutoff; bool classic, usingVsearchToCLuster; int processors; void splitClassify(); int createDistanceFilesFromTax(vector >&, vector); int createFastaFilesFromTax(vector >&, vector); }; /******************************************************/ #endif mothur-1.48.0/source/read/treereader.cpp000077500000000000000000000103521424121717000201660ustar00rootroot00000000000000// // treereader.cpp // Mothur // // Created by Sarah Westcott on 4/11/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
// #include "treereader.h" #include "readtree.h" #include "groupmap.h" /***********************************************************************/ TreeReader::TreeReader(string tf, string cf) : treefile(tf), countfile(cf) { try { m = MothurOut::getInstance(); ct = new CountTable(); ct->readTable(cf, true, false); Utils util; Treenames = util.parseTreeFile(treefile); //fills treenames //if no groupinfo in count file we need to add it if (!ct->hasGroupInfo()) { ct->addGroup("Group1"); vector namesOfSeqs = ct->getNamesOfSeqs(); for (int i = 0; i < namesOfSeqs.size(); i++) { ct->setAbund(namesOfSeqs[i], "Group1", ct->getNumSeqs(namesOfSeqs[i])); } } namefile = ""; groupfile = ""; readTrees(); } catch(exception& e) { m->errorOut(e, "TreeReader", "TreeReader"); exit(1); } } /***********************************************************************/ TreeReader::TreeReader(string tf, string gf, string nf) : treefile(tf), groupfile(gf), namefile(nf) { try { m = MothurOut::getInstance(); Utils util; Treenames = util.parseTreeFile(treefile); //fills treenames countfile = ""; ct = new CountTable(); if (namefile != "") { ct->createTable(namefile, groupfile, nullVector, true); } else { set nameMap; map groupMap; set gps; for (int i = 0; i < Treenames.size(); i++) { nameMap.insert(Treenames[i]); } if (groupfile == "") { gps.insert("Group1"); for (int i = 0; i < Treenames.size(); i++) { groupMap[Treenames[i]] = "Group1"; } } else { GroupMap g(groupfile); g.readMap(); vector seqs = g.getNamesSeqs(); for (int i = 0; i < seqs.size(); i++) { string group = g.getGroup(seqs[i]); groupMap[seqs[i]] = group; gps.insert(group); } } ct->createTable(nameMap, groupMap, gps); } readTrees(); } catch(exception& e) { m->errorOut(e, "TreeReader", "TreeReader"); exit(1); } } /***********************************************************************/ bool TreeReader::readTrees() { try { int numUniquesInName = ct->getNumUniqueSeqs(); ReadTree* read = new ReadNewickTree(treefile, Treenames); int readOk = read->read(ct); if (readOk != 0) { m->mothurOut("Read Terminated.\n"); delete read; m->setControl_pressed(true); return 0; } read->AssembleTrees(); trees = read->getTrees(); delete read; //make sure all files match //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. int numNamesInTree; if (namefile != "") { if (numUniquesInName == Treenames.size()) { numNamesInTree = ct->getNumSeqs(); } else { numNamesInTree = Treenames.size(); } }else { numNamesInTree = Treenames.size(); } //output any names that are in group file but not in tree if (numNamesInTree < ct->getNumSeqs()) { vector namesSeqsCt = ct->getNamesOfSeqs(); for (int i = 0; i < namesSeqsCt.size(); i++) { //is that name in the tree? int count = 0; for (int j = 0; j < Treenames.size(); j++) { if (namesSeqsCt[i] == Treenames[j]) { break; } //found it count++; } if (m->getControl_pressed()) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; } return 0; } //then you did not find it so report it if (count == Treenames.size()) { m->mothurOut(namesSeqsCt[i] + " is in your name or group file and not in your tree. 
It will be disregarded.\n"); ct->remove(namesSeqsCt[i]); } } } return true; } catch(exception& e) { m->errorOut(e, "TreeReader", "readTrees"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/read/treereader.h000077500000000000000000000013011424121717000176250ustar00rootroot00000000000000#ifndef Mothur_treereader_h #define Mothur_treereader_h // // treereader.h // Mothur // // Created by Sarah Westcott on 4/11/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "mothurout.h" #include "tree.h" #include "counttable.h" class TreeReader { public: TreeReader(string tf, string cf); TreeReader(string tf, string gf, string nf); ~TreeReader() = default; vector getTrees() { return trees; } private: MothurOut* m; vector trees; CountTable* ct; vector Treenames; string treefile, groupfile, namefile, countfile; bool readTrees(); int readNamesFile(); }; #endif mothur-1.48.0/source/refchimeratest.cpp000077500000000000000000000474171424121717000201520ustar00rootroot00000000000000/* * refchimeratest.cpp * Mothur * * Created by Pat Schloss on 1/31/11. * Copyright 2011 Schloss Lab. All rights reserved. * */ #include "refchimeratest.h" #include "mothur.h" //*************************************************************************************************************** RefChimeraTest::RefChimeraTest(vector& refs, bool aligned) : aligned(aligned){ m = MothurOut::getInstance(); numRefSeqs = refs.size(); referenceSeqs.resize(numRefSeqs); referenceNames.resize(numRefSeqs); for(int i=0;ierrorOut(e, "RefChimeraTest", "printHeader"); exit(1); } } //*************************************************************************************************************** int RefChimeraTest::analyzeQuery(string queryName, string querySeq, string& output){ int numParents = -1; if(aligned){ numParents = analyzeAlignedQuery(queryName, querySeq, output); } else{ numParents = analyzeUnalignedQuery(queryName, querySeq, output); } return numParents; } //*************************************************************************************************************** int RefChimeraTest::analyzeAlignedQuery(string queryName, string querySeq, string& output){ vector > left; left.resize(numRefSeqs); vector > right; right.resize(numRefSeqs); vector singleLeft, bestLeft; vector singleRight, bestRight; for(int i=0;i= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){ nMera = 2; chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi); } else{ nMera = 1; chimeraRefSeq = referenceSeqs[bestMatchIndex]; } bestRefAlignment = chimeraRefSeq; bestQueryAlignment = querySeq; double distToChimera = calcDistToChimera(bestQueryAlignment, bestRefAlignment); output = queryName + "\t" + referenceNames[bestMatchIndex] + "\t" + toString(bestSequenceMismatch) + "\t"; output += referenceNames[leftParentBi] + ',' + referenceNames[rightParentBi] + "\t" + toString(breakPointBi) + "\t"; output += toString(minMismatchToChimera) + "\t"; output += toString(distToChimera) + "\t" + toString(nMera) +"\n"; bestMatch = bestMatchIndex; return nMera; } //*************************************************************************************************************** int RefChimeraTest::analyzeUnalignedQuery(string queryName, string querySeq, string& output){ int nMera = 0; int seqLength = querySeq.length(); vector queryAlign; queryAlign.resize(numRefSeqs); vector refAlign; refAlign.resize(numRefSeqs); vector > leftDiffs; leftDiffs.resize(numRefSeqs); vector > rightDiffs; 
rightDiffs.resize(numRefSeqs); vector > leftMaps; leftMaps.resize(numRefSeqs); vector > rightMaps; rightMaps.resize(numRefSeqs); int bestRefIndex = -1; int bestRefDiffs = numeric_limits::max(); double bestRefLength = 0; // if (queryName == "OTU_1008") { // cout << queryName << endl << querySeq << endl << endl; // } for(int i=0;i= 3){ for(int i=0;i singleLeft(seqLength, numeric_limits::max()); vector bestLeft(seqLength, -1); for(int l=0;l singleRight(seqLength, numeric_limits::max()); vector bestRight(seqLength, -1); for(int l=0;l::max(); int leftParent = 0; int rightParent = 0; int breakPoint = 0; for(int l=0;l= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){ nMera = 2; int breakLeft = leftMaps[leftParent][breakPoint]; int breakRight = rightMaps[rightParent][rightMaps[rightParent].size() - breakPoint - 2]; string left = refAlign[leftParent]; string right = refAlign[rightParent]; for(int i=0;i<=breakLeft;i++){ if (m->getControl_pressed()) { return 0; } if(left[i] != '-' && left[i] != '.'){ reference += left[i]; } } for(int i=breakRight;igetControl_pressed()) { return 0; } if(right[i] != '-' && right[i] != '.'){ reference += right[i]; } } } else{ nMera = 1; reference = referenceSeqs[bestRefIndex]; } double alignLength = 0.0; double finalDiffs = alignQueryToReferences(querySeq, reference, bestQueryAlignment, bestRefAlignment, alignLength); double finalDistance = finalDiffs / alignLength; output = queryName + "\t" + referenceNames[bestRefIndex] + "\t" + toString(bestRefDiffs) + "\t"; output += referenceNames[leftParent] + ',' + referenceNames[rightParent] + "\t" + toString(breakPoint) + "\t"; output += toString(bestChimeraMismatches) + "\t"; output += toString(finalDistance) + "\t" + toString(nMera) +"\n"; } else{ bestQueryAlignment = queryAlign[bestRefIndex]; bestRefAlignment = refAlign[bestRefIndex]; nMera = 1; output = queryName + "\t" + referenceNames[bestRefIndex] + "\t" + toString(bestRefDiffs) + "\tNA\tNA\tNA\tNA\t1\n"; } bestMatch = bestRefIndex; return nMera; } /**************************************************************************************************/ double RefChimeraTest::alignQueryToReferences(string query, string reference, string& qAlign, string& rAlign, double& length){ try { double GAP = -5; double MATCH = 1; double MISMATCH = -1; int queryLength = query.length(); int refLength = reference.length(); vector > alignMatrix; alignMatrix.resize(queryLength + 1); vector > alignMoves; alignMoves.resize(queryLength + 1); for(int i=0;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[i].resize(refLength + 1, 0); alignMoves[i].resize(refLength + 1, 'x'); } for(int i=0;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[i][0] = 0;//GAP * i; alignMoves[i][0] = 'u'; } for(int i=0;i<=refLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[0][i] = 0;//GAP * i; alignMoves[0][i] = 'l'; } for(int i=1;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } for(int j=1;j<=refLength;j++){ double nogapScore; if(query[i-1] == reference[j-1]){ nogapScore = alignMatrix[i-1][j-1] + MATCH; } else { nogapScore = alignMatrix[i-1][j-1] + MISMATCH; } double leftScore; if(i == queryLength) { leftScore = alignMatrix[i][j-1]; } else { leftScore = alignMatrix[i][j-1] + GAP; } double upScore; if(j == refLength) { upScore = alignMatrix[i-1][j]; } else { upScore = alignMatrix[i-1][j] + GAP; } if(nogapScore > leftScore){ if(nogapScore > upScore){ alignMoves[i][j] = 'd'; alignMatrix[i][j] = nogapScore; } else{ 
alignMoves[i][j] = 'u'; alignMatrix[i][j] = upScore; } } else{ if(leftScore > upScore){ alignMoves[i][j] = 'l'; alignMatrix[i][j] = leftScore; } else{ alignMoves[i][j] = 'u'; alignMatrix[i][j] = upScore; } } } } int end = refLength - 1; int maxRow = 0; double maxRowValue = -2147483647; for(int i=0;i maxRowValue){ maxRow = i; maxRowValue = alignMatrix[i][end]; } } end = queryLength - 1; int maxColumn = 0; double maxColumnValue = -2147483647; for(int j=0;j maxColumnValue){ maxColumn = j; maxColumnValue = alignMatrix[end][j]; } } int row = queryLength-1; int column = refLength-1; if(maxColumn == column && maxRow == row){} // if the max values are the lower right corner, then we're good else if(alignMatrix[row][maxColumn] < alignMatrix[maxRow][column]){ for(int i=maxRow+1;i 0 && j > 0){ if (m->getControl_pressed()) { return 0; } if(alignMoves[i][j] == 'd'){ qAlign = query[i-1] + qAlign; rAlign = reference[j-1] + rAlign; if(query[i-1] != reference[j-1]){ diffs++; } length++; i--; j--; } else if(alignMoves[i][j] == 'u'){ qAlign = query[i-1] + qAlign; if(j != refLength) { rAlign = '-' + rAlign; diffs++; length++; } else { rAlign = '.' + rAlign; } i--; } else if(alignMoves[i][j] == 'l'){ rAlign = reference[j-1] + rAlign; if(i != queryLength){ qAlign = '-' + qAlign; diffs++; length++; } else { qAlign = '.' + qAlign; } j--; } } if(i>0){ qAlign = query.substr(0, i) + qAlign; rAlign = string(i, '.') + rAlign; } else if(j>0){ qAlign = string(j, '.') + qAlign; rAlign = reference.substr(0, j) + rAlign; } if (length == 0) { diffs = MOTHURMAX; } return diffs; } catch(exception& e) { m->errorOut(e, "RefChimeraTest", "alignQueryToReferences"); exit(1); } } /**************************************************************************************************/ int RefChimeraTest::getUnalignedDiffs(string qAlign, string rAlign, vector& leftDiffs, vector& leftMap, vector& rightDiffs, vector& rightMap){ try { int alignLength = qAlign.length(); int lDiffs = 0; int lCount = 0; for(int l=0;lgetControl_pressed()) { return 0; } if(qAlign[l] == '-'){ lDiffs++; } else if(qAlign[l] != '.'){ if(rAlign[l] == '-'){ lDiffs++; } else if(qAlign[l] != rAlign[l]){;// && rAlign[l] != '.'){ lDiffs++; } leftDiffs[lCount] = lDiffs; leftMap[lCount] = l; lCount++; } } int rDiffs = 0; int rCount = 0; for(int l=alignLength-1;l>=0;l--){ if (m->getControl_pressed()) { return 0; } if(qAlign[l] == '-'){ rDiffs++; } else if(qAlign[l] != '.'){ if(rAlign[l] == '-'){ rDiffs++; } else if(qAlign[l] != rAlign[l]){;// && rAlign[l] != '.'){ rDiffs++; } rightDiffs[rCount] = rDiffs; rightMap[rCount] = l; rCount++; } } return 0; } catch(exception& e) { m->errorOut(e, "RefChimeraTest", "getUnalignedDiffs"); exit(1); } } /**************************************************************************************************/ int RefChimeraTest::getAlignedMismatches(string& querySeq, vector >& left, vector >& right, int& bestRefSeq){ int bestSequenceMismatch = MOTHURMAX; for(int i=0;i=0;l--){ if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' 
&& querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){ rDiffs++; } right[i][index++] = rDiffs; } if(lDiffs < bestSequenceMismatch){ bestSequenceMismatch = lDiffs; bestRefSeq = i; } } return bestSequenceMismatch; } /**************************************************************************************************/ int RefChimeraTest::getChimera(vector >& left, vector >& right, int& leftParent, int& rightParent, int& breakPoint, vector& singleLeft, vector& bestLeft, vector& singleRight, vector& bestRight){ singleLeft.resize(alignLength, MOTHURMAX); bestLeft.resize(alignLength, -1); for(int l=0;l >& left, vector >& right, int& leftParent, int& middleParent, int& rightParent, int& breakPointA, int& breakPointB, vector& singleLeft, vector& bestLeft, vector& singleRight, vector& bestRight){ int bestTrimeraMismatches = MOTHURMAX; leftParent = -1; middleParent = -1; rightParent = -1; breakPointA = -1; breakPointB = -1; vector > minDelta; minDelta.resize(alignLength); vector > minDeltaSeq; minDeltaSeq.resize(alignLength); for(int i=0;i&, bool); string getHeader(); int analyzeQuery(string, string, string&); int getClosestRefIndex(); string getClosestRefAlignment(); string getQueryAlignment(); private: int getAlignedMismatches(string&, vector >&, vector >&, int&); int analyzeAlignedQuery(string, string, string&); int analyzeUnalignedQuery(string, string, string&); double alignQueryToReferences(string, string, string&, string&, double&); int getUnalignedDiffs(string, string, vector&, vector&, vector&, vector&); int getChimera(vector >&, vector >&, int&, int&, int&, vector&, vector&, vector&, vector&); int getTrimera(vector >&, vector >&, int&, int&, int&, int&, int&, vector&, vector&, vector&, vector&); string stitchBimera(int, int, int); string stitchTrimera(int, int, int, int, int); double calcDistToChimera(string&, string&); vector referenceSeqs; vector referenceNames; int numRefSeqs; int alignLength; int bestMatch; string bestRefAlignment; string bestQueryAlignment; bool aligned; MothurOut* m; }; #endif mothur-1.48.0/source/sensspeccalc.cpp000077500000000000000000000053421424121717000176020ustar00rootroot00000000000000// // sensspeccalc.cpp // Mothur // // Created by Sarah Westcott on 1/22/18. // Copyright © 2018 Schloss Lab. All rights reserved. 
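// RefChimeraTest::alignQueryToReferences above scores a full dynamic-programming
// matrix (MATCH = 1, MISMATCH = -1, GAP = -5, with terminal gaps left unpenalized)
// and then traces back to count query/reference differences. A compact sketch of the
// underlying Needleman-Wunsch recurrence, without the traceback or the terminal-gap
// handling (illustrative only; the helper name is made up):
#include <algorithm>
#include <string>
#include <vector>

static double exampleGlobalAlignScore(const std::string& query, const std::string& ref) {
    const double MATCH = 1.0, MISMATCH = -1.0, GAP = -5.0;
    const int n = (int)query.size(), m = (int)ref.size();
    std::vector<std::vector<double> > score(n + 1, std::vector<double>(m + 1, 0.0));
    for (int i = 1; i <= n; i++) { score[i][0] = score[i-1][0] + GAP; }
    for (int j = 1; j <= m; j++) { score[0][j] = score[0][j-1] + GAP; }
    for (int i = 1; i <= n; i++) {
        for (int j = 1; j <= m; j++) {
            double diag = score[i-1][j-1] + ((query[i-1] == ref[j-1]) ? MATCH : MISMATCH);
            double up   = score[i-1][j] + GAP;   // gap opened in the reference
            double left = score[i][j-1] + GAP;   // gap opened in the query
            score[i][j] = std::max(diag, std::max(up, left));
        }
    }
    return score[n][m];   // best global alignment score
}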
// #include "sensspeccalc.hpp" //*************************************************************************************************************** //removes anyone with no valid dists and changes name to matrix short names SensSpecCalc::SensSpecCalc(OptiData& matrix, ListVector* list){ try { m = MothurOut::getInstance(); map nameIndex = matrix.getNameIndexMap(); if (list != nullptr) { //for each bin for (int i = 0; i < list->getNumBins(); i++) { string binnames = list->get(i); vector bnames; util.splitAtComma(binnames, bnames); vector newNames; for (int j = 0; j < bnames.size(); j++) { string name = bnames[j]; map::iterator itSeq1 = nameIndex.find(name); long long seq1Index = -1; if (itSeq1 != nameIndex.end()) { seq1Index = itSeq1->second; } //you have distances in the matrix newNames.push_back(seq1Index); } //if there are names in this bin add to new list if (newNames.size() != 0) { otus.push_back(newNames); } } } } catch(exception& e) { m->errorOut(e, "SensSpecCalc", "SensSpecCalc"); exit(1); } } //*************************************************************************************************************** void SensSpecCalc::getResults(OptiData& matrix, double& tp, double& tn, double& fp, double& fn){ try { tp = 0; tn = 0; fp = 0; fn = 0; for(int otu=0;otugetControl_pressed()) { break; } for(int i=0;ierrorOut(e, "SensSpecCalc", "getResults"); exit(1); } } //*************************************************************************************************************** mothur-1.48.0/source/sensspeccalc.hpp000077500000000000000000000010551424121717000176040ustar00rootroot00000000000000// // sensspeccalc.hpp // Mothur // // Created by Sarah Westcott on 1/22/18. // Copyright © 2018 Schloss Lab. All rights reserved. // #ifndef sensspeccalc_hpp #define sensspeccalc_hpp #include "mothurout.h" #include "optimatrix.h" class SensSpecCalc { public: SensSpecCalc(OptiData& matrix, ListVector* list); ~SensSpecCalc(){} void getResults(OptiData& matrix, double& tp, double& tn, double& fp, double& fn); private: Utils util; MothurOut* m; vector > otus; }; #endif /* sensspeccalc_hpp */ mothur-1.48.0/source/seqnoise.cpp000077500000000000000000000626021424121717000167640ustar00rootroot00000000000000/* * mySeqNoise.cpp * * * Created by Pat Schloss on 8/31/11. * Copyright 2011 Patrick D. Schloss. All rights reserved. 
* */ #include "seqnoise.h" #include "sequence.hpp" #include "listvector.hpp" #include "inputdata.h" #define MIN_DELTA 1.0e-6 #define MIN_ITER 20 #define MAX_ITER 1000 #define MIN_COUNT 0.1 #define MIN_TAU 1.0e-4 #define MIN_WEIGHT 0.1 /**************************************************************************************************/ int seqNoise::getSequenceData(string sequenceFileName, vector& sequences){ try { ifstream sequenceFile; util.openInputFile(sequenceFileName, sequenceFile); while(!sequenceFile.eof()){ if (m->getControl_pressed()) { break; } Sequence temp(sequenceFile); gobble(sequenceFile); if (temp.getName() != "") { sequences.push_back(temp.getAligned()); } } sequenceFile.close(); return 0; } catch(exception& e) { m->errorOut(e, "seqNoise", "getSequenceData"); exit(1); } } /**************************************************************************************************/ int seqNoise::addSeq(string seq, vector& sequences){ try { sequences.push_back(seq); return 0; } catch(exception& e) { m->errorOut(e, "seqNoise", "addSeq"); exit(1); } } /**************************************************************************************************/ //no checks for file mismatches int seqNoise::getRedundantNames(string namesFileName, vector& uniqueNames, vector& redundantNames, vector& seqFreq){ try { string unique, redundant; ifstream namesFile; util.openInputFile(namesFileName, namesFile); for(int i=0;igetControl_pressed()) { break; } namesFile >> uniqueNames[i]; gobble(namesFile); namesFile >> redundantNames[i]; gobble(namesFile); seqFreq[i] = util.getNumNames(redundantNames[i]); } namesFile.close(); return 0; } catch(exception& e) { m->errorOut(e, "seqNoise", "getRedundantNames"); exit(1); } } /**************************************************************************************************/ int seqNoise::addRedundantName(string uniqueName, string redundantName, vector& uniqueNames, vector& redundantNames, vector& seqFreq){ try { uniqueNames.push_back(uniqueName); redundantNames.push_back(redundantName); seqFreq.push_back(util.getNumNames(redundantName)); return 0; } catch(exception& e) { m->errorOut(e, "seqNoise", "addRedundantName"); exit(1); } } /**************************************************************************************************/ int seqNoise::getDistanceData(string distFileName, vector& distances){ try { ifstream distFile; util.openInputFile(distFileName, distFile); int numSeqs = 0; string name = ""; distFile >> numSeqs; for(int i=0;igetControl_pressed()) { break; } distances[i * numSeqs + i] = 0.0000; distFile >> name; for(int j=0;j> distances[i * numSeqs + j]; distances[j * numSeqs + i] = distances[i * numSeqs + j]; } } distFile.close(); return 0; } catch(exception& e) { m->errorOut(e, "seqNoise", "getDistanceData"); exit(1); } } /**************************************************************************************************/ int seqNoise::getListData(string listFileName, double cutOff, vector& otuData, vector& otuFreq, vector >& otuBySeqLookUp){ try { ifstream listFile; util.openInputFile(listFileName, listFile); bool adjustCutoff = true; string lastLabel = ""; string readLabels = ""; while(!listFile.eof()){ ListVector list(listFile, readLabels, lastLabel); gobble(listFile); //10/18/13 - change to reading with listvector to accomodate changes to the listfiel format. ie. adding header labels. 
string thisLabel = list.getLabel(); lastLabel = thisLabel; if (thisLabel == "unique") {} //skip to next label in listfile else { double threshold; util.mothurConvert(thisLabel, threshold); if(threshold < cutOff){} //skip to next label in listfile else{ adjustCutoff = false; int numOTUs = list.getNumBins(); otuFreq.resize(numOTUs, 0); for(int i=0;igetControl_pressed()) { return 0; } string otu = list.get(i); int count = 0; string number = ""; for(int j=0;jgetControl_pressed()) { return 0; } otuBySeqLookUp[otuData[i]].push_back(i); } for(int i=0;igetControl_pressed()) { return 0; } for(int j=otuBySeqLookUp[i].size();jgetNumBins(); otuFreq.resize(numOTUs, 0); for(int i=0;igetControl_pressed()) { return 0; } string otu = list->get(i); int count = 0; string number = ""; for(int j=0;jgetControl_pressed()) { return 0; } otuBySeqLookUp[otuData[i]].push_back(i); } for(int i=0;igetControl_pressed()) { return 0; } for(int j=otuBySeqLookUp[i].size();jerrorOut(e, "seqNoise", "getListData"); exit(1); } } /**************************************************************************************************/ int seqNoise::updateOTUCountData(vector otuFreq, vector > otuBySeqLookUp, vector > aanI, vector& anP, vector& anI, vector& cumCount ){ try { int numOTUs = otuFreq.size(); int count = 0; for(int i=0;igetControl_pressed()) { return 0; } for(int j=0;jerrorOut(e, "seqNoise", "updateOTUCountData"); exit(1); } } /**************************************************************************************************/ double seqNoise::calcNewWeights( vector& weights, // vector seqFreq, // vector anI, // vector cumCount, // vector anP, // vector otuFreq, // vector tau // ){ try { int numOTUs = weights.size(); double maxChange = -1; cout.flush(); for(int i=0;igetControl_pressed()) { return 0; } double change = weights[i]; weights[i] = 0.0000; for(int j=0;j maxChange){ maxChange = change; } cout.flush(); } return maxChange; } catch(exception& e) { m->errorOut(e, "seqNoise", "calcNewWeights"); exit(1); } } /**************************************************************************************************/ int seqNoise::calcCentroids( vector anI, vector anP, vector& change, vector& centroids, vector cumCount, vector distances,/// vector seqFreq, vector otuFreq, vector tau ){ try { int numOTUs = change.size(); int numSeqs = seqFreq.size(); for(int i=0;igetControl_pressed()) { return 0; } int minFIndex = -1; double minFValue = 1e10; change[i] = 0; double count = 0.00000; int freqOfOTU = otuFreq[i]; for(int j=0;j 0 && count > MIN_COUNT){ vector adF(freqOfOTU); vector anL(freqOfOTU); for(int j=0;jerrorOut(e, "seqNoise", "calcCentroids"); exit(1); } } /**************************************************************************************************/ int seqNoise::checkCentroids(vector& weights, vector centroids){ try { int numOTUs = centroids.size(); vector unique(numOTUs, 1); double minWeight = MIN_WEIGHT; for(int i=0;igetControl_pressed()) { return 0; } if(weights[i] < minWeight){ unique[i] = -1; } } for(int i=0;igetControl_pressed()) { return 0; } if(unique[i] == 1){ for(int j=i+1; jerrorOut(e, "seqNoise", "checkCentroids"); exit(1); } } /**************************************************************************************************/ int seqNoise::setUpOTUData(vector& otuData, vector& percentage, vector cumCount, vector tau, vector otuFreq, vector anP, vector anI){ try { int numOTUs = cumCount.size(); int numSeqs = otuData.size(); vector bestTau(numSeqs, 0); vector bestIndex(numSeqs, -1); for(int 
i=0;igetControl_pressed()) { return 0; } for(int j=0;j bestTau[index2]){ bestTau[index2] = thisTau; bestIndex[index2] = i; } } } for(int i=0;igetControl_pressed()) { return 0; } otuData[i] = bestIndex[i]; percentage[i] = 1 - bestTau[i]; } return 0; } catch(exception& e) { m->errorOut(e, "seqNoise", "setUpOTUData"); exit(1); } } /**************************************************************************************************/ int seqNoise::finishOTUData(vector otuData, vector& otuFreq, vector& anP, vector& anI, vector& cumCount, vector >& otuBySeqLookUp, vector >& aanI, vector& tau){ try { int numSeqs = otuData.size(); int numOTUs = otuFreq.size(); int total = numSeqs; otuFreq.assign(numOTUs, 0); tau.assign(numSeqs, 1); anP.assign(numSeqs, 0); anI.assign(numSeqs, 0); for(int i=0;igetControl_pressed()) { return 0; } int otu = otuData[i]; total++; otuBySeqLookUp[otu][otuFreq[otu]] = i; aanI[otu][otuFreq[otu]] = i; otuFreq[otu]++; } updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount); return 0; } catch(exception& e) { m->errorOut(e, "seqNoise", "finishOTUData"); exit(1); } } /**************************************************************************************************/ int seqNoise::getLastMatch(char direction, vector >& alignMoves, int i, int j, vector& seqA, vector& seqB){ try{ char nullReturn = -1; while(i>=1 && j>=1){ if (m->getControl_pressed()) { return nullReturn; } if(direction == 'd'){ if(seqA[i-1] == seqB[j-1]) { return seqA[i-1]; } else { return nullReturn; } } else if(direction == 'l') { j--; } else { i--; } direction = alignMoves[i][j]; } return nullReturn; } catch(exception& e) { m->errorOut(e, "seqNoise", "getLastMatch"); exit(1); } } /**************************************************************************************************/ int seqNoise::countDiffs(vector query, vector ref){ try { //double MATCH = 5.0; //double MISMATCH = -2.0; //double GAP = -2.0; vector > correctMatrix(4); for(int i=0;i<4;i++){ correctMatrix[i].resize(4); } correctMatrix[0][0] = 0.000000; //AA correctMatrix[1][0] = 11.619259; //CA correctMatrix[2][0] = 11.694004; //TA correctMatrix[3][0] = 7.748623; //GA correctMatrix[1][1] = 0.000000; //CC correctMatrix[2][1] = 7.619657; //TC correctMatrix[3][1] = 12.852562; //GC correctMatrix[2][2] = 0.000000; //TT correctMatrix[3][2] = 10.964048; //TG correctMatrix[3][3] = 0.000000; //GG for(int i=0;i<4;i++){ for(int j=0;j > alignMatrix(queryLength + 1); vector > alignMoves(queryLength + 1); for(int i=0;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[i].resize(refLength + 1, 0); alignMoves[i].resize(refLength + 1, 'x'); } for(int i=0;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[i][0] = 15.0 * i; alignMoves[i][0] = 'u'; } for(int i=0;i<=refLength;i++){ if (m->getControl_pressed()) { return 0; } alignMatrix[0][i] = 15.0 * i; alignMoves[0][i] = 'l'; } for(int i=1;i<=queryLength;i++){ if (m->getControl_pressed()) { return 0; } for(int j=1;j<=refLength;j++){ double nogap; nogap = alignMatrix[i-1][j-1] + correctMatrix[query[i-1]][ref[j-1]]; double gap; double left; if(i == queryLength){ //terminal gap left = alignMatrix[i][j-1]; } else{ if(ref[j-1] == getLastMatch('l', alignMoves, i, j, query, ref)){ gap = 4.0; } else{ gap = 15.0; } left = alignMatrix[i][j-1] + gap; } double up; if(j == refLength){ //terminal gap up = alignMatrix[i-1][j]; } else{ if(query[i-1] == getLastMatch('u', alignMoves, i, j, query, ref)){ gap = 4.0; } else{ gap = 15.0; } up = alignMatrix[i-1][j] + gap; } if(nogap 
< left){ if(nogap < up){ alignMoves[i][j] = 'd'; alignMatrix[i][j] = nogap; } else{ alignMoves[i][j] = 'u'; alignMatrix[i][j] = up; } } else{ if(left < up){ alignMoves[i][j] = 'l'; alignMatrix[i][j] = left; } else{ alignMoves[i][j] = 'u'; alignMatrix[i][j] = up; } } } } int i = queryLength; int j = refLength; int diffs = 0; // string alignA = ""; // string alignB = ""; // string bases = "ACTG"; while(i > 0 && j > 0){ if (m->getControl_pressed()) { return 0; } if(alignMoves[i][j] == 'd'){ // alignA = bases[query[i-1]] + alignA; // alignB = bases[ref[j-1]] + alignB; if(query[i-1] != ref[j-1]) { diffs++; } i--; j--; } else if(alignMoves[i][j] == 'u'){ if(j != refLength){ // alignA = bases[query[i-1]] + alignA; // alignB = '-' + alignB; diffs++; } i--; } else if(alignMoves[i][j] == 'l'){ if(i != queryLength){ // alignA = '-' + alignA; // alignB = bases[ref[j-1]] + alignB; diffs++; } j--; } } return diffs; } catch(exception& e) { m->errorOut(e, "seqNoise", "countDiffs"); exit(1); } } /**************************************************************************************************/ vector seqNoise::convertSeq(string bases){ try { vector numbers(bases.length(), -1); for(int i=0;igetControl_pressed()) { return numbers; } char b = bases[i]; if(b == 'A') { numbers[i] = 0; } else if(b=='C') { numbers[i] = 1; } else if(b=='T') { numbers[i] = 2; } else if(b=='G') { numbers[i] = 3; } else { numbers[i] = 0; } } return numbers; } catch(exception& e) { m->errorOut(e, "seqNoise", "convertSeq"); exit(1); } } /**************************************************************************************************/ string seqNoise::degapSeq(string aligned){ try { string unaligned = ""; for(int i=0;igetControl_pressed()) { return ""; } if(aligned[i] != '-' && aligned[i] != '.'){ unaligned += aligned[i]; } } return unaligned; } catch(exception& e) { m->errorOut(e, "seqNoise", "degapSeq"); exit(1); } } /**************************************************************************************************/ int seqNoise::writeOutput(string fastaFileName, string namesFileName, string uMapFileName, vector finalTau, vector centroids, vector otuData, vector sequences, vector uniqueNames, vector redundantNames, vector seqFreq, vector& distances){ try { int numOTUs = finalTau.size(); int numSeqs = uniqueNames.size(); ofstream fastaFile(fastaFileName.c_str()); ofstream namesFile(namesFileName.c_str()); ofstream uMapFile(uMapFileName.c_str()); vector maxSequenceAbund(numOTUs, 0); vector maxSequenceIndex(numOTUs, 0); for(int i=0;igetControl_pressed()) { return 0; } if(maxSequenceAbund[otuData[i]] < seqFreq[i]){ maxSequenceAbund[otuData[i]] = seqFreq[i]; maxSequenceIndex[otuData[i]] = i; } } int count = 1; for(int i=0;igetControl_pressed()) { return 0; } if(finalTau[i] > 0){ if((maxSequenceIndex[i] != centroids[i]) && util.isEqual(distances[maxSequenceIndex[i]*numSeqs + centroids[i]], 0)){ centroids[i] = maxSequenceIndex[i]; } int index = centroids[i]; fastaFile << '>' << uniqueNames[index] << endl << sequences[index] << endl; namesFile << uniqueNames[index] << '\t'; string refSeq = sequences[index]; string redundantSeqs = redundantNames[index];; vector frequencyData; for(int j=0;j rUnalign = convertSeq(refDegap); uMapFile << "ideal_seq_" << count << '\t' << finalTau[i] << endl; uMapFile << uniqueNames[index] << '\t' << seqFreq[index] << "\t0\t" << refDegap << endl; for(int j=0;jgetControl_pressed()) { return 0; } redundantSeqs += ',' + redundantNames[frequencyData[j].index]; uMapFile << uniqueNames[frequencyData[j].index] << '\t' 
<< seqFreq[frequencyData[j].index] << '\t'; string querySeq = sequences[frequencyData[j].index]; string queryDegap = degapSeq(querySeq); vector qUnalign = convertSeq(queryDegap); int udiffs = countDiffs(qUnalign, rUnalign); uMapFile << udiffs << '\t' << queryDegap << endl; } uMapFile << endl; namesFile << redundantSeqs << endl; count++; } } fastaFile.close(); namesFile.close(); uMapFile.close(); return 0; } catch(exception& e) { m->errorOut(e, "seqNoise", "writeOutput"); exit(1); } } /************************************************************************************************** int main(int argc, char *argv[]){ double sigma = 100; sigma = atof(argv[5]); double cutOff = 0.08; int minIter = 10; int maxIter = 1000; double minDelta = 1e-6; string sequenceFileName = argv[1]; string fileNameStub = sequenceFileName.substr(0,sequenceFileName.find_last_of('.')) + ".shhh"; vector sequences; getSequenceData(sequenceFileName, sequences); int numSeqs = sequences.size(); vector uniqueNames(numSeqs); vector redundantNames(numSeqs); vector seqFreq(numSeqs); string namesFileName = argv[4]; getRedundantNames(namesFileName, uniqueNames, redundantNames, seqFreq); string distFileName = argv[2]; vector distances(numSeqs * numSeqs); getDistanceData(distFileName, distances); string listFileName = argv[3]; vector otuData(numSeqs); vector otuFreq; vector > otuBySeqLookUp; getListData(listFileName, cutOff, otuData, otuFreq, otuBySeqLookUp); int numOTUs = otuFreq.size(); vector weights(numOTUs, 0); vector change(numOTUs, 1); vector centroids(numOTUs, -1); vector cumCount(numOTUs, 0); vector tau(numSeqs, 1); vector anP(numSeqs, 0); vector anI(numSeqs, 0); vector anN(numSeqs, 0); vector > aanI = otuBySeqLookUp; int numIters = 0; double maxDelta = MOTHURMAX; while(numIters < minIter || ((maxDelta > minDelta) && (numIters < maxIter))){ updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount); maxDelta = calcNewWeights(weights, seqFreq, anI, cumCount, anP, otuFreq, tau); calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); checkCentroids(weights, centroids); otuFreq.assign(numOTUs, 0); int total = 0; for(int i=0;i currentTau(numOTUs); for(int j=0;j minWeight && distances[i * numSeqs+centroids[j]] < offset){ offset = distances[i * numSeqs+centroids[j]]; } } for(int j=0;j minWeight){ currentTau[j] = exp(sigma * (-distances[(i * numSeqs + centroids[j])] + offset)) * weights[j]; norm += currentTau[j]; } else{ currentTau[j] = 0.0000; } } for(int j=0;j MIN_TAU){ int oldTotal = total; total++; tau.resize(oldTotal+1); tau[oldTotal] = currentTau[j]; otuBySeqLookUp[j][otuFreq[j]] = oldTotal; aanI[j][otuFreq[j]] = i; otuFreq[j]++; } } anP.resize(total); anI.resize(total); } numIters++; } updateOTUCountData(otuFreq, otuBySeqLookUp, aanI, anP, anI, cumCount); vector percentage(numSeqs); setUpOTUData(otuData, percentage, cumCount, tau, otuFreq, anP, anI); finishOTUData(otuData, otuFreq, anP, anI, cumCount, otuBySeqLookUp, aanI, tau); change.assign(numOTUs, 1); calcCentroids(anI, anP, change, centroids, cumCount, distances, seqFreq, otuFreq, tau); vector finalTau(numOTUs, 0); for(int i=0;i&); int addSeq(string, vector&); int getRedundantNames(string, vector&, vector&, vector&); int addRedundantName(string, string, vector&, vector&, vector&); int getDistanceData(string, vector&); int getListData(string, double, vector&, vector&, vector >&); int updateOTUCountData(vector, vector >, vector >, vector&, vector&, vector&); double 
calcNewWeights(vector&,vector,vector,vector,vector,vector,vector); int calcCentroids(vector,vector,vector&,vector&,vector,vector,vector,vector,vector); int checkCentroids(vector&, vector); int setUpOTUData(vector&, vector&, vector, vector, vector, vector, vector); int finishOTUData(vector, vector&, vector&, vector&, vector&, vector >&, vector >&, vector&); int writeOutput(string, string, string, vector, vector, vector, vector, vector, vector, vector, vector&); private: MothurOut* m; Utils util; int getLastMatch(char, vector >&, int, int, vector&, vector&); int countDiffs(vector, vector); vector convertSeq(string); string degapSeq(string); }; /**************************************************************************************************/ #endif mothur-1.48.0/source/sharedwriter.hpp000077500000000000000000000023501424121717000176400ustar00rootroot00000000000000// // SharedWriter.hpp // Mothur // // Created by Sarah Westcott on 12/7/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef SharedWriter_hpp #define SharedWriter_hpp #include "mothurout.h" #include "utils.hpp" /***********************************************************************/ class SynchronizedOutputFile { public: SynchronizedOutputFile (const string& p) : path(p) { util.openOutputFile(p, out); } SynchronizedOutputFile (const string& p, bool append) : path(p) { util.openOutputFileAppend(p, out); } ~SynchronizedOutputFile() { if (out.is_open()) { out.close(); } } //if we forgot to close() void write (const string& dataToWrite) { std::lock_guard lock((writerMutex)); // Ensure that only one thread can execute at a time out << dataToWrite; } void close() { if (out.is_open()) { out.close(); } } void setFixedShowPoint() { out.setf(ios::fixed, ios::showpoint); } void setPrecision(int p) { out << setprecision(p); } private: string path; std::mutex writerMutex; Utils util; ofstream out; }; /***********************************************************************/ #endif mothur-1.48.0/source/singlelinkage.cpp000077500000000000000000000016611424121717000177500ustar00rootroot00000000000000 #include "cluster.hpp" /***********************************************************************/ SingleLinkage::SingleLinkage(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string s, float a) : Cluster(rav, lv, dm, c, s, a) {} /***********************************************************************/ //This function returns the tag of the method. string SingleLinkage::getTag() { return("nn"); } /***********************************************************************/ //This function updates the distance based on the nearest neighbor method. bool SingleLinkage::updateDistance(PDistCell& colCell, PDistCell& rowCell) { try { bool changed = false; if (colCell.dist > rowCell.dist) { colCell.dist = rowCell.dist; } return(changed); } catch(exception& e) { m->errorOut(e, "SingleLinkage", "updateDistance"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/slibshuff.cpp000077500000000000000000000045131424121717000171200ustar00rootroot00000000000000/* * slibshuff.cpp * Mothur * * Created by Pat Schloss on 4/8/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. 
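// SynchronizedOutputFile (sharedwriter.hpp, above) guards every write with a
// std::lock_guard so lines from different threads cannot interleave. A minimal usage
// sketch, assuming the mothur headers are on the include path; the output file name
// and thread count are made up for illustration:
#include "sharedwriter.hpp"
#include <string>
#include <thread>
#include <vector>

static void exampleSynchronizedWrites() {
    SynchronizedOutputFile out("example.temp");          // hypothetical output file
    std::vector<std::thread> workers;
    for (int t = 0; t < 4; t++) {
        workers.push_back(std::thread([&out, t]() {
            // each write() locks internally, so this whole line lands atomically
            out.write("thread " + std::to_string(t) + " finished\n");
        }));
    }
    for (size_t i = 0; i < workers.size(); i++) { workers[i].join(); }
    out.close();
}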
* */ #include "slibshuff.h" /***********************************************************************/ SLibshuff::SLibshuff(FullMatrix* D, int it, float co) : Libshuff(D, it, 0, co){} /***********************************************************************/ float SLibshuff::evaluatePair(int i, int j){ return sCalculate(i,j); } /***********************************************************************/ vector > SLibshuff::evaluateAll(){ try{ savedMins.resize(numGroups); vector > dCXYValues(numGroups); for(int i=0;ierrorOut(e, "SLibshuff", "evaluateAll"); exit(1); } } /***********************************************************************/ double SLibshuff::sCalculate(int x, int y){ try{ double sum = 0.0,t=0.0; minX = getMinX(x); if (m->getControl_pressed()) { return sum; } minXY = getMinXY(x,y); if (m->getControl_pressed()) { return sum; } sort(minX.begin(), minX.end()); if (m->getControl_pressed()) { return sum; } sort(minXY.begin(), minXY.end()); if (m->getControl_pressed()) { return sum; } int ix=0,iy=0; while( (ix < groupSizes[x]) && (iy < groupSizes[x]) ) { double h = (ix-iy)/double(groupSizes[x]); if(minX[ix] < minXY[iy]) { sum += (minX[ix] - t)*h*h; t = minX[ix++]; } else { sum += (minXY[iy] - t)*h*h; t = minXY[iy++]; } } if(ix < groupSizes[x]) { while(ix < groupSizes[x]) { double h = (ix-iy)/double(groupSizes[x]); sum += (minX[ix] - t)*h*h; t = minX[ix++]; } } else { while(iy < groupSizes[x]) { double h = (ix-iy)/double(groupSizes[x]); sum += (minXY[iy] - t)*h*h; t = minXY[iy++]; } } return sum; } catch(exception& e) { m->errorOut(e, "SLibshuff", "sCalculate"); exit(1); } } /***********************************************************************/ mothur-1.48.0/source/slibshuff.h000077500000000000000000000006541424121717000165670ustar00rootroot00000000000000#ifndef SLIBSHUFF #define SLIBSHUFF /* * slibshuff.h * Mothur * * Created by Pat Schloss on 4/8/09. * Copyright 2009 Patrick D. Schloss. All rights reserved. * */ #include "fullmatrix.h" #include "libshuff.h" class SLibshuff : public Libshuff { public: SLibshuff(FullMatrix*, int, float); vector > evaluateAll(); float evaluatePair(int, int); private: double sCalculate(int, int); }; #endif mothur-1.48.0/source/subsample.cpp000077500000000000000000001154331424121717000171320ustar00rootroot00000000000000// // subsample.cpp // Mothur // // Created by Sarah Westcott on 4/2/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. 
// #include "subsample.h" //********************************************************************************************************************** Tree* SubSample::getSample(Tree* T, CountTable* ct, CountTable* newCt, int size, vector& mGroups) { try { //remove seqs not in sample from counttable vector Groups = ct->getNamesOfGroups(); if (mGroups.size() == 0) { mGroups = Groups; } newCt->copy(ct); newCt->addGroup("doNotIncludeMe"); map doNotIncludeTotals; vector namesSeqs = ct->getNamesOfSeqs(); for (int i = 0; i < namesSeqs.size(); i++) { doNotIncludeTotals[namesSeqs[i]] = 0; } for (int i = 0; i < Groups.size(); i++) { if (util.inUsersGroups(Groups[i], mGroups)) { if (m->getControl_pressed()) { break; } int thisSize = ct->getGroupCount(Groups[i]); if (thisSize >= size) { vector names = ct->getNamesOfSeqs(Groups[i]); vector random; for (int j = 0; j < names.size(); j++) { int num = ct->getGroupCount(names[j], Groups[i]); for (int k = 0; k < num; k++) { random.push_back(j); } } util.mothurRandomShuffle(random); vector sampleRandoms; sampleRandoms.resize(names.size(), 0); for (int j = 0; j < size; j++) { sampleRandoms[random[j]]++; } for (int j = 0; j < sampleRandoms.size(); j++) { newCt->setAbund(names[j], Groups[i], sampleRandoms[j]); } sampleRandoms.clear(); sampleRandoms.resize(names.size(), 0); for (int j = size; j < thisSize; j++) { sampleRandoms[random[j]]++; } for (int j = 0; j < sampleRandoms.size(); j++) { doNotIncludeTotals[names[j]] += sampleRandoms[j]; } }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->setControl_pressed(true); } } } for (map::iterator it = doNotIncludeTotals.begin(); it != doNotIncludeTotals.end(); it++) { newCt->setAbund(it->first, "doNotIncludeMe", it->second); } vector Treenames = T->getTreeNames(); Tree* newTree = new Tree(newCt, Treenames); newTree->getCopy(T, true); return newTree; } catch(exception& e) { m->errorOut(e, "SubSample", "getSample-Tree"); exit(1); } } //********************************************************************************************************************** Tree* SubSample::getSampleWithReplacement(Tree* T, CountTable* ct, CountTable* newCt, int size, vector& mGroups) { try { Tree* newTree = nullptr; //remove seqs not in sample from counttable vector Groups = ct->getNamesOfGroups(); if (mGroups.size() == 0) { mGroups = Groups; } newCt->copy(ct); newCt->addGroup("doNotIncludeMe"); map doNotIncludeTotals; vector namesSeqs = ct->getNamesOfSeqs(); for (int i = 0; i < namesSeqs.size(); i++) { doNotIncludeTotals[namesSeqs[i]] = 0; } for (int i = 0; i < Groups.size(); i++) { if (util.inUsersGroups(Groups[i], mGroups)) { if (m->getControl_pressed()) { break; } vector names = ct->getNamesOfSeqs(Groups[i]); vector random; for (int j = 0; j < names.size(); j++) { int num = ct->getGroupCount(names[j], Groups[i]); for (int k = 0; k < num; k++) { random.push_back(j); } } vector sampleRandoms; sampleRandoms.resize(names.size(), 0); long long totalNumReads = random.size()-1; set selected; for (int j = 0; j < size; j++) { //allows for multiple selection of same read //"grab random from bag" long long randomRead = util.getRandomIndex(totalNumReads); sampleRandoms[random[randomRead]]++; selected.insert(randomRead); } for (int j = 0; j < sampleRandoms.size(); j++) { //create new count file with updated sequence counts newCt->setAbund(names[j], Groups[i], sampleRandoms[j]); } //set unselected reads to "do not include" sampleRandoms.clear(); sampleRandoms.resize(names.size(), 0); 
for (long long j = 0; j < random.size(); j++) { if (selected.count(j) == 0) { //we did not selected this read from random sampleRandoms[random[j]]++; } } for (int j = 0; j < sampleRandoms.size(); j++) { doNotIncludeTotals[names[j]] += sampleRandoms[j]; } } } for (map::iterator it = doNotIncludeTotals.begin(); it != doNotIncludeTotals.end(); it++) { newCt->setAbund(it->first, "doNotIncludeMe", it->second); } vector Treenames = T->getTreeNames(); newTree = new Tree(newCt, Treenames); newTree->getCopy(T, true); return newTree; } catch(exception& e) { m->errorOut(e, "SubSample", "getSample-Tree"); exit(1); } } //********************************************************************************************************************** //assumes whole maps dupName -> uniqueName map SubSample::deconvolute(map whole, vector& wanted) { try { map nameMap; //whole will be empty if user gave no name file, so we don't need to make a new one if (whole.size() == 0) { return nameMap; } vector newWanted; for (int i = 0; i < wanted.size(); i++) { if (m->getControl_pressed()) { break; } string dupName = wanted[i]; map::iterator itWhole = whole.find(dupName); if (itWhole != whole.end()) { string repName = itWhole->second; //do we already have this rep? map::iterator itName = nameMap.find(repName); if (itName != nameMap.end()) { //add this seqs to dups list (itName->second) += "," + dupName; }else { //first sighting of this seq nameMap[repName] = dupName; newWanted.push_back(repName); } }else { m->mothurOut("[ERROR]: "+dupName+" is not in your name file, please correct.\n"); m->setControl_pressed(true); } } wanted = newWanted; return nameMap; } catch(exception& e) { m->errorOut(e, "SubSample", "deconvolute"); exit(1); } } //********************************************************************************************************************** set SubSample::getWeightedSample(map & nameMap, long long num) { try { set sampleNames; long long totalSeqs = nameMap.size(); if (totalSeqs < num) { m->mothurOut("[ERROR]: Requesting sample size larger than number of seqeunces, quitting.\n"); m->setControl_pressed(true); return sampleNames; } else if (totalSeqs == num) { for (map::iterator it = nameMap.begin(); it != nameMap.end(); it++) { sampleNames.insert(it->first); } return sampleNames; } long long numSampled = 0; map > weights;//weight -> names of seqs with that weight map >::iterator itWeight; long long total = 0; for (map::iterator it = nameMap.begin(); it != nameMap.end(); it++) { total += it->second; itWeight = weights.find(it->second); if (itWeight == weights.end()) { //this is a weight we haven't seen before set temp; temp.insert(it->first); weights[it->second] = temp; }else { weights[it->second].insert(it->first); //dup weight, combine to save memory } } //find running total long long runningTotal = 0; map > cumulative;//weight + sum so far -> names of seqs with that weight for (itWeight = weights.begin(); itWeight != weights.end(); itWeight++) { int count = itWeight->second.size(); //number of seqs with this weight runningTotal += itWeight->first * count; cumulative[runningTotal] = itWeight->second; } weights.clear(); while(numSampled != num) { long long index = util.getRandomIndex(total); //random index including weights map >::iterator itWeight = cumulative.lower_bound(index); sampleNames.insert(*itWeight->second.begin()); //save name in sample names itWeight->second.erase(itWeight->second.begin()); //remove seq since we sampled it if (itWeight->second.size() == 0) { cumulative.erase(itWeight); total = 
cumulative.rbegin()->first; } //remove this weight if all seqs are sampled. Reset bound if needed. numSampled = sampleNames.size(); } return sampleNames; } catch(exception& e) { m->errorOut(e, "SubSample", "getWeightedSample"); exit(1); } } //********************************************************************************************************************** vector SubSample::getSample(vector& rabunds, int size, vector currentLabels) { try { //save mothurOut's binLabels to restore for next label vector saveBinLabels = currentLabels; SharedRAbundVectors* newLookup = new SharedRAbundVectors(); int numBins = rabunds[0]->getNumBins(); for (int i = 0; i < rabunds.size(); i++) { int thisSize = rabunds[i]->getNumSeqs(); if (thisSize != size) { vector order; for (int j = 0; j < rabunds[i]->size(); j++) { int abund = rabunds[i]->get(j); for(int k=0;ksetLabel(rabunds[i]->getLabel()); temp->setGroup(rabunds[i]->getGroup()); for (int j = 0; j < size; j++) { //only allows you to select a read once if (m->getControl_pressed()) { return currentLabels; } temp->increment(order[j]); } newLookup->push_back(temp); }else { SharedRAbundVector* temp = new SharedRAbundVector(*rabunds[i]); newLookup->push_back(temp); } } newLookup->setOTUNames(currentLabels); newLookup->eliminateZeroOTUS(); for (int i = 0; i < rabunds.size(); i++) { delete rabunds[i]; } rabunds.clear(); rabunds = newLookup->getSharedRAbundVectors(); //save mothurOut's binLabels to restore for next label vector subsampleBinLabels = newLookup->getOTUNames(); delete newLookup; return subsampleBinLabels; } catch(exception& e) { m->errorOut(e, "SubSample", "getSample-shared"); exit(1); } } //********************************************************************************************************************** vector SubSample::getSampleWithReplacement(vector& rabunds, int size, vector currentLabels) { try { //save mothurOut's binLabels to restore for next label vector saveBinLabels = currentLabels; SharedRAbundVectors* newLookup = new SharedRAbundVectors(); int numBins = rabunds[0]->getNumBins(); for (int i = 0; i < rabunds.size(); i++) { vector order; for (int j = 0; j < rabunds[i]->size(); j++) { int abund = rabunds[i]->get(j); for(int k=0;ksetLabel(rabunds[i]->getLabel()); temp->setGroup(rabunds[i]->getGroup()); long long orderSize = order.size()-1; for (int j = 0; j < size; j++) { //allows you to select a read multiple times if (m->getControl_pressed()) { return currentLabels; } //"grab random from bag" long long randomRead = util.getRandomIndex(orderSize); temp->increment(order[randomRead]); } newLookup->push_back(temp); } newLookup->setOTUNames(currentLabels); newLookup->eliminateZeroOTUS(); for (int i = 0; i < rabunds.size(); i++) { delete rabunds[i]; } rabunds.clear(); rabunds = newLookup->getSharedRAbundVectors(); //save mothurOut's binLabels to restore for next label vector subsampleBinLabels = newLookup->getOTUNames(); delete newLookup; return subsampleBinLabels; } catch(exception& e) { m->errorOut(e, "SubSample", "getSampleWithReplacement-shared"); exit(1); } } //********************************************************************************************************************** vector SubSample::getSample(SharedRAbundVectors*& thislookup, int size) { try { //save mothurOut's binLabels to restore for next label vector saveBinLabels = thislookup->getOTUNames(); vector rabunds = thislookup->getSharedRAbundVectors(); vector subsampleBinLabels = getSample(rabunds, size, saveBinLabels); SharedRAbundVectors* newLookup = new 
SharedRAbundVectors(); for (int i = 0; i < rabunds.size(); i++) { newLookup->push_back(rabunds[i]); } newLookup->setOTUNames(subsampleBinLabels); delete thislookup; thislookup = newLookup; return subsampleBinLabels; } catch(exception& e) { m->errorOut(e, "SubSample", "getSample-shared"); exit(1); } } //********************************************************************************************************************** vector SubSample::getSampleWithReplacement(SharedRAbundVectors*& thislookup, int size) { try { //save mothurOut's binLabels to restore for next label vector saveBinLabels = thislookup->getOTUNames(); vector rabunds = thislookup->getSharedRAbundVectors(); vector subsampleBinLabels = getSampleWithReplacement(rabunds, size, saveBinLabels); SharedRAbundVectors* newLookup = new SharedRAbundVectors(); for (int i = 0; i < rabunds.size(); i++) { newLookup->push_back(rabunds[i]); } newLookup->setOTUNames(subsampleBinLabels); delete thislookup; thislookup = newLookup; return subsampleBinLabels; } catch(exception& e) { m->errorOut(e, "SubSample", "getSampleWithReplacement-shared"); exit(1); } } //********************************************************************************************************************** int SubSample::getSample(SAbundVector*& sabund, int size) { try { int numBins = sabund->getNumBins(); int thisSize = sabund->getNumSeqs(); OrderVector order = sabund->getOrderVector(); if (thisSize > size) { util.mothurRandomShuffle(order); RAbundVector rabund(numBins); rabund.setLabel(sabund->getLabel()); for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return 0; } int abund = rabund.get(order.get(j)); rabund.set(order.get(j), (abund+1)); } delete sabund; sabund = new SAbundVector(); *sabund = rabund.getSAbundVector(); }else if (thisSize < size) { m->mothurOut("[ERROR]: The size you requested is larger than the number of sequences in the sabund vector. You requested " + toString(size) + " and you only have " + toString(thisSize) + " seqs in your sabund vector.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSample"); exit(1); } } //********************************************************************************************************************** int SubSample::getSampleWithReplacement(SAbundVector*& sabund, int size) { try { int numBins = sabund->getNumBins(); int thisSize = sabund->getNumSeqs(); OrderVector order = sabund->getOrderVector(); if (thisSize > size) { RAbundVector rabund(numBins); rabund.setLabel(sabund->getLabel()); long long orderSize = order.size()-1; for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return 0; } //"grab random from bag" long long randomRead = util.getRandomIndex(orderSize); int abund = rabund.get(order.get(randomRead)); rabund.set(order.get(randomRead), (abund+1)); } delete sabund; sabund = new SAbundVector(); *sabund = rabund.getSAbundVector(); }else if (thisSize < size) { m->mothurOut("[ERROR]: The size you requested is larger than the number of sequences in the sabund vector. 
You requested " + toString(size) + " and you only have " + toString(thisSize) + " seqs in your sabund vector.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSampleWithReplacement"); exit(1); } } //********************************************************************************************************************** int SubSample::getSample(RAbundVector*& rabund, int size) { try { int numBins = rabund->getNumBins(); int thisSize = rabund->getNumSeqs(); OrderVector order = rabund->getOrderVector(nullptr); if (thisSize > size) { util.mothurRandomShuffle(order); RAbundVector sampledRabund(numBins); sampledRabund.setLabel(rabund->getLabel()); for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return 0; } int abund = sampledRabund.get(order.get(j)); sampledRabund.set(order.get(j), (abund+1)); } delete rabund; rabund = new RAbundVector(sampledRabund); }else if (thisSize < size) { m->mothurOut("[ERROR]: The size you requested is larger than the number of sequences in the rabund vector. You requested " + toString(size) + " and you only have " + toString(thisSize) + " seqs in your rabund vector.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSample"); exit(1); } } //********************************************************************************************************************** int SubSample::getSampleWithReplacement(RAbundVector*& rabund, int size) { try { int numBins = rabund->getNumBins(); int thisSize = rabund->getNumSeqs(); OrderVector order = rabund->getOrderVector(nullptr); if (thisSize > size) { RAbundVector sampledRabund(numBins); sampledRabund.setLabel(rabund->getLabel()); long long orderSize = order.size()-1; for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return 0; } //"grab random from bag" long long randomRead = util.getRandomIndex(orderSize); int abund = sampledRabund.get(order.get(randomRead)); sampledRabund.set(order.get(randomRead), (abund+1)); } delete rabund; rabund = new RAbundVector(sampledRabund); }else if (thisSize < size) { m->mothurOut("[ERROR]: The size you requested is larger than the number of sequences in the sabund vector. You requested " + toString(size) + " and you only have " + toString(thisSize) + " seqs in your sabund vector.\n"); m->setControl_pressed(true); } return 0; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSampleWithReplacement"); exit(1); } } //********************************************************************************************************************** CountTable SubSample::getSample(CountTable& ct, int size, vector Groups, bool persample) { try { if (!persample) { return (getSample(ct, size, Groups)); } if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: Cannot subsample by group because your count table doesn't have group information.\n"); m->setControl_pressed(true); } CountTable sampledCt; map > tempCount; for (int i = 0; i < Groups.size(); i++) { sampledCt.addGroup(Groups[i]); vector names = ct.getNamesOfSeqs(Groups[i]); vector allNames; for (int j = 0; j < names.size(); j++) { if (m->getControl_pressed()) { return sampledCt; } int num = ct. 
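//build a 'bag' of reads for this group: each sequence index is added once per read it represents, so shuffling the bag and keeping the first 'size' entries gives an even random subsample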
getGroupCount(names[j], Groups[i]); for (int k = 0; k < num; k++) { allNames.push_back(j); } } util.mothurRandomShuffle(allNames); if (allNames.size() < size) { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->setControl_pressed(true); } else{ for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return sampledCt; } map >::iterator it = tempCount.find(names[allNames[j]]); if (it == tempCount.end()) { //we have not seen this sequence at all yet vector tempGroups; tempGroups.resize(Groups.size(), 0); tempGroups[i]++; tempCount[names[allNames[j]]] = tempGroups; }else{ tempCount[names[allNames[j]]][i]++; } } } } //build count table for (map >::iterator it = tempCount.begin(); it != tempCount.end();) { sampledCt.push_back(it->first, it->second); tempCount.erase(it++); } return sampledCt; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSample"); exit(1); } } //********************************************************************************************************************** CountTable SubSample::getSampleWithReplacement(CountTable& ct, int size, vector Groups, bool persample) { try { if (!persample) { return (getSampleWithReplacement(ct, size, Groups)); } if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: Cannot subsample by group because your count table doesn't have group information.\n"); m->setControl_pressed(true); } CountTable sampledCt; map > tempCount; for (int i = 0; i < Groups.size(); i++) { sampledCt.addGroup(Groups[i]); vector names = ct.getNamesOfSeqs(Groups[i]); vector allNames; for (int j = 0; j < names.size(); j++) { if (m->getControl_pressed()) { return sampledCt; } int num = ct.getGroupCount(names[j], Groups[i]); for (int k = 0; k < num; k++) { allNames.push_back(j); } } long long allNamesSize = allNames.size()-1; if (allNames.size() < size) { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->setControl_pressed(true); } else{ for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return sampledCt; } //"grab random from bag" long long randomRead = util.getRandomIndex(allNamesSize); map >::iterator it = tempCount.find(names[allNames[randomRead]]); if (it == tempCount.end()) { //we have not seen this sequence at all yet vector tempGroups; tempGroups.resize(Groups.size(), 0); tempGroups[i]++; tempCount[names[allNames[randomRead]]] = tempGroups; }else{ tempCount[names[allNames[randomRead]]][i]++; } } } } //build count table for (map >::iterator it = tempCount.begin(); it != tempCount.end();) { sampledCt.push_back(it->first, it->second); tempCount.erase(it++); } return sampledCt; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSampleWithReplacement"); exit(1); } } //********************************************************************************************************************** GroupMap SubSample::getSample(GroupMap& groupMap, int size, vector Groups, bool persample) { try { if (!persample) { return (getSample(groupMap, size, Groups)); } GroupMap sampledGM; //initialize counts map groupCounts; map::iterator itGroupCounts; for (int i = 0; i < Groups.size(); i++) { groupCounts[Groups[i]] = 0; } for (int i = 0; i < Groups.size(); i++) { if (m->getControl_pressed()) { break; } string thisGroup = Groups[i]; int thisSize = groupMap.getNumSeqs(thisGroup); if (thisSize >= size) { vector names = groupMap.getNamesSeqs(thisGroup); util.mothurRandomShuffle(names); for (int j = 0; j < size; j++) { 
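//names were shuffled above, so keeping the first 'size' names gives a random subsample for this group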
sampledGM.addSeq(names[j], thisGroup); } }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->setControl_pressed(true); } } return sampledGM; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSample"); exit(1); } } //********************************************************************************************************************** GroupMap SubSample::getSample(GroupMap& groupMap, int size, vector Groups) { try { GroupMap sampledGM; int thisSize = groupMap.getNumSeqs(); if (thisSize >= size) { vector names = groupMap.getNamesSeqs(); util.mothurRandomShuffle(names); int numSelected = 0; for (int j = 0; j < names.size(); j++) { string thisGroup = groupMap.getGroup(names[j]); if (util.inUsersGroups(thisGroup, Groups)) { sampledGM.addSeq(names[j], thisGroup); numSelected++; } //do we have enough?? if (numSelected == size) { break; } } }else { m->mothurOut("[ERROR]: You have selected a size that is larger than the number of sequences.\n"); m->setControl_pressed(true); } return sampledGM; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSample"); exit(1); } } //********************************************************************************************************************** GroupMap SubSample::getSample(GroupMap& groupMap, int size) { try { GroupMap sampledGM; int thisSize = groupMap.getNumSeqs(); if (thisSize >= size) { vector names = groupMap.getNamesSeqs(); util.mothurRandomShuffle(names); for (int j = 0; j < size; j++) { string thisGroup = groupMap.getGroup(names[j]); sampledGM.addSeq(names[j], thisGroup); } }else { m->mothurOut("[ERROR]: You have selected a size that is larger than the number of sequences.\n"); m->setControl_pressed(true); } return sampledGM; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSample"); exit(1); } } //********************************************************************************************************************** CountTable SubSample::getSample(CountTable& ct, int size, vector Groups) { try { CountTable sampledCt; if (!ct.hasGroupInfo() && (Groups.size() != 0)) { m->mothurOut("[ERROR]: Cannot subsample with groups because your count table doesn't have group information.\n"); m->setControl_pressed(true); return sampledCt; } if (ct.hasGroupInfo()) { //only select reads from Groups map > tempCount; for (int i = 0; i < Groups.size(); i++) { sampledCt.addGroup(Groups[i]); } vector names = ct.getNamesOfSeqs(Groups); //names of sequences in groups vector allNames; for (int j = 0; j < names.size(); j++) { if (m->getControl_pressed()) { return sampledCt; } for (int i = 0; i < Groups.size(); i++) { int num = ct.getGroupCount(names[j], Groups[i]); //num reads in this group from this seq intPair thisSeq(j,i); for (int k = 0; k < num; k++) { allNames.push_back(thisSeq); } } } util.mothurRandomShuffle(allNames); if (allNames.size() < size) { m->mothurOut("[ERROR]: You have selected a size that is larger than the number of sequences.\n"); m->setControl_pressed(true); } else{ for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return sampledCt; } map >::iterator it = tempCount.find(names[allNames[j].abund]); if (it == tempCount.end()) { //we have not seen this sequence at all yet vector tempGroups; tempGroups.resize(Groups.size(), 0); tempGroups[allNames[j].group]++; tempCount[names[allNames[j].abund]] = tempGroups; }else{ tempCount[names[allNames[j].abund]][allNames[j].group]++; } } } //build count table for (map >::iterator it = 
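//tally how many times each sequence was drawn in each group; the sampled count table is built from these tallies below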
tempCount.begin(); it != tempCount.end();) { sampledCt.push_back(it->first, it->second); tempCount.erase(it++); } }else { //no groups vector names = ct.getNamesOfSeqs(); map nameMap; vector allNames; for (int i = 0; i < names.size(); i++) { int num = ct.getNumSeqs(names[i]); for (int j = 0; j < num; j++) { allNames.push_back(i); } } if (allNames.size() < size) { m->mothurOut("[ERROR]: You have selected a size that is larger than the number of sequences.\n"); m->setControl_pressed(true); return sampledCt; } else { util.mothurRandomShuffle(allNames); for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return sampledCt; } map::iterator it = nameMap.find(allNames[j]); //we have not seen this sequence at all yet if (it == nameMap.end()) { nameMap[allNames[j]] = 1; } else{ nameMap[allNames[j]]++; } } //build count table for (map::iterator it = nameMap.begin(); it != nameMap.end();) { sampledCt.push_back(names[it->first], it->second); nameMap.erase(it++); } } } return sampledCt; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSample"); exit(1); } } //********************************************************************************************************************** CountTable SubSample::getSampleWithReplacement(CountTable& ct, int size, vector Groups) { try { CountTable sampledCt; if (!ct.hasGroupInfo() && (Groups.size() != 0)) { m->mothurOut("[ERROR]: Cannot subsample with groups because your count table doesn't have group information.\n"); m->setControl_pressed(true); return sampledCt; } if (ct.hasGroupInfo()) { //only select reads from Groups map > tempCount; for (int i = 0; i < Groups.size(); i++) { sampledCt.addGroup(Groups[i]); } vector names = ct.getNamesOfSeqs(Groups); //names of sequences in groups vector allNames; for (int j = 0; j < names.size(); j++) { if (m->getControl_pressed()) { return sampledCt; } for (int i = 0; i < Groups.size(); i++) { int num = ct.getGroupCount(names[j], Groups[i]); //num reads in this group from this seq intPair thisSeq(j,i); for (int k = 0; k < num; k++) { allNames.push_back(thisSeq); } } } if (allNames.size() < size) { m->mothurOut("[ERROR]: You have selected a size that is larger than the number of sequences.\n"); m->setControl_pressed(true); } else{ long long allNamesSize = allNames.size()-1; for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return sampledCt; } long long randomRead = util.getRandomIndex(allNamesSize); map >::iterator it = tempCount.find(names[allNames[randomRead].abund]); if (it == tempCount.end()) { //we have not seen this sequence at all yet vector tempGroups; tempGroups.resize(Groups.size(), 0); tempGroups[allNames[randomRead].group]++; tempCount[names[allNames[randomRead].abund]] = tempGroups; }else{ tempCount[names[allNames[randomRead].abund]][allNames[randomRead].group]++; } } } //build count table for (map >::iterator it = tempCount.begin(); it != tempCount.end();) { sampledCt.push_back(it->first, it->second); tempCount.erase(it++); } }else { //no groups vector names = ct.getNamesOfSeqs(); map nameMap; vector allNames; for (int i = 0; i < names.size(); i++) { int num = ct.getNumSeqs(names[i]); for (int j = 0; j < num; j++) { allNames.push_back(i); } } if (allNames.size() < size) { m->mothurOut("[ERROR]: You have selected a size that is larger than the number of sequences.\n"); m->setControl_pressed(true); return sampledCt; } else { long long allNamesSize = allNames.size()-1; for (int j = 0; j < size; j++) { if (m->getControl_pressed()) { return sampledCt; } long long randomRead = 
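//sampling with replacement: draw a random slot from the bag on every iteration without removing it, so the same read can be selected more than once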
util.getRandomIndex(allNamesSize); map::iterator it = nameMap.find(allNames[randomRead]); //we have not seen this sequence at all yet if (it == nameMap.end()) { nameMap[allNames[randomRead]] = 1; } else{ nameMap[allNames[randomRead]]++; } } //build count table for (map::iterator it = nameMap.begin(); it != nameMap.end();) { sampledCt.push_back(names[it->first], it->second); nameMap.erase(it++); } } } return sampledCt; } catch(exception& e) { m->errorOut(e, "SubSampleCommand", "getSampleWithReplacement"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/subsample.h000077500000000000000000000103351424121717000165720ustar00rootroot00000000000000#ifndef Mothur_subsample_h #define Mothur_subsample_h // // subsample.h // Mothur // // Created by Sarah Westcott on 4/2/12. // Copyright (c) 2012 Schloss Lab. All rights reserved. // #include "mothurout.h" #include "rabundvector.hpp" #include "ordervector.hpp" #include "treemap.h" #include "tree.h" #include "counttable.h" #include "sharedrabundvectors.hpp" //subsampling overwrites the sharedRabunds. If you need to reuse the original use the getSamplePreserve function. class SubSample { public: SubSample() { m = MothurOut::getInstance(); } ~SubSample() = default; vector getSample(SharedRAbundVectors*&, int); //returns the bin labels for the subsample. Overwrites original vector passed in, if you need to preserve it deep copy first. vector getSampleWithReplacement(SharedRAbundVectors*&, int); //returns the bin labels for the subsample. Overwrites original vector passed in, if you need to preserve it deep copy first. vector getSample(vector&, int, vector); //returns the bin labels for the subsample. Overwrites original vector passed in, if you need to preserve it deep copy first. vector getSampleWithReplacement(vector&, int, vector); //returns the bin labels for the subsample. Overwrites original vector passed in, if you need to preserve it deep copy first. Tree* getSample(Tree*, CountTable*, CountTable*, int, vector&); //creates new subsampled tree. Uses first counttable to fill new counttable with sabsampled seqs. Sets groups of seqs not in subsample to "doNotIncludeMe". Tree* getSampleWithReplacement(Tree*, CountTable*, CountTable*, int, vector&); //creates new subsampled tree. Uses first counttable to fill new counttable with sabsampled seqs. Sets groups of seqs not in subsample to "doNotIncludeMe". int getSample(SAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it int getSampleWithReplacement(SAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it int getSample(RAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it int getSampleWithReplacement(RAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it CountTable getSample(CountTable&, int, vector, bool persample); // if persample then subsample 'size' members from each group - bygroup(same number sampled from each group), returns subsampled. If not persample then subsample 'size' members from the set of groups passed in. CountTable getSampleWithReplacement(CountTable&, int, vector, bool persample); // if persample then subsample 'size' members from each group - bygroup(same number sampled from each group), returns subsampled. If not persample then subsample 'size' members from the set of groups passed in. 
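    //------------------------------------------------------------------------------------------------------------------
    //Illustrative usage sketch (not part of the original source): one way the per-group CountTable overloads above
    //could be called to rarefy a count table to an even depth. The depth of 1000 and the group names are made-up
    //values, and 'ct' is assumed to be a CountTable already populated with group information.
    //
    //  SubSample sampler;
    //  vector<string> groups; groups.push_back("groupA"); groups.push_back("groupB");   //hypothetical group names
    //  CountTable perGroup = sampler.getSample(ct, 1000, groups, true);   //persample=true  -> 1000 reads drawn from each group
    //  CountTable pooled   = sampler.getSample(ct, 1000, groups, false);  //persample=false -> 1000 reads drawn from the groups pooled together
    //
    //Both calls return the subsampled table and flag an error through MothurOut if the requested depth exceeds the
    //reads available.
    //------------------------------------------------------------------------------------------------------------------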
GroupMap getSample(GroupMap&, int, vector groupsWanted, bool persample); // if persample then subsample 'size' members from each group - bygroup(same number sampled from each group), returns subsampled. If not persample then subsample 'size' members from the set of groups passed in. GroupMap getSample(GroupMap&, int size); //returns subsampled GroupMap with 'size' members set getWeightedSample(map&, long long); //map of sequence names -> weight (could be abundance or some other measure), num to sample private: MothurOut* m; Utils util; map deconvolute(map wholeSet, vector& subsampleWanted); //returns new nameMap containing only subsampled names, and removes redundants from subsampled wanted because it makes the new nameMap. GroupMap getSample(GroupMap&, int, vector groupsWanted); CountTable getSample(CountTable&, int, vector); //subsample a countTable bygroup(same number sampled from each group), returns subsampled countTable CountTable getSampleWithReplacement(CountTable&, int, vector); //subsample a countTable bygroup(same number sampled from each group), returns subsampled countTable }; #endif mothur-1.48.0/source/summary.cpp000077500000000000000000001341211424121717000166270ustar00rootroot00000000000000// // summary.cpp // Mothur // // Created by Sarah Westcott on 3/27/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #include "summary.hpp" #include "contigsreport.hpp" #include "alignreport.hpp" //********************************************************************************************************************** void Summary::processNameCount(string n) { //name or count file to include in counts try { nameMap.clear(); nameCountNumUniques = 0; hasNameOrCount = false; if (n != "") { hasNameOrCount = true; if (isCountFile(n)) { CountTable ct; ct.readTable(n, false, false); nameMap = ct.getNameMap(); type = "count"; }else { Utils util; nameMap = util.readNames(n); type = "name"; } } nameCountNumUniques = nameMap.size(); } catch(exception& e) { m->errorOut(e, "Summary", "Summary"); exit(1); } } //********************************************************************************************************************** bool Summary::isCountFile(string inputfile){ try { CountTable ct; bool isCount = ct.isCountTable(inputfile); return isCount; } catch(exception& e) { m->errorOut(e, "Summary", "isCountFile"); exit(1); } } //********************************************************************************************************************** vector Summary::getDefaults() { try { vector locations; long long ptile0_25 = 1+(long long)(total * 0.025); //number of sequences at 2.5% long long ptile25 = 1+(long long)(total * 0.250); //number of sequences at 25% long long ptile50 = 1+(long long)(total * 0.500); long long ptile75 = 1+(long long)(total * 0.750); long long ptile97_5 = 1+(long long)(total * 0.975); long long ptile100 = (long long)(total); locations.push_back(1); locations.push_back(ptile0_25); locations.push_back(ptile25); locations.push_back(ptile50); locations.push_back(ptile75); locations.push_back(ptile97_5); locations.push_back(ptile100); return locations; } catch(exception& e) { m->errorOut(e, "Summary", "getDefaults"); exit(1); } } //********************************************************************************************************************** vector Summary::getValues(map& positions) { try { vector defaults = getDefaults(); vector results; results.resize(7,0); long long meanPosition; meanPosition = 0; long long totalSoFar = 0; int lastValue = 0; //minimum if 
((positions.begin())->first == -1) { results[0] = 0; } else {results[0] = (positions.begin())->first; } results[1] = results[0]; results[2] = results[0]; results[3] = results[0]; results[4] = results[0]; results[5] = results[0]; for (map::iterator it = positions.begin(); it != positions.end(); it++) { int value = it->first; if (value == -1) { value = 0; } meanPosition += (value*it->second); totalSoFar += it->second; if (((totalSoFar <= defaults[1]) && (totalSoFar > 1)) || ((lastValue < defaults[1]) && (totalSoFar > defaults[1]))){ results[1] = value; } //save value if (((totalSoFar <= defaults[2]) && (totalSoFar > defaults[1])) || ((lastValue < defaults[2]) && (totalSoFar > defaults[2]))) { results[2] = value; } //save value if (((totalSoFar <= defaults[3]) && (totalSoFar > defaults[2])) || ((lastValue < defaults[3]) && (totalSoFar > defaults[3]))) { results[3] = value; } //save value if (((totalSoFar <= defaults[4]) && (totalSoFar > defaults[3])) || ((lastValue < defaults[4]) && (totalSoFar > defaults[4]))) { results[4] = value; } //save value if (((totalSoFar <= defaults[5]) && (totalSoFar > defaults[4])) || ((lastValue < defaults[5]) && (totalSoFar > defaults[5]))) { results[5] = value; } //save value if ((totalSoFar <= defaults[6]) && (totalSoFar > defaults[5])) { results[6] = value; } //save value lastValue = totalSoFar; } results[6] = (positions.rbegin())->first; double meansPosition = meanPosition / (double) total; results.push_back(meansPosition); return results; } catch(exception& e) { m->errorOut(e, "Summary", "getValues"); exit(1); } } //********************************************************************************************************************** long long Summary::getValue(map& spots, double value) { try { long long percentage = 1+(long long)(total * value * 0.01); long long result = 0; long long totalSoFar = 0; long long lastValue = 0; //minimum if ((spots.begin())->first == -1) { result = 0; } else {result = (spots.begin())->first; } for (it = spots.begin(); it != spots.end(); it++) { long long value = it->first; if (value == -1) { value = 0; } totalSoFar += it->second; if (((totalSoFar <= percentage) && (totalSoFar > 1)) || ((lastValue < percentage) && (totalSoFar > percentage))){ result = value; } //save value lastValue = totalSoFar; } return result; } catch(exception& e) { m->errorOut(e, "Summary", "getValue"); exit(1); } } //********************************************************************************************************************** vector Summary::getValues(map& positions) { try { vector defaults = getDefaults(); vector results; results.resize(7,0); long long meanPosition; meanPosition = 0; long long totalSoFar = 0; int lastValue = 0; //minimum if (util.isEqual((positions.begin())->first, -1)) { results[0] = 0; } else {results[0] = (positions.begin())->first; } results[1] = results[0]; results[2] = results[0]; results[3] = results[0]; results[4] = results[0]; results[5] = results[0]; for (map::iterator it = positions.begin(); it != positions.end(); it++) { long long value = it->first; if (value == -1) { value = 0; } meanPosition += (value*it->second); totalSoFar += it->second; if (((totalSoFar <= defaults[1]) && (totalSoFar > 1)) || ((lastValue < defaults[1]) && (totalSoFar > defaults[1]))){ results[1] = value; } //save value if (((totalSoFar <= defaults[2]) && (totalSoFar > defaults[1])) || ((lastValue < defaults[2]) && (totalSoFar > defaults[2]))) { results[2] = value; } //save value if (((totalSoFar <= defaults[3]) && (totalSoFar > defaults[2])) || 
((lastValue < defaults[3]) && (totalSoFar > defaults[3]))) { results[3] = value; } //save value if (((totalSoFar <= defaults[4]) && (totalSoFar > defaults[3])) || ((lastValue < defaults[4]) && (totalSoFar > defaults[4]))) { results[4] = value; } //save value if (((totalSoFar <= defaults[5]) && (totalSoFar > defaults[4])) || ((lastValue < defaults[5]) && (totalSoFar > defaults[5]))) { results[5] = value; } //save value if ((totalSoFar <= defaults[6]) && (totalSoFar > defaults[5])) { results[6] = value; } //save value lastValue = totalSoFar; } results[6] = (positions.rbegin())->first; double meansPosition = meanPosition / (double) total; results.push_back(meansPosition); return results; } catch(exception& e) { m->errorOut(e, "Summary", "getValues"); exit(1); } } //********************************************************************************************************************** long long Summary::getValue(map& positions, double value) { try { long long percentage = 1+(long long)(total * value * 0.01); long long result = 0; long long totalSoFar = 0; long long lastValue = 0; //minimum if (util.isEqual((positions.begin())->first, -1)) { result = 0; } else { result = (positions.begin())->first; } for (map::iterator it = positions.begin(); it != positions.end(); it++) { long long value = it->first; if (value == -1) { value = 0; } totalSoFar += it->second; if (((totalSoFar <= percentage) && (totalSoFar > 1)) || ((lastValue < percentage) && (totalSoFar > percentage))){ result = value; } //save value lastValue = totalSoFar; } return result; } catch(exception& e) { m->errorOut(e, "Summary", "getValue"); exit(1); } } //************************************************************************************************** int Summary::getMaxAbundance(){ int max = 0; for(map::iterator it=nameMap.begin();it!=nameMap.end();it++){ if(it->second > max){ max = it->second; } } return max; } //************************************************************************************************** long long Summary::summarizeFasta(string fastafile, string n, string output) { try { //fill namemap processNameCount(n); return (summarizeFasta(fastafile, output)); } catch(exception& e) { m->errorOut(e, "Summary", "getHomop"); exit(1); } } //********************************************************************************************************************** void driverSummarize(seqSumData* params) { //(string fastafile, string output, linePair lines) { try { ofstream out; if (params->summaryFile != "") { params->util.openOutputFile(params->summaryFile, out); } ifstream in; params->util.openInputFile(params->filename, in); in.seekg(params->start); //print header if you are process 0 if (params->start == 0) { params->util.zapGremlins(in); gobble(in); //print header if you are process 0 if (params->summaryFile != "") { out << "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs" << endl; } } bool done = false; params->count = 0; while (!done) { if (params->m->getControl_pressed()) { break; } Sequence seq(in); gobble(in); if (seq.getName() != "") { if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + seq.getName() + "\t" + toString(seq.getStartPos()) + "\t" + toString(seq.getEndPos()) + "\t" + toString(seq.getNumBases()) + "\n"); } //string seqInfo = addSeq(current); params->count++; long long num = 1; if (params->hasNameMap) { //make sure this sequence is in the namefile, else error map::iterator itFindName = params->nameMap.find(seq.getName()); if (itFindName == params->nameMap.end()) { 
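//a fasta sequence that is missing from the name/count file is a fatal mismatch; report it and stop processing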
params->m->mothurOut("[ERROR]: '" + seq.getName() + "' is not in your name or count file, please correct.\n"); params->m->setControl_pressed(true); } else { num = itFindName->second; } } int thisStartPosition = seq.getStartPos(); map::iterator it = params->startPosition.find(thisStartPosition); if (it == params->startPosition.end()) { params->startPosition[thisStartPosition] = num; } //first finding of this start position, set count. else { it->second += num; } //add counts int thisEndPosition = seq.getEndPos(); it = params->endPosition.find(thisEndPosition); if (it == params->endPosition.end()) { params->endPosition[thisEndPosition] = num; } //first finding of this end position, set count. else { it->second += num; } //add counts int thisSeqLength = seq.getNumBases(); it = params->seqLength.find(thisSeqLength); if (it == params->seqLength.end()) { params->seqLength[thisSeqLength] = num; } //first finding of this length, set count. else { it->second += num; } //add counts int thisAmbig = seq.getAmbigBases(); it = params->ambigBases.find(thisAmbig); if (it == params->ambigBases.end()) { params->ambigBases[thisAmbig] = num; } //first finding of this ambig, set count. else { it->second += num; } //add counts int thisHomoP = seq.getLongHomoPolymer(); it = params->longHomoPolymer.find(thisHomoP); if (it == params->longHomoPolymer.end()) { params->longHomoPolymer[thisHomoP] = num; } //first finding of this homop, set count. else { it->second += num; } //add counts int numns = seq.getNumNs(); it = params->numNs.find(numns); if (it == params->numNs.end()) { params->numNs[numns] = num; } //first finding of this homop, set count. else { it->second += num; } //add counts params->total += num; string seqInfo = ""; seqInfo += seq.getName() + '\t'; seqInfo += toString(thisStartPosition) + '\t' + toString(thisEndPosition) + '\t'; seqInfo += toString(thisSeqLength) + '\t' + toString(thisAmbig) + '\t'; seqInfo += toString(thisHomoP) + '\t' + toString(num); if (params->summaryFile != "") { out << seqInfo << endl; } } #if defined NON_WINDOWS unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->count == params->end) { break; } #endif } if (params->summaryFile != "") { out.close(); } in.close(); } catch(exception& e) { params->m->errorOut(e, "Summary", "driverSummarize"); exit(1); } } //********************************************************************************************************************** long long Summary::summarizeFasta(string fastafile, string output) { try { long long num = 0; vector lines; vector positions; #if defined NON_WINDOWS positions = util.divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else positions = util.setFilePosFasta(fastafile, num); if (num < processors) { processors = num; } //figure out how many sequences you have to process int numSeqsPerProcessor = num / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = num - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { string extension = ""; extension = toString(i) + ".temp"; string outputName = output + extension; if (output == "") { outputName = ""; } seqSumData* dataBundle = new seqSumData(fastafile, 
outputName, lines[i+1].start, lines[i+1].end, hasNameOrCount, nameMap); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverSummarize, dataBundle)); } seqSumData* dataBundle = new seqSumData(fastafile, output, lines[0].start, lines[0].end, hasNameOrCount, nameMap); driverSummarize(dataBundle); num = dataBundle->count; total = dataBundle->total; startPosition = dataBundle->startPosition; endPosition = dataBundle->endPosition; seqLength = dataBundle->seqLength; ambigBases = dataBundle->ambigBases; longHomoPolymer = dataBundle->longHomoPolymer; numNs = dataBundle->numNs; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; total += data[i]->total; for (map::iterator it = data[i]->startPosition.begin(); it != data[i]->startPosition.end(); it++) { map::iterator itMain = startPosition.find(it->first); if (itMain == startPosition.end()) { //newValue startPosition[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->endPosition.begin(); it != data[i]->endPosition.end(); it++) { map::iterator itMain = endPosition.find(it->first); if (itMain == endPosition.end()) { //newValue endPosition[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->seqLength.begin(); it != data[i]->seqLength.end(); it++) { map::iterator itMain = seqLength.find(it->first); if (itMain == seqLength.end()) { //newValue seqLength[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->ambigBases.begin(); it != data[i]->ambigBases.end(); it++) { map::iterator itMain = ambigBases.find(it->first); if (itMain == ambigBases.end()) { //newValue ambigBases[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->longHomoPolymer.begin(); it != data[i]->longHomoPolymer.end(); it++) { map::iterator itMain = longHomoPolymer.find(it->first); if (itMain == longHomoPolymer.end()) { //newValue longHomoPolymer[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->numNs.begin(); it != data[i]->numNs.end(); it++) { map::iterator itMain = numNs.find(it->first); if (itMain == numNs.end()) { //newValue numNs[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } delete data[i]; delete workerThreads[i]; } //append files for (int i = 0; i < processors-1; i++) { string extension = ""; extension = toString(i) + ".temp"; string outputName = output + extension; if (output == "") { outputName = ""; } if (outputName != "") { util.appendFiles((output + toString(i) + ".temp"), output); util.mothurRemove((output + toString(i) + ".temp")); } } if (!m->getControl_pressed()) { if (hasNameOrCount) { if (nameCountNumUniques != num) { // do fasta and name/count files match m->mothurOut("[ERROR]: Your " + type + " file contains " + toString(nameCountNumUniques) + " unique sequences, but your fasta file contains " + toString(num) + ". 
File mismatch detected, quitting command.\n"); m->setControl_pressed(true); } } } numUniques = num; return num; } catch(exception& e) { m->errorOut(e, "Summary", "summarizeFasta"); exit(1); } } //********************************************************************************************************************** long long Summary::summarizeFastaSummary(string summaryfile, string n) { try { //fill namemap processNameCount(n); return (summarizeFastaSummary(summaryfile)); } catch(exception& e) { m->errorOut(e, "Summary", "summarizeFastaSummary"); exit(1); } } //********************************************************************************************************************** void driverFastaSummarySummarize(seqSumData* params) { try { ifstream in; params->util.openInputFile(params->filename, in); in.seekg(params->start); //print header if you are process 0 if (params->start == 0) { params->util.zapGremlins(in); params->util.getline(in); gobble(in); params->count++; } bool done = false; string name; int start, end, length, ambigs, polymer; long long numReps; while (!done) { if (params->m->getControl_pressed()) { break; } //seqname start end nbases ambigs polymer numSeqs in >> name >> start >> end >> length >> ambigs >> polymer >> numReps; gobble(in); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + name + "\t" + toString(start) + "\t" + toString(end) + "\t" + toString(length) + "\n"); } if (name != "") { if ((numReps == 1) && params->hasNameMap) { //make sure this sequence is in the namefile, else error map::iterator itFindName = params->nameMap.find(name); if (itFindName == params->nameMap.end()) { params->m->mothurOut("[ERROR]: '" + name + "' is not in your name or count file, please correct."); params->m->mothurOutEndLine(); params->m->setControl_pressed(true); } else { numReps = itFindName->second; } } map::iterator it = params->startPosition.find(start); if (it == params->startPosition.end()) { params->startPosition[start] = numReps; } //first finding of this start position, set count. else { it->second += numReps; } //add counts it = params->endPosition.find(end); if (it == params->endPosition.end()) { params->endPosition[end] = numReps; } //first finding of this end position, set count. else { it->second += numReps; } //add counts it = params->seqLength.find(length); if (it == params->seqLength.end()) { params->seqLength[length] = numReps; } //first finding of this length, set count. else { it->second += numReps; } //add counts it = params->ambigBases.find(ambigs); if (it == params->ambigBases.end()) { params->ambigBases[ambigs] = numReps; } //first finding of this ambig, set count. else { it->second += numReps; } //add counts it = params->longHomoPolymer.find(polymer); if (it == params->longHomoPolymer.end()) { params->longHomoPolymer[polymer] = numReps; } //first finding of this homop, set count. 
else { it->second += numReps; } //add counts params->count++; params->total += numReps; } #if defined NON_WINDOWS unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->end == params->count) { break; } #endif } in.close(); } catch(exception& e) { params-> m->errorOut(e, "Summary", "driverFastaSummarySummarize"); exit(1); } } /**********************************************************************************************************************/ long long Summary::summarizeFastaSummary(string summaryfile) { try { long long num = 0; vector positions; vector lines; #if defined NON_WINDOWS positions = util.divideFilePerLine(summaryfile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else positions = util.setFilePosEachLine(summaryfile, num); if (num < processors) { processors = num; } //figure out how many sequences you have to process int numSeqsPerProcessor = num / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = num - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { seqSumData* dataBundle = new seqSumData(summaryfile, lines[i+1].start, lines[i+1].end, hasNameOrCount, nameMap); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverFastaSummarySummarize, dataBundle)); } seqSumData* dataBundle = new seqSumData(summaryfile, lines[0].start, lines[0].end, hasNameOrCount, nameMap); driverFastaSummarySummarize(dataBundle); num = dataBundle->count-1; //header line total = dataBundle->total; startPosition = dataBundle->startPosition; endPosition = dataBundle->endPosition; seqLength = dataBundle->seqLength; ambigBases = dataBundle->ambigBases; longHomoPolymer = dataBundle->longHomoPolymer; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; total += data[i]->total; for (map::iterator it = data[i]->startPosition.begin(); it != data[i]->startPosition.end(); it++) { map::iterator itMain = startPosition.find(it->first); if (itMain == startPosition.end()) { //newValue startPosition[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->endPosition.begin(); it != data[i]->endPosition.end(); it++) { map::iterator itMain = endPosition.find(it->first); if (itMain == endPosition.end()) { //newValue endPosition[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->seqLength.begin(); it != data[i]->seqLength.end(); it++) { map::iterator itMain = seqLength.find(it->first); if (itMain == seqLength.end()) { //newValue seqLength[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->ambigBases.begin(); it != data[i]->ambigBases.end(); it++) { map::iterator itMain = ambigBases.find(it->first); if (itMain == ambigBases.end()) { //newValue ambigBases[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->longHomoPolymer.begin(); it != data[i]->longHomoPolymer.end(); it++) { map::iterator itMain = longHomoPolymer.find(it->first); if (itMain == longHomoPolymer.end()) { //newValue longHomoPolymer[it->first] 
= it->second; }else { itMain->second += it->second; } //merge counts } delete data[i]; delete workerThreads[i]; } if (hasNameOrCount) { if (nameCountNumUniques != num) { // do fasta and name/count files match m->mothurOut("[ERROR]: Your " + type + " file contains " + toString(nameCountNumUniques) + " unique sequences, but your fasta file contains " + toString(num) + ". File mismatch detected, quitting command.\n"); m->setControl_pressed(true); } } numUniques = num; return num; } catch(exception& e) { m->errorOut(e, "Summary", "summarizeFastaSummary"); exit(1); } } //********************************************************************************************************************** long long Summary::summarizeContigsSummary(string summaryfile, string n) { try { //fill namemap processNameCount(n); return (summarizeContigsSummary(summaryfile)); } catch(exception& e) { m->errorOut(e, "Summary", "summarizeFastaSummary"); exit(1); } } //********************************************************************************************************************** void driverContigsSummarySummarize(seqSumData* params) { try { ifstream in; params->util.openInputFile(params->filename, in); in.seekg(params->start); //print header if you are process 0 ContigsReport report; if (params->start == 0) { params->util.zapGremlins(in); report.readHeaders(in); gobble(in); params->count++; } bool done = false; while (!done) { if (params->m->getControl_pressed()) { break; } report.read(in); gobble(in); string name = report.getName(); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + name + "\t" + toString(report.getOverlapStart()) + "\t" + toString(report.getOverlapEnd()) + "\t" + toString(report.getLength()) + "\n"); } if (name != "") { long long numReps = 1; if (params->hasNameMap) { //make sure this sequence is in the namefile, else error map::iterator itFindName = params->nameMap.find(name); if (itFindName == params->nameMap.end()) { params->m->mothurOut("[ERROR]: '" + name + "' is not in your name or count file, please correct."); params->m->mothurOutEndLine(); params->m->setControl_pressed(true); } else { numReps = itFindName->second; } } int overlapStart = report.getOverlapStart(); map::iterator it = params->ostartPosition.find(overlapStart); if (it == params->ostartPosition.end()) { params->ostartPosition[overlapStart] = numReps; } //first finding of this start position, set count. else { it->second += numReps; } //add counts int overlapEnd = report.getOverlapEnd(); it = params->oendPosition.find(overlapEnd); if (it == params->oendPosition.end()) { params->oendPosition[overlapEnd] = numReps; } //first finding of this end position, set count. else { it->second += numReps; } //add counts int overlapLength = report.getOverlapLength(); it = params->oseqLength.find(overlapLength); if (it == params->oseqLength.end()) { params->oseqLength[overlapLength] = numReps; } //first finding of this length, set count. else { it->second += numReps; } //add counts int length = report.getLength(); it = params->seqLength.find(length); if (it == params->seqLength.end()) { params->seqLength[length] = numReps; } //first finding of this length, set count. else { it->second += numReps; } //add counts int numMisMatches = report.getMisMatches(); it = params->misMatches.find(numMisMatches); if (it == params->misMatches.end()) { params->misMatches[numMisMatches] = numReps; } //first finding of this ambig, set count. 
else { it->second += numReps; } //add counts int numNs = report.getNumNs(); it = params->numNs.find(numNs); if (it == params->numNs.end()) { params->numNs[numNs] = numReps; } //first finding of this homop, set count. else { it->second += numReps; } //add counts params->count++; params->total += numReps; } #if defined NON_WINDOWS unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->end == params->count) { break; } #endif } in.close(); } catch(exception& e) { params->m->errorOut(e, "Summary", "driverContigsSummarySummarize"); exit(1); } } /**********************************************************************************************************************/ long long Summary::summarizeContigsSummary(string summaryfile) { try { long long num = 0; vector positions; vector lines; #if defined NON_WINDOWS positions = util.divideFilePerLine(summaryfile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else positions = util.setFilePosEachLine(summaryfile, num); if (num < processors) { processors = num; } //figure out how many sequences you have to process int numSeqsPerProcessor = num / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = num - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { if (m->getDebug()) { m->mothurOut("[DEBUG]: creating thread " + toString(i+1) + "\n"); } seqSumData* dataBundle = new seqSumData(summaryfile, lines[i+1].start, lines[i+1].end, hasNameOrCount, nameMap); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverContigsSummarySummarize, dataBundle)); } seqSumData* dataBundle = new seqSumData(summaryfile, lines[0].start, lines[0].end, hasNameOrCount, nameMap); driverContigsSummarySummarize(dataBundle); num = dataBundle->count-1; //header line total = dataBundle->total; ostartPosition = dataBundle->ostartPosition; oendPosition = dataBundle->oendPosition; seqLength = dataBundle->seqLength; oseqLength = dataBundle->oseqLength; misMatches = dataBundle->misMatches; numNs = dataBundle->numNs; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; total += data[i]->total; for (map::iterator it = data[i]->ostartPosition.begin(); it != data[i]->ostartPosition.end(); it++) { map::iterator itMain = ostartPosition.find(it->first); if (itMain == ostartPosition.end()) { //newValue ostartPosition[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->oendPosition.begin(); it != data[i]->oendPosition.end(); it++) { map::iterator itMain = oendPosition.find(it->first); if (itMain == oendPosition.end()) { //newValue oendPosition[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->oseqLength.begin(); it != data[i]->oseqLength.end(); it++) { map::iterator itMain = oseqLength.find(it->first); if (itMain == oseqLength.end()) { //newValue oseqLength[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->seqLength.begin(); it != data[i]->seqLength.end(); it++) { map::iterator itMain = seqLength.find(it->first); if (itMain == 
seqLength.end()) { //newValue seqLength[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->misMatches.begin(); it != data[i]->misMatches.end(); it++) { map::iterator itMain = misMatches.find(it->first); if (itMain == misMatches.end()) { //newValue misMatches[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->numNs.begin(); it != data[i]->numNs.end(); it++) { map::iterator itMain = numNs.find(it->first); if (itMain == numNs.end()) { //newValue numNs[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } delete data[i]; delete workerThreads[i]; } if (hasNameOrCount) { if (nameCountNumUniques != num) { // do fasta and name/count files match m->mothurOut("[ERROR]: Your " + type + " file contains " + toString(nameCountNumUniques) + " unique sequences, but your fasta file contains " + toString(num) + ". File mismatch detected, quitting command.\n"); m->setControl_pressed(true); } } numUniques = num; return num; } catch(exception& e) { m->errorOut(e, "Summary", "summarizeFastaSummary"); exit(1); } } //********************************************************************************************************************** long long Summary::summarizeAlignSummary(string summaryfile, string n) { try { //fill namemap processNameCount(n); return (summarizeAlignSummary(summaryfile)); } catch(exception& e) { m->errorOut(e, "Summary", "summarizeFastaSummary"); exit(1); } } //********************************************************************************************************************** void driverAlignSummarySummarize(seqSumData* params) { try { ifstream in; params->util.openInputFile(params->filename, in); in.seekg(params->start); AlignReport report; //print header if you are process 0 if (params->start == 0) { params->util.zapGremlins(in); report.readHeaders(in); gobble(in); params->count++; } bool done = false; while (!done) { if (params->m->getControl_pressed()) { break; } report.read(in); gobble(in); string name = report.getQueryName(); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + name + "\t" + toString(report.getTemplateName()) + "\t" + toString(report.getSearchScore()) + "\t" + toString(report.getQueryLength()) + "\n"); } if (name != "") { //string seqInfo = addSeq(name, length, SimBtwnQueryTemplate, SearchScore, LongestInsert); long long numReps = 1; if (params->hasNameMap) { //make sure this sequence is in the namefile, else error map::iterator itFindName = params->nameMap.find(name); if (itFindName == params->nameMap.end()) { params->m->mothurOut("[ERROR]: '" + name + "' is not in your name or count file, please correct."); params->m->mothurOutEndLine(); params->m->setControl_pressed(true); } else { numReps = itFindName->second; } } float SimBtwnQueryTemplate = report.getSimBtwnQueryAndTemplate(); map:: iterator itFloat = params->sims.find(SimBtwnQueryTemplate); if (itFloat == params->sims.end()) { params->sims[SimBtwnQueryTemplate] = numReps; } //first finding of this similarity score, set count. else { itFloat->second += numReps; } //add counts float SearchScore = report.getSearchScore(); itFloat = params->scores.find(SearchScore); if (itFloat == params->scores.end()) { params->scores[SearchScore] = numReps; } //first finding of this end position, set count. 
else { itFloat->second += numReps; } //add counts int longestInsert = report.getLongestInsert(); map::iterator it = params->inserts.find(longestInsert); if (it == params->inserts.end()) { params->inserts[longestInsert] = numReps; } //first finding of this length, set count. else { it->second += numReps; } //add counts int length = report.getQueryLength(); it = params->seqLength.find(length); if (it == params->seqLength.end()) { params->seqLength[length] = numReps; } //first finding of this length, set count. else { it->second += numReps; } //add counts params->count++; params->total += numReps; } #if defined NON_WINDOWS unsigned long long pos = in.tellg(); if ((pos == -1) || (pos >= params->end)) { break; } #else if (params->end == params->count) { break; } #endif } in.close(); } catch(exception& e) { params->m->errorOut(e, "Summary", "driverAlignSummarySummarize"); exit(1); } } /**********************************************************************************************************************/ long long Summary::summarizeAlignSummary(string summaryfile) { try { long long num = 0; vector positions; vector lines; #if defined NON_WINDOWS positions = util.divideFilePerLine(summaryfile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else positions = util.setFilePosEachLine(summaryfile, num); if (num < processors) { processors = num; } //figure out how many sequences you have to process int numSeqsPerProcessor = num / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = num - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector workerThreads; vector data; //Lauch worker threads for (int i = 0; i < processors-1; i++) { seqSumData* dataBundle = new seqSumData(summaryfile, lines[i+1].start, lines[i+1].end, hasNameOrCount, nameMap); data.push_back(dataBundle); workerThreads.push_back(new std::thread(driverAlignSummarySummarize, dataBundle)); } seqSumData* dataBundle = new seqSumData(summaryfile, lines[0].start, lines[0].end, hasNameOrCount, nameMap); driverAlignSummarySummarize(dataBundle); num = dataBundle->count-1; //header line total = dataBundle->total; sims = dataBundle->sims; scores = dataBundle->scores; inserts = dataBundle->inserts; seqLength = dataBundle->seqLength; delete dataBundle; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->count; total += data[i]->total; for (map::iterator it = data[i]->sims.begin(); it != data[i]->sims.end(); it++) { map::iterator itMain = sims.find(it->first); if (itMain == sims.end()) { //newValue sims[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->scores.begin(); it != data[i]->scores.end(); it++) { map::iterator itMain = scores.find(it->first); if (itMain == scores.end()) { //newValue scores[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->inserts.begin(); it != data[i]->inserts.end(); it++) { map::iterator itMain = inserts.find(it->first); if (itMain == inserts.end()) { //newValue inserts[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } for (map::iterator it = data[i]->seqLength.begin(); it != data[i]->seqLength.end(); it++) { map::iterator itMain = seqLength.find(it->first); if (itMain == 
seqLength.end()) { //newValue seqLength[it->first] = it->second; }else { itMain->second += it->second; } //merge counts } delete data[i]; delete workerThreads[i]; } if (hasNameOrCount) { if (nameCountNumUniques != num) { // do fasta and name/count files match m->mothurOut("[ERROR]: Your " + type + " file contains " + toString(nameCountNumUniques) + " unique sequences, but your fasta file contains " + toString(num) + ". File mismatch detected, quitting command.\n"); m->setControl_pressed(true); } } numUniques = num; return num; } catch(exception& e) { m->errorOut(e, "Summary", "summarizeAlignSummary"); exit(1); } } //********************************************************************************************************************** mothur-1.48.0/source/summary.hpp000077500000000000000000000202651424121717000166370ustar00rootroot00000000000000// // summary.hpp // Mothur // // Created by Sarah Westcott on 3/27/17. // Copyright © 2017 Schloss Lab. All rights reserved. // #ifndef summary_hpp #define summary_hpp #include "mothurout.h" #include "sequence.hpp" #include "counttable.h" class Summary { public: #ifdef UNIT_TEST friend class TestSummary; #endif Summary(int p) { processors = p; m = MothurOut::getInstance(); total = 0; numUniques = 0; hasNameOrCount = false; nameCountNumUniques = 0; type = "count"; } ~Summary() = default; long long summarizeFasta(string f, string n, string o); //provide fasta file to summarize (paralellized) and optional nameorCountfile and optional outputfile for individual seqs info. To skip nameCount or output file, n="" and / or o="" long long summarizeFasta(string f, string o); //provide fasta file to summarize (paralellized) and optional outputfile for individual seqs info. To skip output file, o="" long long summarizeFastaSummary(string f); //provide summary of fasta file to summarize (paralellized) long long summarizeFastaSummary(string f, string n); //provide summary of fasta file and name or count file to summarize (paralellized) long long summarizeContigsSummary(string f); //provide summary of contigs summary file to summarize (paralellized) long long summarizeContigsSummary(string f, string n); //provide summary of contigs summary file and name or count file to summarize (paralellized) long long summarizeAlignSummary(string f); //provide summary of contigs summary file to summarize (paralellized) long long summarizeAlignSummary(string f, string n); //provide summary of contigs summary file and name or count file to summarize (paralellized) vector getDefaults(); //fasta and summary vector getStart() { return (getValues(startPosition)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getStart(double value) { return (getValue(startPosition, value)); } //2.5 = 2.5% of sequences of sequences start before, 25 = location 25% of sequences start before vector getEnd() { return (getValues(endPosition)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getEnd(double value) { return (getValue(endPosition, value)); } //2.5 = 2.5% of sequences of sequences end after, 25 = location 25% of sequences end after vector getAmbig() { return (getValues(ambigBases)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getAmbig(double value) { return (getValue(ambigBases, value)); } //25 = max abigous bases 25% of sequences contain vector getLength() { return (getValues(seqLength)); } //returns vector of 8 locations. 
(min, 2.5, 25, 50, 75, 97.5, max, mean) long long getLength(double value) { return (getValue(seqLength, value)); } // 25 = min length of 25% of sequences vector getHomop() { return (getValues(longHomoPolymer)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getHomop(double value) { return (getValue(longHomoPolymer, value)); } //contigs vector getOStart() { return (getValues(ostartPosition)); } //contigs overlap start - returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getOStart(double value) { return (getValue(ostartPosition, value)); } //contigs overlap start - 2.5 = 2.5% of sequences of sequences start before, 25 = location 25% of sequences start before vector getOEnd() { return (getValues(oendPosition)); } //contigs overlap end -returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getOEnd(double value) { return (getValue(oendPosition, value)); } //contigs overlap end -2.5 = 2.5% of sequences of sequences end after, 25 = location 25% of sequences end after vector getOLength() { return (getValues(oseqLength)); } //contigs overlap length - returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getOLength(double value) { return (getValue(oseqLength, value)); } //contigs overlap length - 25 = min length of 25% of sequences vector getMisMatches() { return (getValues(misMatches)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getMisMatches(double value) { return (getValue(misMatches, value)); } vector getNumNs() { return (getValues(numNs)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getNumNs(double value) { return (getValue(numNs, value)); } //25 = max abigous bases 25% of sequences contain vector getSims() { return (getValues(sims)); } //contigs overlap length - returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getSims(double value) { return (getValue(sims, value)); } //contigs overlap length - 25 = min length of 25% of sequences vector getScores() { return (getValues(scores)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean) long long getScores(double value) { return (getValue(scores, value)); } vector getNumInserts() { return (getValues(inserts)); } //returns vector of 8 locations. 
(min, 2.5, 25, 50, 75, 97.5, max, mean) long long getNumInserts(double value) { return (getValue(inserts, value)); } //25 = max abigous bases 25% of sequences contain int getMaxAbundance(); long long getTotalSeqs() { return total; } long long getUniqueSeqs() { return numUniques; } private: MothurOut* m; Utils util; int processors; long long total, numUniques, nameCountNumUniques; bool hasNameOrCount; string type; map startPosition; map endPosition; map seqLength; map ambigBases; map longHomoPolymer; map ostartPosition; map oendPosition; map oseqLength; map misMatches; map numNs; map sims; map scores; map inserts; map nameMap; map::iterator it; void processNameCount(string n); //determines whether name or count and fills nameMap, ignored if n = "" vector getValues(map& positions); long long getValue(map& positions, double); vector getValues(map& positions); long long getValue(map& positions, double); bool isCountFile(string); }; /**************************************************************************************************/ struct seqSumData { map startPosition; map endPosition; map seqLength; map ambigBases; map longHomoPolymer; map ostartPosition; map oendPosition; map oseqLength; map misMatches; map numNs; map sims; map scores; map inserts; string filename, summaryFile, contigsfile, output; double start; double end; long long count; long long total; MothurOut* m; bool hasNameMap; map nameMap; Utils util; seqSumData(){} //FastaSummarize - output file created seqSumData(string f, string sum, double st, double en, bool na, map nam) { filename = f; m = MothurOut::getInstance(); start = st; end = en; hasNameMap = na; nameMap = nam; count = 0; total = 0; summaryFile = sum; } //FastaSummarySummarize - no output files seqSumData(string f, double st, double en, bool na, map nam) { filename = f; m = MothurOut::getInstance(); start = st; end = en; hasNameMap = na; nameMap = nam; count = 0; total = 0; } }; /**************************************************************************************************/ #endif /* summary_hpp */ mothur-1.48.0/source/svm/000077500000000000000000000000001424121717000152265ustar00rootroot00000000000000mothur-1.48.0/source/svm/svm.cpp000077500000000000000000001475411424121717000165560ustar00rootroot00000000000000// // svm.cpp // support vector machine // // Created by Joshua Lynch on 6/19/2013. // Copyright (c) 2013 Schloss Lab. All rights reserved. 
// #include #include #include #include #include #include #include #include #include "svm.hpp" // OutputFilter constants const int OutputFilter::QUIET = 0; const int OutputFilter::INFO = 1; const int OutputFilter::mDEBUG = 2; const int OutputFilter::TRACE = 3; #define RANGE(X) X, X + sizeof(X)/sizeof(double) // parameters will be tested in the order they are specified const string LinearKernelFunction::MapKey = "linear";//"LinearKernel"; const string LinearKernelFunction::MapKey_Constant = "constant";//"LinearKernel_Constant"; const double defaultLinearConstantRangeArray[] = {0.0, -1.0, 1.0, -10.0, 10.0}; const ParameterRange LinearKernelFunction::defaultConstantRange = ParameterRange(RANGE(defaultLinearConstantRangeArray)); const string RbfKernelFunction::MapKey = "rbf";//"RbfKernel"; const string RbfKernelFunction::MapKey_Gamma = "gamma";//"RbfKernel_Gamma"; const double defaultRbfGammaRangeArray[] = {0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0}; const ParameterRange RbfKernelFunction::defaultGammaRange = ParameterRange(RANGE(defaultRbfGammaRangeArray)); const string PolynomialKernelFunction::MapKey = "polynomial";//"PolynomialKernel"; const string PolynomialKernelFunction::MapKey_Constant = "constant";//"PolynomialKernel_Constant"; const string PolynomialKernelFunction::MapKey_Coefficient = "coefficient";//"PolynomialKernel_Coefficient"; const string PolynomialKernelFunction::MapKey_Degree = "degree";//"PolynomialKernel_Degree"; const double defaultPolynomialConstantRangeArray[] = {0.0, -1.0, 1.0, -2.0, 2.0, -3.0, 3.0}; const ParameterRange PolynomialKernelFunction::defaultConstantRange = ParameterRange(RANGE(defaultPolynomialConstantRangeArray)); const double defaultPolynomialCoefficientRangeArray[] = {0.01, 0.1, 1.0, 10.0, 100.0}; const ParameterRange PolynomialKernelFunction::defaultCoefficientRange = ParameterRange(RANGE(defaultPolynomialCoefficientRangeArray)); const double defaultPolynomialDegreeRangeArray[] = {2.0, 3.0, 4.0}; const ParameterRange PolynomialKernelFunction::defaultDegreeRange = ParameterRange(RANGE(defaultPolynomialDegreeRangeArray)); const string SigmoidKernelFunction::MapKey = "sigmoid"; const string SigmoidKernelFunction::MapKey_Alpha = "alpha"; const string SigmoidKernelFunction::MapKey_Constant = "constant"; const double defaultSigmoidAlphaRangeArray[] = {1.0, 2.0}; const ParameterRange SigmoidKernelFunction::defaultAlphaRange = ParameterRange(RANGE(defaultSigmoidAlphaRangeArray)); const double defaultSigmoidConstantRangeArray[] = {1.0, 2.0}; const ParameterRange SigmoidKernelFunction::defaultConstantRange = ParameterRange(RANGE(defaultSigmoidConstantRangeArray)); const string SmoTrainer::MapKey_C = "smoc";//"SmoTrainer_C"; const double defaultSmoTrainerCRangeArray[] = {0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0}; const ParameterRange SmoTrainer::defaultCRange = ParameterRange(RANGE(defaultSmoTrainerCRangeArray)); MothurOut* m = MothurOut::getInstance(); LabelPair buildLabelPair(const Label& one, const Label& two) { LabelVector labelPair(2); labelPair[0] = one; labelPair[1] = two; return labelPair; } // Dividing a dataset into training and testing sets while maintaining equal // representation of all classes is done using a LabelToLabeledObservationVector. // This container is used to divide datasets into groups of LabeledObservations // having the same label. For example, given a LabeledObservationVector like // ["blue", [1.0, 2.0, 3.0]] // ["green", [3.0, 4.0, 5.0]] // ["blue", [2,0, 3.0. 
4.0]] // ["green", [4.0, 5.0, 6.0]] // the corresponding LabelToLabeledObservationVector looks like // "blue" : [["blue", [1.0, 2.0, 3.0]], ["blue", [2,0, 3.0. 4.0]]] // "green" : [["green", [3.0, 4.0, 5.0]], ["green", [4.0, 5.0, 6.0]]] void buildLabelToLabeledObservationVector(LabelToLabeledObservationVector& labelToLabeledObservationVector, const LabeledObservationVector& labeledObservationVector) { for ( LabeledObservationVector::const_iterator j = labeledObservationVector.begin(); j != labeledObservationVector.end(); j++ ) { labelToLabeledObservationVector[j->first].push_back(*j); } } class MeanAndStd { private: double n; double M2; double mean; public: MeanAndStd() = default; ~MeanAndStd() = default; void initialize() { n = 0.0; mean = 0.0; M2 = 0.0; } void processNextValue(double x) { n += 1.0; double delta = x - mean; mean += delta / n; M2 += delta * (x - mean); } double getMean() { return mean; } double getStd() { double variance = M2 / (n - 1.0); return sqrt(variance); } }; // The LabelMatchesEither functor is used only in a call to remove_copy_if in the // OneVsOneMultiClassSvmTrainer::train method. It returns true if the labeled // observation argument has the same label as either of the two label arguments. class FeatureLabelMatches { public: FeatureLabelMatches(const string& _featureLabel) : featureLabel(_featureLabel){} bool operator() (const Feature& f) { return f.getFeatureLabel() == featureLabel; } private: const string& featureLabel; }; Feature removeFeature(Feature featureToRemove, LabeledObservationVector& observations, FeatureVector& featureVector) { FeatureLabelMatches matchFeatureLabel(featureToRemove.getFeatureLabel()); featureVector.erase( remove_if(featureVector.begin(), featureVector.end(), matchFeatureLabel), featureVector.end() ); for ( ObservationVector::size_type observation = 0; observation < observations.size(); observation++ ) { observations[observation].removeFeatureAtIndex(featureToRemove.getFeatureIndex()); } // update the feature indices for ( int i = 0; i < featureVector.size(); i++ ) { featureVector.at(i).setFeatureIndex(i); } featureToRemove.setFeatureIndex(-1); return featureToRemove; } FeatureVector applyStdThreshold(double stdThreshold, LabeledObservationVector& observations, FeatureVector& featureVector) { // calculate standard deviation of each feature // remove features with standard deviation less than or equal to stdThreshold MeanAndStd ms; // loop over features in reverse order so we can get the index of each // for example, // if there are 5 features a,b,c,d,e // and features a, c, e fall below the stdThreshold // loop iteration 0: remove feature e (index 4) -- features are now a,b,c,d // loop iteration 1: leave feature d (index 3) // loop iteration 2: remove feature c (index 2) -- features are now a,b,d // loop iteration 3: leave feature b (index 1) // loop iteration 4: remove feature a (index 0) -- features are now b,d FeatureVector removedFeatureVector; for ( int feature = observations[0].second->size()-1; feature >= 0 ; feature-- ) { ms.initialize(); m->mothurOut("feature index " + toString(feature)); m->mothurOutEndLine(); for ( ObservationVector::size_type observation = 0; observation < observations.size(); observation++ ) { ms.processNextValue(observations[observation].second->at(feature)); } m->mothurOut( "feature " + toString(feature) + " has std " + toString(ms.getStd()) ); m->mothurOutEndLine(); if ( ms.getStd() <= stdThreshold ) { m->mothurOut( "removing feature with index " + toString(feature) ); m->mothurOutEndLine(); // 
remove this feature Feature featureToRemove = featureVector.at(feature); removedFeatureVector.push_back( removeFeature(featureToRemove, observations, featureVector) ); } } reverse(removedFeatureVector.begin(), removedFeatureVector.end()); return removedFeatureVector; } // this function standardizes data to mean 0 and variance 1 // but this may not be a good standardization for OTU data void transformZeroMeanUnitVariance(LabeledObservationVector& observations) { bool vebose = false; // online method for mean and variance MeanAndStd ms; for ( Observation::size_type feature = 0; feature < observations[0].second->size(); feature++ ) { ms.initialize(); //double n = 0.0; //double mean = 0.0; //double M2 = 0.0; for ( ObservationVector::size_type observation = 0; observation < observations.size(); observation++ ) { ms.processNextValue(observations[observation].second->at(feature)); //n += 1.0; //double x = observations[observation].second->at(feature); //double delta = x - mean; //mean += delta / n; //M2 += delta * (x - mean); } //double variance = M2 / (n - 1.0); //double standardDeviation = sqrt(variance); if (vebose) { m->mothurOut( "mean of feature " + toString(feature) + " is " + toString(ms.getMean()) ); m->mothurOutEndLine(); m->mothurOut( "std of feature " + toString(feature) + " is " + toString(ms.getStd()) ); m->mothurOutEndLine(); } // normalize the feature double mean = ms.getMean(); double std = ms.getStd(); for ( ObservationVector::size_type observation = 0; observation < observations.size(); observation++ ) { observations[observation].second->at(feature) = (observations[observation].second->at(feature) - mean ) / std; } } } double getMinimumFeatureValueForObservation(Observation::size_type featureIndex, LabeledObservationVector& observations) { double featureMinimum = numeric_limits::max(); for ( ObservationVector::size_type observation = 0; observation < observations.size(); observation++ ) { if ( observations[observation].second->at(featureIndex) < featureMinimum ) { featureMinimum = observations[observation].second->at(featureIndex); } } return featureMinimum; } double getMaximumFeatureValueForObservation(Observation::size_type featureIndex, LabeledObservationVector& observations) { double featureMaximum = numeric_limits::min(); for ( ObservationVector::size_type observation = 0; observation < observations.size(); observation++ ) { if ( observations[observation].second->at(featureIndex) > featureMaximum ) { featureMaximum = observations[observation].second->at(featureIndex); } } return featureMaximum; } // this function standardizes data to minimum value 0.0 and maximum value 1.0 void transformZeroOne(LabeledObservationVector& observations) { for ( Observation::size_type feature = 0; feature < observations[0].second->size(); feature++ ) { double featureMinimum = getMinimumFeatureValueForObservation(feature, observations); double featureMaximum = getMaximumFeatureValueForObservation(feature, observations); // standardize the feature for ( ObservationVector::size_type observation = 0; observation < observations.size(); observation++ ) { double x = observations[observation].second->at(feature); double xstd = (x - featureMinimum) / (featureMaximum - featureMinimum); observations[observation].second->at(feature) = xstd / (1.0 - 0.0) + 0.0; } } } // // SVM member functions // // the discriminant member function returns +1 or -1 int SVM::discriminant(const Observation& observation) const { // d is the discriminant function double d = b; for ( int i = 0; i < y.size(); i++ ) { d += 
y[i]*a[i]*inner_product(observation.begin(), observation.end(), x[i].second->begin(), 0.0); } return d > 0.0 ? 1 : -1; } LabelVector SVM::classify(const LabeledObservationVector& twoClassLabeledObservationVector) const { LabelVector predictionVector; for ( LabeledObservationVector::const_iterator i = twoClassLabeledObservationVector.begin(); i != twoClassLabeledObservationVector.end(); i++ ) { Label prediction = classify(*(i->getObservation())); Label actual = i->getLabel(); predictionVector.push_back(prediction); } return predictionVector; } // the score member function classifies each labeled observation from the // argument and returns the fraction of correct classifications // don't need this any more???? double SVM::score(const LabeledObservationVector& twoClassLabeledObservationVector) const { double s = 0.0; for ( LabeledObservationVector::const_iterator i = twoClassLabeledObservationVector.begin(); i != twoClassLabeledObservationVector.end(); i++ ) { Label predicted_label = classify(*(i->second)); if ( predicted_label == i->first ) { s = s + 1.0; } else { } } return s / double(twoClassLabeledObservationVector.size()); } void SvmPerformanceSummary::init(const SVM& svm, const LabeledObservationVector& actualLabels, const LabelVector& predictedLabels) { // accumulate four counts: // tp (true positive) -- correct classifications (classified +1 as +1) // fp (false positive) -- incorrect classifications (classified -1 as +1) // fn (false negative) -- incorrect classifications (classified +1 as -1) // tn (true negative) -- correct classification (classified -1 as -1) // the label corresponding to discriminant +1 will be the 'positive' class NumericClassToLabel discriminantToLabel = svm.getDiscriminantToLabel(); positiveClassLabel = discriminantToLabel[1]; negativeClassLabel = discriminantToLabel[-1]; double tp = 0; double fp = 0; double fn = 0; double tn = 0; for (int i = 0; i < actualLabels.size(); i++) { Label predictedLabel = predictedLabels.at(i); Label actualLabel = actualLabels.at(i).getLabel(); if ( actualLabel.compare(positiveClassLabel) == 0) { if ( predictedLabel.compare(positiveClassLabel) == 0 ) { tp++; } else if ( predictedLabel.compare(negativeClassLabel) == 0 ) { fn++; } else { m->mothurOut( "actual label is positive but something is wrong" ); m->mothurOutEndLine(); } } else if ( actualLabel.compare(negativeClassLabel) == 0 ) { if ( predictedLabel.compare(positiveClassLabel) == 0 ) { fp++; } else if ( predictedLabel.compare(negativeClassLabel) == 0 ) { tn++; } else { m->mothurOut( "actual label is negative but something is wrong" ); m->mothurOutEndLine(); } } else { // in the event we have been given an observation that is labeled // neither positive nor negative then we will get a false classification if ( predictedLabel.compare(positiveClassLabel) ) { fp++; } else { fn++; } } } Utils util; if (util.isEqual(tp, 0) && util.isEqual(fp, 0) ) { precision = 0; } else { precision = tp / (tp + fp); } recall = tp / (tp + fn); if ( util.isEqual(precision, 0) && util.isEqual(recall, 0) ) { f = 0; } else { f = 2.0 * (precision * recall) / (precision + recall); } accuracy = (tp + tn) / (tp + tn + fp + fn); } MultiClassSVM::MultiClassSVM(const vector s, const LabelSet& l, const SvmToSvmPerformanceSummary& p, OutputFilter of) : twoClassSvmList(s.begin(), s.end()), labelSet(l), svmToSvmPerformanceSummary(p), outputFilter(of), accuracy(0) {} MultiClassSVM::~MultiClassSVM() { for ( int i = 0; i < twoClassSvmList.size(); i++ ) { delete twoClassSvmList[i]; } } // The fewerVotes function is 
used to find the maximum vote // tally in MultiClassSVM::classify. This function returns true // if the first element (number of votes for the first label) is // less than the second element (number of votes for the second label). bool fewerVotes(const pair& p, const pair& q) { return p.second < q.second; } Label MultiClassSVM::classify(const Observation& observation) { map labelToVoteCount; for ( int i = 0; i < twoClassSvmList.size(); i++ ) { Label predictedLabel = twoClassSvmList[i]->classify(observation); labelToVoteCount[predictedLabel]++; } pair winner = *max_element(labelToVoteCount.begin(), labelToVoteCount.end(), fewerVotes); LabelVector winningLabels; winningLabels.push_back(winner.first); for ( map::const_iterator i = labelToVoteCount.begin(); i != labelToVoteCount.end(); i++ ) { if ( i->second == winner.second && i->first != winner.first ) { winningLabels.push_back(i->first); } } if ( winningLabels.size() == 1) { // we have a winner } else { // we have a tie throw MultiClassSvmClassificationTie(winningLabels, winner.second); } return winner.first; } double MultiClassSVM::score(const LabeledObservationVector& multiClassLabeledObservationVector) { double s = 0.0; for (LabeledObservationVector::const_iterator i = multiClassLabeledObservationVector.begin(); i != multiClassLabeledObservationVector.end(); i++) { try { Label predicted_label = classify(*(i->second)); if ( predicted_label == i->first ) { s = s + 1.0; } else { // predicted label does not match actual label } } catch ( MultiClassSvmClassificationTie& e ) { if ( outputFilter.debug() ) { m->mothurOut( "classification tie for observation " + toString(i->datasetIndex) + " with label " + toString(i->first) ); m->mothurOutEndLine(); } } } return s / double(multiClassLabeledObservationVector.size()); } class MaxIterationsExceeded : public exception { virtual const char* what() const throw() { return "maximum iterations exceeded during SMO"; } } maxIterationsExceeded; //SvmTrainingInterruptedException smoTrainingInterruptedException("SMO training interrupted by user"); // The train method implements Sequential Minimal Optimization as described in // "Support Vector Machine Solvers" by Bottou and Lin. // // SmoTrainer::train releases a pointer to an SVM into the wild so we must be // careful about handling the LabeledObservationVector.... Must create a copy // of those labeled vectors??? 
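//
// The following helper is an illustrative sketch added for exposition only;
// it is not referenced anywhere else and simply restates the "maximal
// violating pair" selection that SmoTrainer::train performs inline.  With
// numeric labels y (+1/-1), dual coefficients a, gradient g, and box bounds
// A[k] = min(0, y[k]*C) and B[k] = max(0, y[k]*C), each SMO iteration picks
//     i = argmax { y[k]*g[k] : y[k]*a[k] < B[k] }
//     j = argmin { y[k]*g[k] : A[k] < y[k]*a[k] }
// takes a step of size
//     lambda = min( B[i] - y[i]*a[i],
//                   y[j]*a[j] - A[j],
//                   (y[i]*g[i] - y[j]*g[j]) / (K_ii + K_jj - 2*K_ij) )
// then updates a[i] += y[i]*lambda, a[j] -= y[j]*lambda and, for every k,
// g[k] += lambda*y[k]*(K_jk - K_ik), stopping when y[i]*g[i] <= y[j]*g[j]
// (no violating pair remains) or lambda becomes negligible.
static std::pair<int, int> illustrateMaximalViolatingPairSelection(const std::vector<double>& y,
                                                                   const std::vector<double>& a,
                                                                   const std::vector<double>& g,
                                                                   const std::vector<double>& A,
                                                                   const std::vector<double>& B) {
    int i = 0;
    int j = 0;
    double ygMax = -std::numeric_limits<double>::max();
    double ygMin = std::numeric_limits<double>::max();
    for ( int k = 0; k < (int)y.size(); k++ ) {
        double ya = y[k] * a[k];   // y_k * alpha_k
        double yg = y[k] * g[k];   // y_k * gradient_k
        if ( ya < B[k] && yg > ygMax ) { ygMax = yg; i = k; }  // best "up" candidate
        if ( A[k] < ya && yg < ygMin ) { ygMin = yg; j = k; }  // best "down" candidate
    }
    return std::make_pair(i, j);
}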
SVM* SmoTrainer::train(KernelFunctionCache& K, const LabeledObservationVector& twoClassLabeledObservationVector) { const int observationCount = twoClassLabeledObservationVector.size(); const int featureCount = twoClassLabeledObservationVector[0].second->size(); if (outputFilter.debug()) m->mothurOut( "observation count : " + toString(observationCount) ); m->mothurOutEndLine(); if (outputFilter.debug()) m->mothurOut( "feature count : " + toString(featureCount) ); m->mothurOutEndLine(); // dual coefficients vector a(observationCount, 0.0); // gradient vector g(observationCount, 1.0); // convert the labels to -1.0,+1.0 vector y(observationCount); if (outputFilter.trace()) m->mothurOut( "assign numeric labels" ); m->mothurOutEndLine(); NumericClassToLabel discriminantToLabel; assignNumericLabels(y, twoClassLabeledObservationVector, discriminantToLabel); if (outputFilter.trace()) m->mothurOut( "assign A and B" ); m->mothurOutEndLine(); vector A(observationCount); vector B(observationCount); Utils util; for ( int n = 0; n < observationCount; n++ ) { if ( util.isEqual(y[n], +1.0)) { A[n] = 0.0; B[n] = C; } else { A[n] = -C; B[n] = 0; } if (outputFilter.trace()) m->mothurOut( toString(n) + " " + toString(A[n]) + " " + toString(B[n]) ); m->mothurOutEndLine(); } if (outputFilter.trace()) m->mothurOut( "assign K" ); m->mothurOutEndLine(); int m_count = 0; vector u(3); vector ya(observationCount); vector yg(observationCount); double lambda = numeric_limits::max(); while ( true ) { if (m->getControl_pressed()) { return 0; } m_count++; int i = 0; // 0 int j = 0; // 0 double yg_max = numeric_limits::min(); double yg_min = numeric_limits::max(); if (outputFilter.trace()) m->mothurOut( "m = " + toString(m_count) ); m->mothurOutEndLine(); for ( int k = 0; k < observationCount; k++ ) { ya[k] = y[k] * a[k]; yg[k] = y[k] * g[k]; } if (outputFilter.trace()) { m->mothurOut( "yg ="); for ( int k = 0; k < observationCount; k++ ) { m->mothurOut( " " + toString(yg[k])); } m->mothurOutEndLine(); } for ( int k = 0; k < observationCount; k++ ) { if ( ya[k] < B[k] && yg[k] > yg_max ) { yg_max = yg[k]; i = k; } if ( A[k] < ya[k] && yg[k] < yg_min ) { yg_min = yg[k]; j = k; } } // maximum violating pair is i,j if (outputFilter.trace()) { m->mothurOut( "maximal violating pair: " + toString(i) + " " + toString(j) ); m->mothurOutEndLine(); m->mothurOut( " i = " + toString(i) + " features: "); for ( int feature = 0; feature < featureCount; feature++ ) { m->mothurOut( toString(twoClassLabeledObservationVector[i].second->at(feature)) + " "); }; m->mothurOutEndLine(); m->mothurOut( " j = " + toString(j) + " features: "); for ( int feature = 0; feature < featureCount; feature++ ) { m->mothurOut( toString(twoClassLabeledObservationVector[j].second->at(feature)) + " "); }; m->mothurOutEndLine(); } // parameterize this if ( m_count > 1000 ) { //1000 // what happens if we just go with what we've got instead of throwing an exception? // things work pretty well for the most part // might be better to look at lambda??? 
if (outputFilter.debug()) m->mothurOut( "iteration limit reached with lambda = " + toString(lambda) ); m->mothurOutEndLine(); break; } // using lambda to break is a good performance enhancement if ( yg[i] <= yg[j] or lambda < 0.0001) { break; } u[0] = B[i] - ya[i]; u[1] = ya[j] - A[j]; double K_ii = K.similarity(twoClassLabeledObservationVector[i], twoClassLabeledObservationVector[i]); double K_jj = K.similarity(twoClassLabeledObservationVector[j], twoClassLabeledObservationVector[j]); double K_ij = K.similarity(twoClassLabeledObservationVector[i], twoClassLabeledObservationVector[j]); u[2] = (yg[i] - yg[j]) / (K_ii+K_jj-2.0*K_ij); if (outputFilter.trace()) m->mothurOut( "directions: (" + toString(u[0]) + "," + toString(u[1]) + "," + toString(u[2]) + ")" ); m->mothurOutEndLine(); lambda = *min_element(u.begin(), u.end()); if (outputFilter.trace()) m->mothurOut( "lambda: " + toString(lambda) ); m->mothurOutEndLine(); for ( int k = 0; k < observationCount; k++ ) { double K_ik = K.similarity(twoClassLabeledObservationVector[i], twoClassLabeledObservationVector[k]); double K_jk = K.similarity(twoClassLabeledObservationVector[j], twoClassLabeledObservationVector[k]); g[k] += (-lambda * y[k] * K_ik + lambda * y[k] * K_jk); } if (outputFilter.trace()) { m->mothurOut( "g ="); for ( int k = 0; k < observationCount; k++ ) { m->mothurOut( " " + toString(g[k])); } m->mothurOutEndLine(); } a[i] += y[i] * lambda; a[j] -= y[j] * lambda; } // at this point the optimal a's have been found // now use them to find w and b if (outputFilter.trace()) m->mothurOut( "find w" ); m->mothurOutEndLine(); vector w(twoClassLabeledObservationVector[0].second->size(), 0.0); double b = 0.0; for ( int i = 0; i < y.size(); i++ ) { if (outputFilter.trace()) m->mothurOut( "alpha[" + toString(i) + "] = " + toString(a[i]) ); m->mothurOutEndLine(); for ( int j = 0; j < w.size(); j++ ) { w[j] += a[i] * y[i] * twoClassLabeledObservationVector[i].second->at(j); } if ( A[i] < a[i] && a[i] < B[i] ) { b = yg[i]; if (outputFilter.trace()) m->mothurOut( "b = " + toString(b) ); m->mothurOutEndLine(); } } if (outputFilter.trace()) { for ( int i = 0; i < w.size(); i++ ) { m->mothurOut( "w[" + toString(i) + "] = " + toString(w[i]) ); m->mothurOutEndLine(); } } // be careful about passing twoClassLabeledObservationVector - what if this vector // is deleted??? // // we can eliminate elements of y, a and observation vectors corresponding to a = 0 vector support_y; vector nonzero_a; LabeledObservationVector supportVectors; for (int i = 0; i < a.size(); i++) { if ( util.isEqual(a.at(i), 0.0) ) { // this dual coefficient does not correspond to a support vector } else { support_y.push_back(y.at(i)); nonzero_a.push_back(a.at(i)); supportVectors.push_back(twoClassLabeledObservationVector.at(i)); } } //return new SVM(y, a, twoClassLabeledObservationVector, b, discriminantToLabel); if (outputFilter.info()) m->mothurOut( "found " + toString(supportVectors.size()) + " support vectors\n" ); return new SVM(support_y, nonzero_a, supportVectors, b, discriminantToLabel); } typedef map LabelToNumericClassLabel; // For SVM training we need to assign numeric class labels of -1.0 and +1.0. // This method populates the y vector argument with -1.0 and +1.0 // corresponding to the two classes in the labelVector argument. 
// For example, if labeledObservationVector looks like this: // [ (0, "blue", [...some observations...]), // (1, "green", [...some observations...]), // (2, "blue", [...some observations...]) ] // Then after the function executes the y vector will look like this: // [-1.0, blue // +1.0, green // -1.0] blue // and discriminantToLabel will look like this: // { -1.0 : "blue", // +1.0 : "green" } // The label "blue" is mapped to -1.0 because it is (lexicographically) less than "green". // When given labels "blue" and "green" this function will always assign "blue" to -1.0 and // "green" to +1.0. This is not fundamentally important but it makes testing easier and is // not a hassle to implement. void SmoTrainer::assignNumericLabels(vector& y, const LabeledObservationVector& labeledObservationVector, NumericClassToLabel& discriminantToLabel) { // it would be nice if we assign -1.0 and +1.0 consistently for each pair of labels // I think the label set will always be traversed in sorted order so we should get this for free // we are going to overwrite arguments y and discriminantToLabel y.clear(); discriminantToLabel.clear(); LabelSet labelSet; buildLabelSet(labelSet, labeledObservationVector); LabelVector uniqueLabels(labelSet.begin(), labelSet.end()); if (labelSet.size() != 2) { // throw an exception cerr << "unexpected label set size " << labelSet.size() << endl; for (LabelSet::const_iterator i = labelSet.begin(); i != labelSet.end(); i++) { cerr << " label " << *i << endl; } throw SmoTrainerException("SmoTrainer::assignNumericLabels was passed more than 2 labels"); } else { LabelToNumericClassLabel labelToNumericClassLabel; labelToNumericClassLabel[uniqueLabels[0]] = -1.0; labelToNumericClassLabel[uniqueLabels[1]] = +1.0; for ( LabeledObservationVector::const_iterator i = labeledObservationVector.begin(); i != labeledObservationVector.end(); i++ ) { y.push_back( labelToNumericClassLabel[i->first] ); } discriminantToLabel[-1.0] = uniqueLabels[0]; discriminantToLabel[+1.0] = uniqueLabels[1]; } } // the is a convenience function for getting parameter ranges for all kernels void getDefaultKernelParameterRangeMap(KernelParameterRangeMap& kernelParameterRangeMap) { ParameterRangeMap linearParameterRangeMap; linearParameterRangeMap[SmoTrainer::MapKey_C] = SmoTrainer::defaultCRange; linearParameterRangeMap[LinearKernelFunction::MapKey_Constant] = LinearKernelFunction::defaultConstantRange; ParameterRangeMap rbfParameterRangeMap; rbfParameterRangeMap[SmoTrainer::MapKey_C] = SmoTrainer::defaultCRange; rbfParameterRangeMap[RbfKernelFunction::MapKey_Gamma] = RbfKernelFunction::defaultGammaRange; ParameterRangeMap polynomialParameterRangeMap; polynomialParameterRangeMap[SmoTrainer::MapKey_C] = SmoTrainer::defaultCRange; polynomialParameterRangeMap[PolynomialKernelFunction::MapKey_Constant] = PolynomialKernelFunction::defaultConstantRange; polynomialParameterRangeMap[PolynomialKernelFunction::MapKey_Coefficient] = PolynomialKernelFunction::defaultCoefficientRange; polynomialParameterRangeMap[PolynomialKernelFunction::MapKey_Degree] = PolynomialKernelFunction::defaultDegreeRange; ParameterRangeMap sigmoidParameterRangeMap; sigmoidParameterRangeMap[SmoTrainer::MapKey_C] = SmoTrainer::defaultCRange; sigmoidParameterRangeMap[SigmoidKernelFunction::MapKey_Alpha] = SigmoidKernelFunction::defaultAlphaRange; sigmoidParameterRangeMap[SigmoidKernelFunction::MapKey_Constant] = SigmoidKernelFunction::defaultConstantRange; kernelParameterRangeMap[LinearKernelFunction::MapKey] = linearParameterRangeMap; 
kernelParameterRangeMap[RbfKernelFunction::MapKey] = rbfParameterRangeMap; kernelParameterRangeMap[PolynomialKernelFunction::MapKey] = polynomialParameterRangeMap; kernelParameterRangeMap[SigmoidKernelFunction::MapKey] = sigmoidParameterRangeMap; } // // OneVsOneMultiClassSvmTrainer // // An instance of OneVsOneMultiClassSvmTrainer is intended to work with a single set of data // to produce a single instance of MultiClassSVM. That's why observations and labels go in to // the constructor. OneVsOneMultiClassSvmTrainer::OneVsOneMultiClassSvmTrainer(SvmDataset& d, int e, int t, OutputFilter& of) : svmDataset(d), evaluationFoldCount(e), trainFoldCount(t), outputFilter(of) { buildLabelSet(labelSet, svmDataset.getLabeledObservationVector()); buildLabelToLabeledObservationVector(labelToLabeledObservationVector, svmDataset.getLabeledObservationVector()); buildLabelPairSet(labelPairSet, svmDataset.getLabeledObservationVector()); } void buildLabelSet(LabelSet& labelSet, const LabeledObservationVector& labeledObservationVector) { for (LabeledObservationVector::const_iterator i = labeledObservationVector.begin(); i != labeledObservationVector.end(); i++) { labelSet.insert(i->first); } } // This function uses the LabeledObservationVector argument to populate the LabelPairSet // argument with pairs of labels. For example, if labeledObservationVector looks like this: // [ ("blue", x), ("green", y), ("red", z) ] // then the labelPairSet will be populated with the following label pairs: // ("blue", "green"), ("blue", "red"), ("green", "red") // The order of labels in the pairs is determined by the ordering of labels in the temporary // LabelSet. By default this order will be ascending. However, labels are taken off the // temporary labelStack in reverse order, so the labelStack is initialized with reverse iterators. // In the end our label pairs will be in sorted order. void OneVsOneMultiClassSvmTrainer::buildLabelPairSet(LabelPairSet& labelPairSet, const LabeledObservationVector& labeledObservationVector) { LabelSet labelSet; buildLabelSet(labelSet, labeledObservationVector); LabelVector labelStack(labelSet.rbegin(), labelSet.rend()); while (labelStack.size() > 1) { Label label = labelStack.back(); labelStack.pop_back(); LabelPair labelPair(2); labelPair[0] = label; for (LabelVector::const_iterator i = labelStack.begin(); i != labelStack.end(); i++) { labelPair[1] = *i; labelPairSet.insert( //make_pair(label, *i) labelPair ); } } } // The LabelMatchesEither functor is used only in a call to remove_copy_if in the // OneVsOneMultiClassSvmTrainer::train method. It returns true if the labeled // observation argument has the same label as either of the two label arguments. 
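// For example (the names here are illustrative only):
//     LabelMatchesEither keepOnlyBlueGreen("blue", "green");
//     LabeledObservationVector twoClassObservations;
//     remove_copy_if(allObservations.begin(), allObservations.end(),
//                    back_inserter(twoClassObservations), keepOnlyBlueGreen);
// copies into twoClassObservations only the observations labeled "blue" or
// "green": the functor returns true (meaning "remove") for every other label,
// so remove_copy_if skips those elements.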
class LabelMatchesEither { public: LabelMatchesEither(const Label& _label0, const Label& _label1) : label0(_label0), label1(_label1) {} bool operator() (const LabeledObservation& o) { return !((o.first == label0) || (o.first == label1)); } private: const Label& label0; const Label& label1; }; MultiClassSVM* OneVsOneMultiClassSvmTrainer::train(const KernelParameterRangeMap& kernelParameterRangeMap) { double bestMultiClassSvmScore = 0.0; MultiClassSVM* bestMc; KernelFunctionFactory kernelFunctionFactory(svmDataset.getLabeledObservationVector()); // first divide the data into a 'development' set for tuning hyperparameters // and an 'evaluation' set for measuring performance int evaluationFoldNumber = 0; KFoldLabeledObservationsDivider kFoldDevEvalDivider(evaluationFoldCount, svmDataset.getLabeledObservationVector()); for ( kFoldDevEvalDivider.start(); !kFoldDevEvalDivider.end(); kFoldDevEvalDivider.next() ) { const LabeledObservationVector& developmentObservations = kFoldDevEvalDivider.getTrainingData(); const LabeledObservationVector& evaluationObservations = kFoldDevEvalDivider.getTestingData(); evaluationFoldNumber++; if ( outputFilter.debug() ) { m->mothurOut( "evaluation fold " + toString(evaluationFoldNumber) + " of " + toString(evaluationFoldCount) ); m->mothurOutEndLine(); } vector twoClassSvmList; SvmToSvmPerformanceSummary svmToSvmPerformanceSummary; SmoTrainer smoTrainer(outputFilter); LabelPairSet::iterator labelPair; for (labelPair = labelPairSet.begin(); labelPair != labelPairSet.end(); labelPair++) { // generate training and testing data for this label pair Label label0 = (*labelPair)[0]; Label label1 = (*labelPair)[1]; if ( outputFilter.debug() ) { m->mothurOut("training SVM on labels " + toString(label0) + " and " + toString(label1) ); m->mothurOutEndLine(); } double bestMeanScoreOnKFolds = 0.0; ParameterMap bestParameterMap; string bestKernelFunctionKey; LabeledObservationVector twoClassDevelopmentObservations; LabelMatchesEither labelMatchesEither(label0, label1); remove_copy_if( developmentObservations.begin(), developmentObservations.end(), back_inserter(twoClassDevelopmentObservations), labelMatchesEither //[&](const LabeledObservation& o){ // return !((o.first == label0) || (o.first == label1)); //} ); KFoldLabeledObservationsDivider kFoldLabeledObservationsDivider(trainFoldCount, twoClassDevelopmentObservations); // loop on kernel functions and kernel function parameters for ( KernelParameterRangeMap::const_iterator kmap = kernelParameterRangeMap.begin(); kmap != kernelParameterRangeMap.end(); kmap++ ) { string kernelFunctionKey = kmap->first; KernelFunction& kernelFunction = kernelFunctionFactory.getKernelFunctionForKey(kmap->first); ParameterSetBuilder p(kmap->second); for (ParameterMapVector::const_iterator hp = p.getParameterSetList().begin(); hp != p.getParameterSetList().end(); hp++) { kernelFunction.setParameters(*hp); KernelFunctionCache kernelFunctionCache(kernelFunction, svmDataset.getLabeledObservationVector()); smoTrainer.setParameters(*hp); if (outputFilter.debug()) { m->mothurOut( "parameters for " + toString(kernelFunctionKey) + " kernel" ); m->mothurOutEndLine(); for ( ParameterMap::const_iterator i = hp->begin(); i != hp->end(); i++ ) { m->mothurOut( " " + toString(i->first) + ":" + toString(i->second) ); m->mothurOutEndLine(); } } double meanScoreOnKFolds = trainOnKFolds(smoTrainer, kernelFunctionCache, kFoldLabeledObservationsDivider); if ( meanScoreOnKFolds > bestMeanScoreOnKFolds ) { bestMeanScoreOnKFolds = meanScoreOnKFolds; bestParameterMap = 
*hp; bestKernelFunctionKey = kernelFunctionKey; } } } Utils util; if ( util.isEqual(bestMeanScoreOnKFolds, 0.0) ) { m->mothurOut( "failed to train SVM on labels " + toString(label0) + " and " + toString(label1) ); m->mothurOutEndLine(); throw exception(); } else { if ( outputFilter.debug() ) { m->mothurOut( "trained SVM on labels " + label0 + " and " + label1 ); m->mothurOutEndLine(); m->mothurOut( " best mean score over " + toString(trainFoldCount) + " folds is " + toString(bestMeanScoreOnKFolds) ); m->mothurOutEndLine(); m->mothurOut( " best parameters for " + bestKernelFunctionKey + " kernel" ); m->mothurOutEndLine(); for ( ParameterMap::const_iterator p = bestParameterMap.begin(); p != bestParameterMap.end(); p++ ) { m->mothurOut( " " + toString(p->first) + " : " + toString(p->second) ); m->mothurOutEndLine(); } } LabelMatchesEither labelMatchesEither(label0, label1); LabeledObservationVector twoClassDevelopmentObservations; remove_copy_if( developmentObservations.begin(), developmentObservations.end(), back_inserter(twoClassDevelopmentObservations), labelMatchesEither //[&](const LabeledObservation& o){ // return !((o.first == label0) || (o.first == label1)); //} ); if (outputFilter.info()) { m->mothurOut( "training final SVM with " + toString(twoClassDevelopmentObservations.size()) + " labeled observations" ); m->mothurOutEndLine(); for ( ParameterMap::const_iterator i = bestParameterMap.begin(); i != bestParameterMap.end(); i++ ) { m->mothurOut( " " + toString(i->first) + ":" + toString(i->second) ); m->mothurOutEndLine(); } } KernelFunction& kernelFunction = kernelFunctionFactory.getKernelFunctionForKey(bestKernelFunctionKey); kernelFunction.setParameters(bestParameterMap); smoTrainer.setParameters(bestParameterMap); KernelFunctionCache kernelFunctionCache(kernelFunction, svmDataset.getLabeledObservationVector()); SVM* svm = smoTrainer.train(kernelFunctionCache, twoClassDevelopmentObservations); twoClassSvmList.push_back(svm); // return a performance summary using the evaluation dataset LabeledObservationVector twoClassEvaluationObservations; remove_copy_if( evaluationObservations.begin(), evaluationObservations.end(), back_inserter(twoClassEvaluationObservations), labelMatchesEither ); SvmPerformanceSummary p(*svm, twoClassEvaluationObservations); svmToSvmPerformanceSummary[svm->getLabelPair()] = p; } } MultiClassSVM* mc = new MultiClassSVM(twoClassSvmList, labelSet, svmToSvmPerformanceSummary, outputFilter); //double score = mc->score(evaluationObservations); mc->setAccuracy(evaluationObservations); if ( outputFilter.debug() ) { m->mothurOut( "fold " + toString(evaluationFoldNumber) + " multiclass SVM score: " + toString(mc->getAccuracy()) ); m->mothurOutEndLine(); } if ( mc->getAccuracy() > bestMultiClassSvmScore ) { bestMc = mc; bestMultiClassSvmScore = mc->getAccuracy(); } else { delete mc; } } if ( outputFilter.info() ) { m->mothurOut( "best multiclass SVM has score " + toString(bestMc->getAccuracy()) ); m->mothurOutEndLine(); } return bestMc; } //SvmTrainingInterruptedException multiClassSvmTrainingInterruptedException("one-vs-one multiclass SVM training interrupted by user"); double OneVsOneMultiClassSvmTrainer::trainOnKFolds(SmoTrainer& smoTrainer, KernelFunctionCache& kernelFunction, KFoldLabeledObservationsDivider& kFoldLabeledObservationsDivider) { double meanScoreOverKFolds = 0.0; double online_mean_n = 0.0; double online_mean_score = 0.0; meanScoreOverKFolds = -1.0; // means we failed to train a SVM for ( kFoldLabeledObservationsDivider.start(); 
!kFoldLabeledObservationsDivider.end(); kFoldLabeledObservationsDivider.next() ) { const LabeledObservationVector& kthTwoClassTrainingFold = kFoldLabeledObservationsDivider.getTrainingData(); const LabeledObservationVector& kthTwoClassTestingFold = kFoldLabeledObservationsDivider.getTestingData(); if (outputFilter.info()) { m->mothurOut( "fold " + toString(kFoldLabeledObservationsDivider.getFoldNumber()) + " training data has " + toString(kthTwoClassTrainingFold.size()) + " labeled observations" ); m->mothurOutEndLine(); m->mothurOut( "fold " + toString(kFoldLabeledObservationsDivider.getFoldNumber()) + " testing data has " + toString(kthTwoClassTestingFold.size()) + " labeled observations" ); m->mothurOutEndLine(); } if (m->getControl_pressed()) { return 0; } else { try { if (outputFilter.debug()) m->mothurOut( "begin training" ); m->mothurOutEndLine(); SVM* evaluationSvm = smoTrainer.train(kernelFunction, kthTwoClassTrainingFold); SvmPerformanceSummary svmPerformanceSummary(*evaluationSvm, kthTwoClassTestingFold); double score = evaluationSvm->score(kthTwoClassTestingFold); //double score = svmPerformanceSummary.getAccuracy(); if (outputFilter.debug()) { m->mothurOut( "score on fold " + toString(kFoldLabeledObservationsDivider.getFoldNumber()) + " of test data is " + toString(score) ); m->mothurOutEndLine(); m->mothurOut( "positive label: " + toString(svmPerformanceSummary.getPositiveClassLabel()) ); m->mothurOutEndLine(); m->mothurOut( "negative label: " + toString(svmPerformanceSummary.getNegativeClassLabel()) ); m->mothurOutEndLine(); m->mothurOut( " precision: " + toString(svmPerformanceSummary.getPrecision()) + " recall: " + toString(svmPerformanceSummary.getRecall()) + " f: " + toString(svmPerformanceSummary.getF()) + " accuracy: " + toString(svmPerformanceSummary.getAccuracy()) ); m->mothurOutEndLine(); } online_mean_n += 1.0; double online_mean_delta = score - online_mean_score; online_mean_score += online_mean_delta / online_mean_n; meanScoreOverKFolds = online_mean_score; delete evaluationSvm; } catch ( exception& e ) { m->mothurOut( "exception: " + toString(e.what()) ); m->mothurOutEndLine(); m->mothurOut( " on fold " + toString(kFoldLabeledObservationsDivider.getFoldNumber()) + " failed to train SVM with C = " + toString(smoTrainer.getC()) ); m->mothurOutEndLine(); } } } if (outputFilter.debug()) { m->mothurOut( "done with cross validation on C = " + toString(smoTrainer.getC()) ); m->mothurOutEndLine(); m->mothurOut( " mean score over " + toString(kFoldLabeledObservationsDivider.getFoldNumber()) + " folds is " + toString(meanScoreOverKFolds) ); m->mothurOutEndLine(); } Utils util; if ( util.isEqual(meanScoreOverKFolds, 0.0) ) { m->mothurOut( "failed to train SVM with C = " + toString(smoTrainer.getC()) + "\n"); } return meanScoreOverKFolds; } class UnrankedFeature { public: UnrankedFeature(const Feature& f) : feature(f), rankingCriterion(0.0) {} ~UnrankedFeature() = default; Feature getFeature() const { return feature; } double getRankingCriterion() const { return rankingCriterion; } void setRankingCriterion(double rc) { rankingCriterion = rc; } private: Feature feature; double rankingCriterion; }; bool lessThanRankingCriterion(const UnrankedFeature& a, const UnrankedFeature& b) { return a.getRankingCriterion() < b.getRankingCriterion(); } bool lessThanFeatureIndex(const UnrankedFeature& a, const UnrankedFeature& b) { return a.getFeature().getFeatureIndex() < b.getFeature().getFeatureIndex(); } typedef list UnrankedFeatureList; // Only the linear svm can be used here. 
// Consider allowing only parameter ranges as arguments. // Right now any kernel can be sent in. // It would be useful to remove more than one feature at a time // Might make sense to turn last two arguments into one RankedFeatureList SvmRfe::getOrderedFeatureList(SvmDataset& svmDataset, OneVsOneMultiClassSvmTrainer& t, const ParameterRange& linearKernelConstantRange, const ParameterRange& smoTrainerParameterRange) { KernelParameterRangeMap rfeKernelParameterRangeMap; ParameterRangeMap linearParameterRangeMap; linearParameterRangeMap[SmoTrainer::MapKey_C] = smoTrainerParameterRange; linearParameterRangeMap[LinearKernelFunction::MapKey_Constant] = linearKernelConstantRange; rfeKernelParameterRangeMap[LinearKernelFunction::MapKey] = linearParameterRangeMap; // the rankedFeatureList is empty at first RankedFeatureList rankedFeatureList; // loop until all but one feature have been eliminated // no need to eliminate the last feature, after all int svmRfeRound = 0; //while ( rankedFeatureList.size() < (svmDataset.getFeatureVector().size()-1) ) { while ( svmDataset.getFeatureVector().size() > 1 ) { svmRfeRound++; m->mothurOut( "SVM-RFE round " + toString(svmRfeRound) + ":" ); m->mothurOutEndLine(); UnrankedFeatureList unrankedFeatureList; for (int featureIndex = 0; featureIndex < svmDataset.getFeatureVector().size(); featureIndex++) { Feature f = svmDataset.getFeatureVector().at(featureIndex); unrankedFeatureList.push_back(UnrankedFeature(f)); } m->mothurOut( toString(unrankedFeatureList.size()) + " unranked features" ); m->mothurOutEndLine(); MultiClassSVM* s = t.train(rfeKernelParameterRangeMap); m->mothurOut( "multiclass SVM accuracy: " + toString(s->getAccuracy()) ); m->mothurOutEndLine(); m->mothurOut( "two-class SVM performance" ); m->mothurOutEndLine(); m->mothurOut("class 1\tclass 2\tprecision\trecall\f\accuracy\n"); for ( SvmVector::const_iterator svm = s->getSvmList().begin(); svm != s->getSvmList().end(); svm++ ) { SvmPerformanceSummary sps = s->getSvmPerformanceSummary(**svm); m->mothurOut(toString(sps.getPositiveClassLabel()) + toString(sps.getNegativeClassLabel()) + toString(sps.getPrecision()) + toString(sps.getRecall()) + toString(sps.getF()) + toString(sps.getAccuracy()) ); m->mothurOutEndLine(); } // calculate the 'ranking criterion' for each (remaining) feature using each binary svm for (UnrankedFeatureList::iterator f = unrankedFeatureList.begin(); f != unrankedFeatureList.end(); f++) { const int i = f->getFeature().getFeatureIndex(); // rankingCriterion combines feature weights for feature i in all svms double rankingCriterion = 0.0; for ( SvmVector::const_iterator svm = s->getSvmList().begin(); svm != s->getSvmList().end(); svm++ ) { // output SVM performance summary // calculate the weight w of feature i for this svm double wi = 0.0; for (int j = 0; j < (*svm)->x.size(); j++) { // all support vectors contribute to wi wi += (*svm)->a.at(j) * (*svm)->y.at(j) * (*svm)->x.at(j).second->at(i); } // accumulate weights for feature i from all svms rankingCriterion += pow(wi, 2); } // update the (unranked) feature ranking criterion f->setRankingCriterion(rankingCriterion); } delete s; // sort the unranked features by ranking criterion unrankedFeatureList.sort(lessThanRankingCriterion); // eliminate the bottom 1/(n+1) features - this is very slow but gives good results ////int eliminateFeatureCount = ceil(unrankedFeatureList.size() / (iterationCount+1.0)); // eliminate the bottom 1/3 features - fast but results slightly different from above // how about 1/4? 
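// Worked example of the ceil(n/4) schedule used just below (the numbers are
// illustrative, not taken from a real dataset): starting from 100 features,
// successive rounds eliminate ceil(100/4)=25, ceil(75/4)=19, ceil(56/4)=14,
// ceil(42/4)=11, ... so the feature count shrinks
//     100 -> 75 -> 56 -> 42 -> 31 -> 23 -> 17 -> 12 -> 9 -> 6 -> 4 -> 3 -> 2 -> 1
// in about 13 SVM-RFE rounds, versus the 99 rounds that one-feature-at-a-time
// elimination would need.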
int eliminateFeatureCount = ceil(unrankedFeatureList.size() / 4.0); m->mothurOut( "eliminating " + toString(eliminateFeatureCount) + " feature(s) of " + toString(unrankedFeatureList.size()) + " total features\n"); m->mothurOutEndLine(); UnrankedFeatureList featuresToEliminate; for ( int i = 0; i < eliminateFeatureCount; i++ ) { // remove the lowest ranked feature(s) from the list of unranked features UnrankedFeature unrankedFeature = unrankedFeatureList.front(); unrankedFeatureList.pop_front(); featuresToEliminate.push_back(unrankedFeature); // put the lowest ranked feature at the front of the list of ranked features // the first feature to be eliminated will be at the back of this list // the last feature to be eliminated will be at the front of this list rankedFeatureList.push_front(RankedFeature(unrankedFeature.getFeature(), svmRfeRound)); } featuresToEliminate.sort(lessThanFeatureIndex); reverse(featuresToEliminate.begin(), featuresToEliminate.end()); for (UnrankedFeatureList::iterator g = featuresToEliminate.begin(); g != featuresToEliminate.end(); g++) { Feature unrankedFeature = g->getFeature(); removeFeature(unrankedFeature, svmDataset.getLabeledObservationVector(), svmDataset.getFeatureVector()); } } // there may be one feature left svmRfeRound++; for ( FeatureVector::iterator f = svmDataset.getFeatureVector().begin(); f != svmDataset.getFeatureVector().end(); f++ ) { rankedFeatureList.push_front(RankedFeature(*f, svmRfeRound)); } return rankedFeatureList; } mothur-1.48.0/source/svm/svm.hpp000077500000000000000000001120221424121717000165450ustar00rootroot00000000000000// // svm.hpp // support vector machine // // Created by Joshua Lynch on 6/19/2013. // Copyright (c) 2013 Schloss Lab. All rights reserved. // #ifndef svm_hpp_ #define svm_hpp_ #include #include #include #include #include #include #include #include #include #include #include "mothurout.h" #include "utils.hpp" // For the purpose of training a support vector machine // we need to calculate a dot product between two feature // vectors. In general these feature vectors are not // restricted to lists of doubles, but in this implementation // feature vectors (or 'observations' as they will be called from here on) // will be vectors of doubles. typedef vector Observation; /* class Observation { public: Observation() = default; ~Observation() = default; private: vector obs; }; */ // A dataset is a collection of labeled observations. // The ObservationVector typedef is a vector // of pointers to ObservationVectors. Pointers are used here since // datasets will be rearranged many times during cross validation. // Using pointers to Observations makes copying the elements of // an ObservationVector cheap. typedef vector ObservationVector; // Training a support vector machine requires labeled data. The // Label typedef defines what will constitute a class 'label' in // this implementation. typedef string Label; typedef vector